Example No. 1
    def to_csv(self, output, **kwargs):
        """
        Serializes the table to CSV and writes it to any file-like object.
        """
        rows = self.to_rows(serialize_dates=True)

        # Insert header row
        rows.insert(0, self.headers())

        writer = CSVToolsWriter(output, **kwargs)
        writer.writerows(rows)
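The method above only prepends the header row and delegates the actual serialization to a writer. A minimal, self-contained sketch of the same pattern using nothing but the standard csv module (the table object and CSVToolsWriter are not shown in this example, so plain lists and csv.writer stand in for them; the column names and rows are made up purely for illustration):

import csv
import io

# Hypothetical stand-ins for self.headers() and self.to_rows(serialize_dates=True)
headers = ['id', 'name', 'joined']
rows = [[1, 'Ada', '1842-12-10'], [2, 'Grace', '1952-05-01']]

# Insert the header row, then hand everything to the writer in one call
rows.insert(0, headers)

output = io.StringIO()  # any file-like object works here
csv.writer(output).writerows(rows)
print(output.getvalue())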
Example No. 2
def xlsx(f, output=None, **kwargs):
    """
    Convert an Excel .xlsx file to csv.
    Note: Unlike other convertor's, this one allows output columns to contain mixed data types.
    Blank headers are also possible.

    :param f: Excel File to convert
    :type f: File
    :param output: CSV
    :type output: File
    """
    streaming = bool(output)

    if not streaming:
        output = six.StringIO()

    writer = CSVToolsWriter(output)

    book = load_workbook(f, use_iterators=True, data_only=True)

    if 'sheet' in kwargs:
        sheet = book.get_sheet_by_name(kwargs['sheet'])
    else:
        sheet = book.get_active_sheet()

    for i, row in enumerate(sheet.iter_rows()):
        if i == 0:
            writer.writerow([c.value for c in row])
            continue

        out_row = []

        for c in row:
            value = c.value

            if value.__class__ is datetime.datetime:
                # Handle default XLSX date as 00:00 time
                if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(c):
                    value = value.time()
                    value = normalize_datetime(value)
                elif value.time() == NULL_TIME:
                    value = value.date()
                else:
                    value = normalize_datetime(value)
            elif value.__class__ is float:
                if value % 1 == 0:
                    value = int(value)

            if value.__class__ in (datetime.datetime, datetime.date,
                                   datetime.time):
                value = value.isoformat()

            out_row.append(value)

        writer.writerow(out_row)

    if not streaming:
        data = output.getvalue()
        return data

    # Return empty string when streaming
    return ''
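The converter above relies on an older openpyxl interface (use_iterators, get_active_sheet). As a rough sketch of the same idea against the current openpyxl API with the standard csv module (the helper names, NULL_TIME handling and the 1904-date special case from the example are deliberately omitted, so this is an approximation rather than the converter itself):

import csv
import sys

from openpyxl import load_workbook  # third-party: pip install openpyxl


def xlsx_to_csv(path, output=sys.stdout, sheet=None):
    # data_only=True returns cached formula results instead of formula strings
    book = load_workbook(path, read_only=True, data_only=True)
    ws = book[sheet] if sheet else book.active

    writer = csv.writer(output)
    for row in ws.iter_rows(values_only=True):
        # Date/time cells arrive as datetime objects; serialize them as ISO 8601
        writer.writerow([v.isoformat() if hasattr(v, 'isoformat') else v
                         for v in row])

# Usage (hypothetical file name): xlsx_to_csv('report.xlsx', sheet='Sheet1')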
Example No. 3
    def main(self):
        connection_string = self.args.connection_string
        do_insert = self.args.insert
        query = self.args.query

        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if self.args.table_names:
            table_names = self.args.table_names.split(',')
        else:
            table_names = []

        # If one or more filenames are specified, we need to add stdin ourselves (if available)
        if sys.stdin not in self.input_files:
            try:
                if not sys.stdin.isatty():
                    self.input_files.insert(0, sys.stdin)
            except Exception:
                # sys.stdin may be closed or unavailable in some environments; skip it
                pass

        # Create an SQLite database in memory if no connection string is specified
        if query and not connection_string:
            connection_string = "sqlite:///:memory:"
            do_insert = True

        if self.args.dialect and connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if do_insert and not connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not do_insert:
            self.argparser.error('The --no-create option is only valid when --insert is also specified.')

        # Establish database validity before reading CSV files
        if connection_string:
            try:
                engine, metadata = sql.get_connection(connection_string)
            except ImportError:
                raise ImportError(
                    'You don\'t appear to have the necessary database backend installed for the connection string you\'re trying to use. Available backends include:\n\nPostgreSQL:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at:\n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')
            conn = engine.connect()
            trans = conn.begin()

        for f in self.input_files:
            try:
                # Try to use name specified via --table
                table_name = table_names.pop(0)
            except IndexError:
                if f == sys.stdin:
                    table_name = "stdin"
                else:
                    # Use filename as table name
                    table_name = os.path.splitext(os.path.split(f.name)[1])[0]

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                snifflimit=self.args.snifflimit,
                blanks_as_nulls=(not self.args.blanks),
                infer_types=(not self.args.no_inference),
                no_header_row=self.args.no_header_row,
                **self.reader_kwargs
            )

            f.close()

            if connection_string:
                sql_table = sql.make_table(
                    csv_table,
                    table_name,
                    self.args.no_constraints,
                    self.args.db_schema,
                    metadata
                )

                # Create table
                if not self.args.no_create:
                    sql_table.create()

                # Insert data
                if do_insert and csv_table.count_rows() > 0:
                    insert = sql_table.insert()
                    headers = csv_table.headers()
                    conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

            # Output SQL statements
            else:
                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

        if connection_string:
            if query:
                # Execute specified SQL queries
                queries = query.split(';')
                rows = None

                for q in queries:
                    if q:
                        rows = conn.execute(q)

                # Output result of last query as CSV
                try:
                    output = CSVToolsWriter(self.output_file, **self.writer_kwargs)
                    if not self.args.no_header_row:
                        output.writerow(rows._metadata.keys)
                    for row in rows:
                        output.writerow(row)
                except AttributeError:
                    pass

            trans.commit()
            conn.close()
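When --query is given without --db, the code above boils down to: load each CSV into a throwaway in-memory SQLite database, run the query, and write the last result back out as CSV. A hedged, standard-library-only sketch of that flow (csvsql's type inference and the SQLAlchemy layer are skipped, every column is treated as text, and the table and column names are made up for illustration):

import csv
import io
import sqlite3

# Toy input standing in for a CSV file read from stdin
csv_data = 'id,name\n1,Ada\n2,Grace\n'
reader = csv.reader(io.StringIO(csv_data))
headers = next(reader)

# In-memory SQLite database, mirroring the sqlite:///:memory: default above
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE stdin (%s)' % ', '.join('"%s" TEXT' % h for h in headers))
conn.executemany('INSERT INTO stdin VALUES (%s)' % ', '.join(['?'] * len(headers)), reader)

# Run a query and emit the result as CSV, header row first
cursor = conn.execute('SELECT name FROM stdin WHERE CAST(id AS INTEGER) > 1')
out = io.StringIO()
writer = csv.writer(out)
writer.writerow([col[0] for col in cursor.description])
writer.writerows(cursor)
conn.close()

print(out.getvalue())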