def xlsx(f, output=None, **kwargs):
    """
    Convert an Excel .xlsx file to CSV.

    The first row is written out untouched, so headers may be blank, and a
    column is allowed to hold values of different types.

    :param f: Excel file to convert
    :type f: File
    :param output: optional destination for the CSV; when it is falsy the
        CSV text is collected in memory and returned instead
    :type output: File
    :param kwargs: ``sheet`` may name the worksheet to read; otherwise the
        workbook's active sheet is used
    :returns: the CSV text, or ``''`` when writing to ``output``
    """
    temporal_types = (datetime.datetime, datetime.date, datetime.time)
    epoch_1904 = datetime.date(1904, 1, 1)

    def convert(cell):
        # Turn one data cell into the value that goes into the CSV row.
        # Exact class comparisons are intentional: subclasses are left alone.
        value = cell.value
        kind = value.__class__

        if kind is datetime.datetime:
            if value.date() == epoch_1904 and not has_date_elements(cell):
                # A 1904-01-01 date on a cell without date elements is
                # handled as a bare time of day.
                value = normalize_datetime(value.time())
            elif value.time() == NULL_TIME:
                # No time component: keep only the date.
                value = value.date()
            else:
                value = normalize_datetime(value)
        elif kind is float and value % 1 == 0:
            # Whole-number floats are written as integers.
            value = int(value)

        if value.__class__ in temporal_types:
            value = value.isoformat()

        return value

    streaming = bool(output)
    if not streaming:
        output = six.StringIO()

    writer = CSVToolsWriter(output)
    book = load_workbook(f, use_iterators=True, data_only=True)

    if 'sheet' in kwargs:
        sheet = book.get_sheet_by_name(kwargs['sheet'])
    else:
        sheet = book.get_active_sheet()

    rows = iter(sheet.iter_rows())

    # The header row is emitted with its raw cell values.
    for header in rows:
        writer.writerow([cell.value for cell in header])
        break

    for row in rows:
        writer.writerow([convert(cell) for cell in row])

    # Nothing to hand back when the caller supplied the output file.
    return '' if streaming else output.getvalue()
def main(self):
    """
    Generate SQL for the input CSV files and optionally load and query them.

    Without a connection string, a ``CREATE TABLE`` statement for every
    input is written to ``self.output_file``. With a connection string (or
    with a query, which falls back to an in-memory SQLite database and turns
    inserting on), each table is created unless ``--no-create`` is given,
    rows are inserted when requested, and the result of the last query is
    written to ``self.output_file`` as CSV.

    Invalid option combinations are reported through
    ``self.argparser.error``.

    :raises ImportError: when ``sql.get_connection`` cannot import the
        database backend needed for the connection string
    """
    connection_string = self.args.connection_string
    do_insert = self.args.insert
    query = self.args.query

    self.input_files = []

    for path in self.args.input_paths:
        self.input_files.append(self._open_input_file(path))

    if self.args.table_names:
        table_names = self.args.table_names.split(',')
    else:
        table_names = []

    # If one or more filenames are specified, we need to add stdin ourselves (if available)
    if sys.stdin not in self.input_files:
        try:
            if not sys.stdin.isatty():
                self.input_files.insert(0, sys.stdin)
        except Exception:
            # Best effort: an unusable stdin is simply not used as input.
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            pass

    # Create an SQLite database in memory if no connection string is specified
    if query and not connection_string:
        connection_string = "sqlite:///:memory:"
        do_insert = True

    if self.args.dialect and connection_string:
        self.argparser.error('The --dialect option is only valid when --db is not specified.')

    if do_insert and not connection_string:
        self.argparser.error('The --insert option is only valid when --db is also specified.')

    if self.args.no_create and not do_insert:
        self.argparser.error('The --no-create option is only valid when --insert is also specified.')

    # Establish database validity before reading CSV files
    if connection_string:
        try:
            engine, metadata = sql.get_connection(connection_string)
        except ImportError:
            raise ImportError(
                'You don\'t appear to have the necessary database backend '
                'installed for connection string you\'re trying to use. '
                'Available backends include:\n\n'
                'Postgresql:\tpip install psycopg2\n'
                'MySQL:\t\tpip install MySQL-python\n\n'
                'For details on connection strings and other backends, please '
                'see the SQLAlchemy documentation on dialects at: \n\n'
                'http://www.sqlalchemy.org/docs/dialects/\n\n'
            )
        conn = engine.connect()
        trans = conn.begin()

    for f in self.input_files:
        try:
            # Try to use name specified via --table
            table_name = table_names.pop(0)
        except IndexError:
            if f == sys.stdin:
                table_name = "stdin"
            else:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]

        csv_table = table.Table.from_csv(
            f,
            name=table_name,
            snifflimit=self.args.snifflimit,
            blanks_as_nulls=(not self.args.blanks),
            infer_types=(not self.args.no_inference),
            no_header_row=self.args.no_header_row,
            **self.reader_kwargs
        )

        f.close()

        if connection_string:
            sql_table = sql.make_table(
                csv_table,
                table_name,
                self.args.no_constraints,
                self.args.db_schema,
                metadata
            )

            # Create table
            if not self.args.no_create:
                sql_table.create()

            # Insert data
            if do_insert and csv_table.count_rows() > 0:
                insert = sql_table.insert()
                headers = csv_table.headers()
                conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

        # Output SQL statements
        else:
            sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
            self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

    if connection_string:
        if query:
            # Execute specified SQL queries
            queries = query.split(';')
            rows = None

            for q in queries:
                # Skip empty and whitespace-only fragments (such as the one
                # left after a trailing ';') so they are never executed and
                # cannot replace the result of the last real statement.
                if q.strip():
                    rows = conn.execute(q)

            # Output result of last query as CSV
            try:
                output = CSVToolsWriter(self.output_file, **self.writer_kwargs)
                if not self.args.no_header_row:
                    output.writerow(rows._metadata.keys)
                for row in rows:
                    output.writerow(row)
            except AttributeError:
                # Raised when no statement ran (rows is None); presumably
                # also covers results without row metadata - TODO confirm.
                pass

        trans.commit()
        conn.close()
def main(self):
    """
    Generate SQL for the input CSV files and optionally load and query them.

    Without a connection string, a ``CREATE TABLE`` statement for every
    input is written to ``self.output_file``. With a connection string (or
    with a query, which falls back to an in-memory SQLite database and turns
    inserting on), each table is created unless ``--no-create`` is given,
    rows are inserted when requested, and the result of the last query is
    written to ``self.output_file`` as CSV.

    Invalid option combinations are reported through
    ``self.argparser.error``.

    :raises ImportError: when ``sql.get_connection`` cannot import the
        database backend needed for the connection string
    """
    connection_string = self.args.connection_string
    do_insert = self.args.insert
    query = self.args.query

    self.input_files = []

    for path in self.args.input_paths:
        self.input_files.append(self._open_input_file(path))

    if self.args.table_names:
        table_names = self.args.table_names.split(',')
    else:
        table_names = []

    # If one or more filenames are specified, we need to add stdin ourselves (if available)
    if sys.stdin not in self.input_files:
        try:
            if not sys.stdin.isatty():
                self.input_files.insert(0, sys.stdin)
        except Exception:
            # Best effort: an unusable stdin is simply not used as input.
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            pass

    # Create an SQLite database in memory if no connection string is specified
    if query and not connection_string:
        connection_string = "sqlite:///:memory:"
        do_insert = True

    if self.args.dialect and connection_string:
        self.argparser.error('The --dialect option is only valid when --db is not specified.')

    if do_insert and not connection_string:
        self.argparser.error('The --insert option is only valid when --db is also specified.')

    if self.args.no_create and not do_insert:
        self.argparser.error('The --no-create option is only valid when --insert is also specified.')

    # Establish database validity before reading CSV files
    if connection_string:
        try:
            engine, metadata = sql.get_connection(connection_string)
        except ImportError:
            raise ImportError(
                'You don\'t appear to have the necessary database backend '
                'installed for connection string you\'re trying to use. '
                'Available backends include:\n\n'
                'Postgresql:\tpip install psycopg2\n'
                'MySQL:\t\tpip install MySQL-python\n\n'
                'For details on connection strings and other backends, please '
                'see the SQLAlchemy documentation on dialects at: \n\n'
                'http://www.sqlalchemy.org/docs/dialects/\n\n'
            )
        conn = engine.connect()
        trans = conn.begin()

    for f in self.input_files:
        try:
            # Try to use name specified via --table
            table_name = table_names.pop(0)
        except IndexError:
            if f == sys.stdin:
                table_name = "stdin"
            else:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]

        csv_table = table.Table.from_csv(
            f,
            name=table_name,
            snifflimit=self.args.snifflimit,
            blanks_as_nulls=(not self.args.blanks),
            infer_types=(not self.args.no_inference),
            no_header_row=self.args.no_header_row,
            **self.reader_kwargs
        )

        f.close()

        if connection_string:
            sql_table = sql.make_table(
                csv_table,
                table_name,
                self.args.no_constraints,
                self.args.db_schema,
                metadata
            )

            # Create table
            if not self.args.no_create:
                sql_table.create()

            # Insert data
            if do_insert and csv_table.count_rows() > 0:
                insert = sql_table.insert()
                headers = csv_table.headers()
                conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

        # Output SQL statements
        else:
            sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
            self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

    if connection_string:
        if query:
            # Execute specified SQL queries
            queries = query.split(';')
            rows = None

            for q in queries:
                # Skip empty and whitespace-only fragments (such as the one
                # left after a trailing ';') so they are never executed and
                # cannot replace the result of the last real statement.
                if q.strip():
                    rows = conn.execute(q)

            # Output result of last query as CSV
            try:
                output = CSVToolsWriter(self.output_file, **self.writer_kwargs)
                if not self.args.no_header_row:
                    output.writerow(rows._metadata.keys)
                for row in rows:
                    output.writerow(row)
            except AttributeError:
                # Raised when no statement ran (rows is None); presumably
                # also covers results without row metadata - TODO confirm.
                pass

        trans.commit()
        conn.close()
def xlsx(f, output=None, **kwargs):
    """
    Convert an Excel .xlsx file to CSV.

    The first row is written out untouched, so headers may be blank, and a
    column is allowed to hold values of different types.

    :param f: Excel file to convert
    :type f: File
    :param output: optional destination for the CSV; when it is falsy the
        CSV text is collected in memory and returned instead
    :type output: File
    :param kwargs: ``sheet`` may name the worksheet to read; otherwise the
        workbook's active sheet is used
    :returns: the CSV text, or ``''`` when writing to ``output``
    """
    temporal_types = (datetime.datetime, datetime.date, datetime.time)
    epoch_1904 = datetime.date(1904, 1, 1)

    def convert(cell):
        # Turn one data cell into the value that goes into the CSV row.
        # Exact class comparisons are intentional: subclasses are left alone.
        value = cell.value
        kind = value.__class__

        if kind is datetime.datetime:
            if value.date() == epoch_1904 and not has_date_elements(cell):
                # A 1904-01-01 date on a cell without date elements is
                # handled as a bare time of day.
                value = normalize_datetime(value.time())
            elif value.time() == NULL_TIME:
                # No time component: keep only the date.
                value = value.date()
            else:
                value = normalize_datetime(value)
        elif kind is float and value % 1 == 0:
            # Whole-number floats are written as integers.
            value = int(value)

        if value.__class__ in temporal_types:
            value = value.isoformat()

        return value

    streaming = bool(output)
    if not streaming:
        output = six.StringIO()

    writer = CSVToolsWriter(output)
    book = load_workbook(f, use_iterators=True, data_only=True)

    if 'sheet' in kwargs:
        sheet = book.get_sheet_by_name(kwargs['sheet'])
    else:
        sheet = book.get_active_sheet()

    rows = iter(sheet.iter_rows())

    # The header row is emitted with its raw cell values.
    for header in rows:
        writer.writerow([cell.value for cell in header])
        break

    for row in rows:
        writer.writerow([convert(cell) for cell in row])

    # Nothing to hand back when the caller supplied the output file.
    return '' if streaming else output.getvalue()