def fix_nulls(table, active):
    # Decode safe-null sentinels back into real NULLs, mutating rows in place.
    if not active:
        return
    nullify = Nullify()
    for row in table:
        for i in range(len(row)):
            row[i] = nullify.decode_null(row[i])
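# A minimal sketch of the round trip fix_nulls undoes, assuming catsql's
# Nullify helper (encode_null/decode_null, as used above and in show() below);
# the sample table here is hypothetical:
#
#   rows = [['a', None], [None, 'b']]
#   encoded = [[Nullify().encode_null(cell) for cell in row] for row in rows]
#   fix_nulls(encoded, active=True)   # restores the None cells in place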
def show(self):
    """Run the query and emit results as CSV, JSON, SQLite, or Excel,
    optionally round-tripping through an external editor (--edit)."""
    self.tables_so_far = []
    work = None
    output_filename = None
    self.output_file = sys.stdout
    work_file = None
    self.start_table(None, None)
    try:
        if self.args.edit:
            # Edits happen against a CSV snapshot in a temporary directory.
            import tempfile
            work = tempfile.mkdtemp()
            output_filename = os.path.join(work, 'reference.csv')
            work_file = open(output_filename, 'wt')
            self.output_file = work_file
            self.output_in_csv = True
            self.args.safe_null = True
            self.args.save_bookmark = [os.path.join(work, 'bookmark.json')]
        elif self.args.output:
            self.output_file = open(self.args.output[0], 'wt')
        viable_tables = []

        # Apply the command-line filters to the base query.
        q = self.database.query(columns=self.selected_columns)
        touchable = True
        if self.args.select_from:
            q = q.select_from(self.args.select_from[0])
            touchable = False
        if self.args.distinct:
            q = q.distinct()
        if self.row_filter is not None:
            q = q.where_sqls(self.row_filter)
        if self.args.value is not None:
            q = q.where_kv(self.context_filters)
        if self.values is not None:
            q = q.where_kv_with_expansion(self.values)
        if self.args.grep:
            for pattern in self.args.grep:
                q = q.grep(pattern, case_sensitive=False)
        if self.args.order:
            if 'none' not in self.args.order:
                q.order(self.ordering)
        elif touchable:
            q.order()
        if self.args.limit:
            q = q.limit(int(self.args.limit[0]))

        ts = list(q)
        ts.sort(key=lambda t: t['table_name'])
        for t in ts:
            table_name = t['table_name']
            table = t['table']
            rows = t['rows']
            self.header_shown = False
            keys = None
            if hasattr(table, 'columns'):
                keys = table.columns.keys()
            else:
                keys = table.keys
            self.start_table(table_name, keys)
            viable_tables.append(table_name)
            if self.args.types:
                # With --types, emit a single row of SQL type names
                # instead of data.
                column_types = []
                for name in self.columns:
                    if not self.ok_column(name):
                        continue
                    try:
                        column = table.c[name]
                        sql_name = str(column.type)  # make sure not nulltype
                    except CompileError:
                        sql_name = None
                    column_types.append(sql_name)
                rows = [column_types]
            if self.target_ss:
                # Excel output via openpyxl: one worksheet per table,
                # with a bold header row and auto-sized columns.
                ws = self.target_ss.create_sheet()
                ws.title = table_name
                ws.append(column for column in self.columns
                          if self.ok_column(column))
                for row in rows:
                    ws.append(cell for c, cell in enumerate(row)
                              if self.ok_column(self.columns[c]))
                s = openpyxl.styles.NamedStyle(
                    name="Header", font=openpyxl.styles.Font(bold=True))
                for cell in next(ws.rows):
                    cell.style = s
                for column_cells in ws.columns:
                    ws.column_dimensions[
                        column_cells[0].column].auto_size = True
            if self.target_db:
                if table_name in self.target_db.tables_metadata.keys():
                    # clear previous results
                    self.target_db.tables_metadata[table_name].drop(
                        self.target_db.engine)
                target = {'table': None, 'rows': []}

                def fallback_type(example):
                    # Approximate a SQL type from a sample Python value.
                    if isinstance(example, bool):
                        return types.Boolean
                    elif isinstance(example, int):
                        return types.Integer
                    elif isinstance(example, float):
                        return types.Float
                    elif isinstance(example, datetime):
                        return types.DateTime
                    return types.UnicodeText

                def create_table(data):
                    # Create the target table lazily, on the first row, so
                    # untranslatable column types can fall back to types
                    # inferred from that row's values.
                    if target['table'] is not None:
                        return
                    columns = []
                    for name in self.columns:
                        if not self.ok_column(name):
                            continue
                        column = table.c[name]
                        sql_type = column.type
                        try:
                            # Probe whether the target dialect can compile
                            # this type at all.
                            self.target_db.engine.dialect.type_compiler.process(
                                sql_type)
                        except CompileError:
                            # some types need to be approximated
                            sql_type = None
                        if sql_type is None or isinstance(
                                sql_type, types.NullType):
                            example = data.get(name)
                            sql_type = fallback_type(example)
                        columns.append(
                            Column(name, sql_type,
                                   primary_key=column.primary_key))
                    metadata = MetaData(bind=self.target_db.engine)
                    target['table'] = Table(table_name, metadata, *columns)
                    target['table'].create(self.target_db.engine)

                def add_row(data):
                    # Buffer rows and flush in batches; a falsy `data`
                    # (add_row(None)) forces a final flush.
                    if data:
                        target['rows'].append(data)
                    if len(target['rows']) > 10000 or not data:
                        target['table'].insert().execute(target['rows'])
                        target['rows'] = []

                def sqlited(data):
                    # Serialize nested structures as JSON text.
                    if isinstance(data, dict) or isinstance(data, list):
                        return json.dumps(data)
                    return data

                for row in rows:
                    data = dict((self.columns[c], sqlited(cell))
                                for c, cell in enumerate(row)
                                if self.ok_column(self.columns[c]))
                    create_table(data)
                    add_row(data)
                create_table({})
                add_row(None)
            if self.output_in_json or self.output_in_sqlite or self.output_in_excel:
                if not self.show_header_on_need():
                    continue
                if self.output_in_json:
                    self.save_as_json(table, rows, self.output_in_json[0])
            elif not self.args.count:
                # csv spec is that eol is \r\n; we ignore this for our purposes
                # for good reasons that unfortunately there isn't space to
                # describe here on the back of this envelope
                csv_writer = csv.writer(self.output_file, lineterminator='\n')
                if not self.show_header_on_need():
                    continue
                if self.args.safe_null:
                    nullify = Nullify()
                    for row in rows:
                        csv_writer.writerow(
                            list(nullify.encode_null(cell)
                                 for c, cell in enumerate(row)
                                 if self.ok_column(self.columns[c])))
                else:
                    for row in rows:
                        csv_writer.writerow(
                            list(cell for c, cell in enumerate(row)
                                 if self.ok_column(self.columns[c])))
                del csv_writer
            else:
                self.show_header_on_need()
                ct = rows.count()
                print("({} row{})".format(ct, '' if ct == 1 else 's'),
                      file=self.output_file)
        if len(self.tables_so_far) == 0 and len(viable_tables) == 1:
            self.show_header_on_need()
        if self.args.save_bookmark:
            with open(self.args.save_bookmark[0], 'w') as fout:
                link = OrderedDict()
                link['url'] = self.args.catsql_database_url
                link['table'] = list(self.tables) if self.tables else None
                link['column'] = self.selected_columns
                link['distinct'] = self.args.distinct
                link['context'] = self.context_filters
                link['hidden_columns'] = sorted(self.context_columns)
                link['sql'] = self.args.sql
                fout.write(json.dumps(link, indent=2))
        if self.args.edit and not self.failure:
            # Hand the CSV snapshot to an editor, then apply the differences
            # back to the database via patchsql.
            work_file.close()
            work_file = None
            edit_filename = os.path.join(work, 'variant.csv')
            copyfile(output_filename, edit_filename)
            editor = os.environ.get('TABLE_EDITOR', None)
            if not editor:
                editor = os.environ.get('EDITOR', 'nano')
            call([editor, edit_filename])
            patchsql([self.url, '--table'] + self.tables_so_far +
                     ['--follow', output_filename, edit_filename,
                      '--safe-null'] +
                     (['--quiet'] if self.args.quiet else []) +
                     (['--schema', self.schema] if self.schema else []),
                     database=self.database)
    finally:
        if self.failure:
            print("ERROR: "
                  "More than one table in csv/json output, consider adding:",
                  file=sys.stderr)
            for name in self.tables_so_far:
                print(" --table {}".format(name), file=sys.stderr)
        if self.target_ss:
            self.target_ss.save(self.output_in_excel[0])
        if work:
            if work_file:
                try:
                    work_file.close()
                except Exception:
                    pass
                work_file = None
            import shutil
            shutil.rmtree(work)
            work = None
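# For reference, the --edit flow above invokes the patchsql entry point with
# an argv-style list rather than shelling out; the equivalent command line is
# roughly (temp-dir paths as created above, DB_URL and NAME standing in for
# the user's database URL and table name):
#
#   patchsql DB_URL --table NAME --follow reference.csv variant.csv --safe-null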