def command_pgimport(input_encoding, no_create_table, source, database_uri, table_name):
    pgimport(
        filename=source,
        encoding=input_encoding,
        database_uri=database_uri,
        create_table=not no_create_table,
        table_name=table_name,
        progress=True,
    )
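# The wrapper above only forwards its arguments. A minimal standalone sketch of
# the same call, assuming rows.utils.pgimport is available; the filename, URI
# and table name below are hypothetical placeholders, and only parameters
# already shown above are used:
from rows.utils import pgimport

pgimport(
    filename="data.csv.gz",                                 # hypothetical input file
    encoding="utf-8",
    database_uri="postgres://user:pass@localhost:5432/db",  # hypothetical URI
    create_table=True,
    table_name="data",                                      # hypothetical table name
    progress=True,
)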
def main():
    for table in ("empresa", "socio", "cnae_secundaria"):
        pgimport(
            str(DATA_DIRECTORY / f"{table}.csv.gz"),
            POSTGRES_URI,
            table,
            schema=load_schema(str(SCHEMA_DIRECTORY / f"{table}.csv")),
        )
        create_index(table)

    with cnaes_csv() as (source, schema):
        pgimport(source, POSTGRES_URI, "cnae", schema=load_schema(schema))
        create_index("cnae", "codigo")
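# load_schema, create_index, cnaes_csv, DATA_DIRECTORY, SCHEMA_DIRECTORY and
# POSTGRES_URI are project helpers/constants not shown here. A hypothetical
# sketch of what a load_schema helper could look like, assuming each schema CSV
# has "field_name" and "field_type" columns and that the type names map to
# classes in rows.fields (e.g. "text" -> TextField, "integer" -> IntegerField):
import csv
from collections import OrderedDict

import rows.fields


def load_schema(filename):
    """Map each field name in the schema CSV to a rows field class."""
    with open(filename, encoding="utf-8") as fobj:
        return OrderedDict(
            (row["field_name"], getattr(rows.fields, row["field_type"].capitalize() + "Field"))
            for row in csv.DictReader(fobj)
        )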
def command_pgimport(
    input_encoding, no_create_table, dialect, schema, source, database_uri, table_name
):
    progress = ProgressBar(
        prefix="Importing data", pre_prefix="Detecting file size", unit="bytes"
    )
    try:
        total_size = uncompressed_size(source)
    except (RuntimeError, ValueError):
        total_size = None
    else:
        progress.total = total_size

    progress.description = "Analyzing source file"
    schemas = _get_schemas_for_inputs([schema] if schema else None, [source])
    import_meta = pgimport(
        filename=source,
        encoding=input_encoding,
        dialect=dialect,
        database_uri=database_uri,
        create_table=not no_create_table,
        table_name=table_name,
        callback=progress.update,
        schema=schemas[0],
    )
    progress.description = "{} rows imported".format(import_meta["rows_imported"])
    progress.close()
def main():
    if wait_for_postgres() and tables_exist():
        print(
            "There are existing tables in the database. "
            "Please, start with a clean database."
        )
        return

    tables = ("empresa", "socio", "cnae_secundaria")
    files = ("empresa", "socio", "cnae-secundaria")
    for table, filename in zip(tables, files):
        pgimport(
            str(DATA_DIRECTORY / f"{filename}.csv.gz"),
            POSTGRES_URI,
            table,
            schema=load_schema(str(SCHEMA_DIRECTORY / f"{filename}.csv")),
        )
        create_index(table)

    with cnaes_csv() as (source, schema):
        pgimport(source, POSTGRES_URI, "cnae", schema=load_schema(schema))
        create_index("cnae", "codigo")
def handle(self, *args, **kwargs):
    dataset_slug = kwargs["dataset_slug"]
    tablename = kwargs["tablename"]
    filename = kwargs["filename"]
    ask_confirmation = not kwargs["no_input"]
    import_data = not kwargs["no_import_data"]
    vacuum = not kwargs["no_vacuum"]
    clear_view_cache = not kwargs["no_clear_view_cache"]
    create_filter_indexes = not kwargs["no_create_filter_indexes"]
    fill_choices = not kwargs["no_fill_choices"]
    collect_date = self.clean_collect_date(kwargs["collect_date"])

    if ask_confirmation:
        print(
            "This operation will DESTROY the existing data for this "
            "dataset table."
        )
        answer = input("Do you want to continue? (y/n) ")
        if answer.lower().strip() not in ("y", "yes"):
            exit()

    table = Table.objects.for_dataset(dataset_slug).named(tablename)
    Model = table.get_model()

    if import_data:
        # Create the table if not exists
        with transaction.atomic():
            try:
                Model.delete_table()
            except ProgrammingError:  # Does not exist
                pass
            finally:
                Model.create_table(create_indexes=False)
                Model.create_triggers()

        # Get file object, header and set command to run
        table_name = Model._meta.db_table
        database_uri = os.environ["DATABASE_URL"]
        encoding = "utf-8"  # TODO: receive as a parameter
        timeout = 0.1  # TODO: receive as a parameter
        start_time = time.time()
        progress = ProgressBar(prefix="Importing data", unit="bytes")

        # TODO: change the way we do it (CSV dialect may change, encoding etc.)
        file_header = open_compressed(filename).readline().strip().split(",")
        table_schema = table.schema
        schema = OrderedDict(
            [(field_name, table_schema[field_name]) for field_name in file_header]
        )

        try:
            import_meta = pgimport(
                filename=filename,
                encoding=encoding,
                dialect="excel",
                database_uri=database_uri,
                table_name=table_name,
                create_table=False,
                timeout=timeout,
                callback=progress.update,
                schema=schema,
            )
        except RuntimeError as exception:
            progress.close()
            print("ERROR: {}".format(exception.args[0]))
            exit(1)
        else:
            progress.close()
            table.import_date = timezone.now()
            table.save()
            if collect_date:
                table.version.collected_at = collect_date
                table.version.save()
            end_time = time.time()
            duration = end_time - start_time
            rows_imported = import_meta["rows_imported"]
            print(
                " done in {:7.3f}s ({} rows imported, {:.3f} rows/s).".format(
                    duration, rows_imported, rows_imported / duration
                )
            )

    Model = table.get_model(cache=False)
    table.invalidate_cache()

    if vacuum:
        print("Running VACUUM ANALYSE...", end="", flush=True)
        start = time.time()
        Model.analyse_table()
        end = time.time()
        print(" done in {:.3f}s.".format(end - start))

    if create_filter_indexes:
        # TODO: warn if field has_choices but not in Table.filtering
        print("Creating filter indexes...", end="", flush=True)
        start = time.time()
        Model.create_indexes()
        end = time.time()
        print(" done in {:.3f}s.".format(end - start))

    if fill_choices:
        print("Filling choices...")
        start = time.time()
        choiceables = Field.objects.for_table(table).choiceables()
        for field in choiceables:
            print(" {}".format(field.name), end="", flush=True)
            start_field = time.time()
            field.update_choices()
            field.save()
            end_field = time.time()
            print(" - done in {:.3f}s.".format(end_field - start_field))
        end = time.time()
        print(" done in {:.3f}s.".format(end - start))

    if clear_view_cache:
        print("Clearing view cache...")
        cache.clear()
def handle(self, *args, **kwargs):
    dataset_slug = kwargs['dataset_slug']
    tablename = kwargs['tablename']
    filename = kwargs['filename']
    ask_confirmation = not kwargs['no_input']
    import_data = not kwargs['no_import_data']
    vacuum = not kwargs['no_vacuum']
    create_filter_indexes = not kwargs['no_create_filter_indexes']
    fill_choices = not kwargs['no_fill_choices']

    if ask_confirmation:
        print(
            'This operation will DESTROY the existing data for this '
            'dataset table.'
        )
        answer = input('Do you want to continue? (y/n) ')
        if answer.lower().strip() not in ('y', 'yes'):
            exit()

    table = Table.objects.for_dataset(dataset_slug).named(tablename)
    Model = table.get_model()

    if import_data:
        # Create the table if not exists
        with transaction.atomic():
            try:
                Model.delete_table()
            except ProgrammingError:  # Does not exist
                pass
            finally:
                Model.create_table(create_indexes=False)
                Model.create_triggers()

        # Get file object, header and set command to run
        table_name = Model._meta.db_table
        database_uri = os.environ['DATABASE_URL']
        encoding = 'utf-8'  # TODO: receive as a parameter
        timeout = 0.1  # TODO: receive as a parameter
        start_time = time.time()
        progress = ProgressBar(prefix='Importing data', unit='bytes')

        try:
            import_meta = pgimport(
                filename=filename,
                encoding=encoding,
                dialect='excel',
                database_uri=database_uri,
                table_name=table_name,
                create_table=False,
                timeout=timeout,
                callback=progress.update,
            )
        except RuntimeError as exception:
            progress.close()
            print('ERROR: {}'.format(exception.args[0]))
            exit(1)
        else:
            progress.close()
            table.last_update = timezone.now()
            table.save()
            end_time = time.time()
            duration = end_time - start_time
            rows_imported = import_meta['rows_imported']
            print(' done in {:7.3f}s ({} rows imported, {:.3f} rows/s).'
                  .format(duration, rows_imported, rows_imported / duration))

    Model = table.get_model(cache=False)

    if vacuum:
        print('Running VACUUM ANALYSE...', end='', flush=True)
        start = time.time()
        Model.analyse_table()
        end = time.time()
        print(' done in {:.3f}s.'.format(end - start))

    if create_filter_indexes:
        # TODO: warn if field has_choices but not in Table.filtering
        print('Creating filter indexes...', end='', flush=True)
        start = time.time()
        Model.create_indexes()
        end = time.time()
        print(' done in {:.3f}s.'.format(end - start))

    if fill_choices:
        print('Filling choices...')
        start = time.time()
        choiceables = Field.objects.for_table(table).choiceables()
        for field in choiceables:
            print(' {}'.format(field.name), end='', flush=True)
            start_field = time.time()
            field.update_choices()
            field.save()
            end_field = time.time()
            print(' - done in {:.3f}s.'.format(end_field - start_field))
        end = time.time()
        print(' done in {:.3f}s.'.format(end - start))
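# Both handle() implementations above belong to a Django management command;
# a hypothetical invocation from the shell, assuming the command is registered
# as "import_data" and that each kwargs["no_*"] value is exposed as a --no-*
# option (the command name and option spellings are assumptions, not confirmed
# by the code above):
#
#     python manage.py import_data <dataset-slug> <tablename> <filename.csv.gz> \
#         --no-vacuum --no-create-filter-indexes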