def main():
    """Clean the CBS energy use CSV and bulk-load it into ``TABLE_NAME``.

    Returns immediately when ``table_empty(TABLE_NAME)`` is false. Otherwise
    the semicolon-delimited input file is rewritten to ``FILE_NAME_OUT`` with
    every value stripped of surrounding whitespace and of all ``.``
    characters, after which ``load_statement`` is executed with the cleaned
    file's path as its only parameter.

    A missing file reported by the database (``UndefinedFile``) is printed
    instead of raised. The connection is closed on every path.
    """
    if not table_empty(TABLE_NAME):
        print(f"Table '{TABLE_NAME}' already populated, skipping loading of new records")
        return

    path_in = os.path.join(data_dir, FILE_NAME_IN)
    path_out = os.path.join(data_dir, FILE_NAME_OUT)
    statement = load_statement.format(dbname=sql.Identifier(TABLE_NAME))

    # Rewrite the raw file into a cleaned copy that the load statement reads.
    with open(path_in) as infile, open(path_out, 'w', newline='') as csv_file:
        reader = csv.DictReader(infile, delimiter=';')
        writer = csv.DictWriter(csv_file, reader.fieldnames, delimiter=';')
        writer.writeheader()
        for row in reader:
            # Single pass: trim whitespace, then drop every '.' character.
            writer.writerow({
                fieldname: value.strip().replace('.', '')
                for fieldname, value in row.items()
            })

    connection = get_connection()
    try:
        cursor = connection.cursor()
        print("Loading CBS energy use records, this might take a minute or so.")
        cursor.execute(statement, (path_out,))
        cursor.close()
        connection.commit()
    except UndefinedFile:
        # The statement was given path_out, so that is the file to report.
        print(f"\nError: CBS energy use data file not found.\nExpected file at {path_out}.")
    else:
        print("Done.")
    finally:
        # Release the connection on failure paths as well.
        connection.close()
def main():
    """Load the energy label records if needed, then run the clean-up steps.

    When the table named by ``env['EP_ONLINE_DBNAME']`` is empty, the records
    are loaded via ``load_energy_labels_data()`` and timing / peak-memory
    figures are printed; otherwise a notice is printed. The post-processing
    calls at the bottom run in both cases.
    """
    table_name = env['EP_ONLINE_DBNAME']

    if table_empty(table_name):
        started_at = time.time()
        print(
            'Starting to load records (estimated 4.7M records), this can take around 15 minutes...'
        )
        load_energy_labels_data()

        # NOTE(review): `i` is not assigned in this function; presumably a
        # module-level record counter maintained by the loader - confirm.
        elapsed = time.time() - started_at
        print(f'\nProcessed {i:,} records in {elapsed:.2f} seconds.')

        peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000000
        print(
            f'Max memory usage: {peak_rss:.3f} (MB on macOS; probably GB on Linux).'
        )
    else:
        print(
            f"Table '{table_name}' already populated, skipping loading of new records"
        )

    delete_labels_without_vbo_id()
    add_foreign_key_constraint()

    # Once labels for dwellings missing from the BAG are removed, roughly
    # 6000 labels with 'gebouwklasse' U remain. The BAG lists those buildings
    # with a 'residential' VBO function, so their gebouwklasse is assumed to
    # be wrong. The column no longer carries reliable meaning and is dropped
    # so that nothing downstream can depend on it.
    delete_column('energy_labels', 'gebouwklasse')

    add_functions()
    add_column_epi_imputed()
def main():
    """Bulk-load the RVO Warmtenetten data file into ``TABLE_NAME``.

    Returns immediately when ``table_empty(TABLE_NAME)`` is false. Otherwise
    ``load_statement`` is executed with the data file's path as its only
    parameter and the transaction is committed.

    A missing file reported by the database (``UndefinedFile``) is printed
    instead of raised. The connection is closed on every path.
    """
    if not table_empty(TABLE_NAME):
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )
        return

    path = os.path.join(data_dir, FILE_NAME)
    statement = load_statement.format(dbname=sql.Identifier(TABLE_NAME))

    connection = get_connection()
    try:
        cursor = connection.cursor()
        print(
            "Loading RVO Warmtenetten records, this might take a minute or so."
        )
        cursor.execute(statement, (path, ))
        cursor.close()
        connection.commit()
    except UndefinedFile:
        print(
            f"\nError: RVO Warmtenetten data file not found.\nExpected file at {path}."
        )
    else:
        print("Done.")
    finally:
        # Previously the connection stayed open whenever execute() raised.
        connection.close()
def main():
    """Create the WoON survey table from the file header and load the data.

    Steps:
      1. Read the first line of ``path`` and split it on ``;`` to obtain the
         column names; every column is created with type ``text``.
      2. Call ``create_table(TABLE_NAME, columns)``.
      3. Return if ``table_empty(TABLE_NAME)`` is false.
      4. Execute ``load_statement`` with ``path`` as its only parameter.
      5. Pass the ``ew_huis`` and ``ew_pers`` columns to
         ``alter_column_to_number`` and commit.

    A missing file (either locally, ``FileNotFoundError``, or as reported by
    the database, ``UndefinedFile``) is printed and aborts the run without
    committing. The connection is closed on every path.
    """
    try:
        with open(path, 'r') as file:
            header = file.readline().strip()
    except FileNotFoundError:
        print(
            f"Error: WoON survey data file not found.\nExpected file at {path}."
        )
        return

    column_names = header.split(';')
    columns = [(sanitize_column_name(column_name), 'text')
               for column_name in column_names]

    print('Creating table...')
    create_table(TABLE_NAME, columns)

    if not table_empty(TABLE_NAME):
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )
        return

    # TODO: make this idempotent somehow?
    statement = load_statement.format(table_name=sql.Identifier(TABLE_NAME))

    connection = get_connection()
    try:
        cursor = connection.cursor()
        try:
            print("Loading WoON survey data...")
            cursor.execute(statement, (path, ))
        except UndefinedFile:
            print(
                f"Error: WoON survey data file not found.\nExpected file at {path}."
            )
            # Nothing was loaded and the transaction has failed; altering
            # columns or committing now would only raise a second error.
            return

        for column in ('ew_huis', 'ew_pers'):
            alter_column_to_number(cursor, column)

        print("Committing...")
        cursor.close()
        connection.commit()
    finally:
        # Runs for the early return above too, so the connection never leaks.
        connection.close()
def main():
    """Load the BAG records if needed, then run the clean-up steps.

    When ``table_empty(TABLE_NAME)`` is true, ``load_statement`` is executed
    with the data file's path as its only parameter; a missing file reported
    by the database (``UndefinedFile``) is printed instead of raised.
    Otherwise a notice is printed. The three post-processing calls at the
    bottom run in both cases.
    """
    if table_empty(TABLE_NAME):
        bag_file = os.path.join(data_dir, FILE_NAME)
        load_sql = load_statement.format(dbname=sql.Identifier(TABLE_NAME))

        print("Loading BAG records, this might take a minute or so.")
        try:
            execute(load_sql, (bag_file, ))
        except UndefinedFile:
            print(
                f"\nError: BAG data file not found.\nExpected file at {bag_file}.")
    else:
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )

    update_unknown_construction_year()
    drop_demolished_dwellings()
    drop_dwellings_without_construction_year()