Exemple #1
0
def main():
	"""Clean the CBS energy use CSV and load it into TABLE_NAME.

	Does nothing except print a notice when the table already holds
	records. Otherwise a cleaned copy of FILE_NAME_IN is written to
	FILE_NAME_OUT (both inside data_dir) and load_statement is executed
	with the cleaned file's path as its single parameter.

	Raises:
		FileNotFoundError: if the input CSV cannot be opened.
	"""
	if not table_empty(TABLE_NAME):
		print(f"Table '{TABLE_NAME}' already populated, skipping loading of new records")
		return

	path_in = os.path.join(data_dir, FILE_NAME_IN)
	path_out = os.path.join(data_dir, FILE_NAME_OUT)
	statement = load_statement.format(dbname=sql.Identifier(TABLE_NAME))

	# newline='' on both files lets the csv module do its own line-ending
	# handling, as the csv documentation recommends.
	with open(path_in, newline='') as infile, open(path_out, 'w', newline='') as outfile:
		reader = csv.DictReader(infile, delimiter=';')
		writer = csv.DictWriter(outfile, reader.fieldnames, delimiter=';')
		writer.writeheader()
		for row in reader:
			# Trim surrounding whitespace and drop every '.' from each value
			# (presumably thousands separators - confirm against the data).
			writer.writerow({
				fieldname: value.strip().replace('.', '')
				for fieldname, value in row.items()
			})

	connection = get_connection()
	try:
		cursor = connection.cursor()
		print("Loading CBS energy use records, this might take a minute or so.")
		cursor.execute(statement, (path_out,))
		cursor.close()
		connection.commit()
		print("Done.")
	except UndefinedFile:
		# The statement is given path_out, so that is the file the database
		# could not find.
		print(f"\nError: CBS energy use data file not found.\nExpected file at {path_out}.")
	finally:
		# Release the connection on the error path as well.
		connection.close()
def main():
    """Load the energy label records when needed, then post-process the table.

    The table named by env['EP_ONLINE_DBNAME'] is only loaded when it is
    empty; the clean-up and enrichment steps below run in either case.
    """
    table_name = env['EP_ONLINE_DBNAME']

    if table_empty(table_name):
        started_at = time.time()
        print(
            'Starting to load records (estimated 4.7M records), this can take around 15 minutes...'
        )
        load_energy_labels_data()
        # NOTE(review): `i` is not assigned anywhere in this function;
        # presumably a module-level record counter - confirm it exists.
        print(
            f'\nProcessed {i:,} records in {(time.time() - started_at):.2f} seconds.'
        )
        # ru_maxrss is reported in platform-dependent units, hence the
        # hedged unit in the message.
        peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000000
        print(
            f'Max memory usage: {peak_rss:.3f} (MB on macOS; probably GB on Linux).'
        )
    else:
        print(
            f"Table '{table_name}' already populated, skipping loading of new records"
        )

    delete_labels_without_vbo_id()
    add_foreign_key_constraint()

    # Labels for dwellings missing from the BAG are gone at this point, yet
    # roughly 6000 labels remain for buildings with 'gebouwklasse' U. The
    # BAG lists those with a 'residential' VBO function, so we assume their
    # gebouwklasse is wrong. The column no longer carries reliable meaning,
    # so drop it to make sure nothing else relies on it.
    delete_column('energy_labels', 'gebouwklasse')

    add_functions()
    add_column_epi_imputed()
Exemple #3
0
def main():
    """Load the RVO Warmtenetten records into TABLE_NAME.

    Does nothing except print a notice when the table already holds
    records. Otherwise load_statement is executed with the path of
    FILE_NAME inside data_dir as its single parameter, and the result is
    committed.
    """
    if not table_empty(TABLE_NAME):
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )
        return

    path = os.path.join(data_dir, FILE_NAME)
    statement = load_statement.format(dbname=sql.Identifier(TABLE_NAME))

    connection = get_connection()
    try:
        cursor = connection.cursor()
        print(
            "Loading RVO Warmtenetten records, this might take a minute or so."
        )
        cursor.execute(statement, (path, ))
        cursor.close()
        connection.commit()
        print("Done.")
    except UndefinedFile:
        print(
            f"\nError: RVO Warmtenetten data file not found.\nExpected file at {path}."
        )
    finally:
        # Release the connection on the error path as well; previously it
        # stayed open whenever the load failed.
        connection.close()
Exemple #4
0
def main():
    """Create the WoON survey table from the CSV header and load the data.

    The first line of the file at `path` (a name defined outside this
    function) is split on ';' to obtain the column names; every column is
    created as 'text'. The data is only loaded when the table is empty,
    after which the 'ew_huis' and 'ew_pers' columns are converted via
    alter_column_to_number and the changes are committed.
    """
    missing_file_message = (
        f"Error: WoON survey data file not found.\nExpected file at {path}."
    )

    try:
        with open(path, 'r') as file:
            header = file.readline().strip()
    except FileNotFoundError:
        print(missing_file_message)
        return

    column_names = header.split(';')
    columns = [(sanitize_column_name(column_name), 'text')
               for column_name in column_names]

    print('Creating table...')
    create_table(TABLE_NAME, columns)

    if not table_empty(TABLE_NAME):
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )
        return

    # TODO: make this idempotent somehow?
    statement = load_statement.format(table_name=sql.Identifier(TABLE_NAME))

    connection = get_connection()
    try:
        cursor = connection.cursor()
        print("Loading WoON survey data...")
        try:
            cursor.execute(statement, (path, ))
        except UndefinedFile:
            print(missing_file_message)
            # Nothing was loaded, so stop here instead of altering columns
            # and committing on top of a failed statement.
            return

        for column in ('ew_huis', 'ew_pers'):
            alter_column_to_number(cursor, column)

        print("Committing...")
        cursor.close()
        connection.commit()
    finally:
        # Runs on success, on the early return above and on any error, so
        # the connection is never left open.
        connection.close()
def main():
    """Load the BAG records when the table is empty, then clean the table up.

    The three clean-up steps at the end run whether or not new records
    were loaded, including when the data file could not be found.
    """
    if table_empty(TABLE_NAME):
        source_path = os.path.join(data_dir, FILE_NAME)
        copy_statement = load_statement.format(
            dbname=sql.Identifier(TABLE_NAME))

        try:
            print("Loading BAG records, this might take a minute or so.")
            execute(copy_statement, (source_path, ))
        except UndefinedFile:
            # A missing data file is reported but does not abort the run.
            print(
                f"\nError: BAG data file not found.\nExpected file at {source_path}.")
    else:
        print(
            f"Table '{TABLE_NAME}' already populated, skipping loading of new records"
        )

    update_unknown_construction_year()
    drop_demolished_dwellings()
    drop_dwellings_without_construction_year()