#%% Import
# Script cell layout: imports, preparation (paths, file handles, CSV lines),
# then the parsing loop that walks the CSV line by line.
import os
from sqlalchemy import create_engine, MetaData, Table
import pandas as pd
from pm_helper import get_ids
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from import_entsoe_datasets import import_entsoe_datasets

#%% prepare
# usage: id_dict['country']['Croatia'] gets province, feature, country
id_dict = get_ids()

filename = '/Users/dorowiemann/Documents/_Uni/_SJTU_PowerMaps/Learning-Driven-Power-Maps/data/Kraftwerksliste_trimmed.csv'
sql_file = '/Users/dorowiemann/Documents/_Uni/_SJTU_PowerMaps/Learning-Driven-Power-Maps/sql/import_plants.sql'

# open sql file (output, truncated on open)
# NOTE(review): neither `f` nor `csv` is closed in the visible part of this
# script -- confirm both handles are closed once processing is done.
f = open(sql_file, 'w')

# open csv file; the source list is read as latin-1 (utf-8 was tried before)
csv = open(filename, 'r', encoding='latin-1')  # , encoding="utf-8"
lines = csv.readlines()

#%% execute
number_of_headerlines = 1
counter = 0  # 1-based number of the CSV line currently being processed

# prepare sql string: head of the INSERT statement for the `plant` table
sql = "INSERT INTO plant (plant_number, province_id, zip, energy_carrier, capacity)\nVALUES"

# parse csv file
for line in lines:
    counter += 1
    # skip header
    if counter <= number_of_headerlines:
        continue
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

#%% connect to db and get data
# Features requested from the database.
select_x = [
    'population',
    'revenue_construction',
    'revenue_manufacturing',
    'tourism_guest_nights',
    'area_non_agriculture',
]
# engine = create_engine("postgresql://dorowiemann@localhost:5432/power_maps")

# NOTE(review): `get_ids` and `get_data` are not imported in the visible part
# of this script -- presumably `from pm_helper import get_ids, get_data`
# appears above; confirm.
id_dict = get_ids()  # usage: id_dict['country']['Croatia']

# returns a dataframe with features as in select_x (list) and features per
# capita. Energy per person in kWh
data_df = get_data(select_x, id_dict)

# %% define model input
# extract month
# data_df['month'] = 0
# for i in range(len(data_df)):
#     data_df['month'][i] = data_df['date'][i].month

# Every selected feature except the population itself is fed to the model in
# its per-person form; the column name is the feature name plus '_per_person'.
model_input = [
    feature + '_per_person'
    for feature in select_x
    if feature != 'population'
]
# model_input.append('month')

# %%
# sns.pairplot(data_df[["energy_per_person","revenue_construction_per_person", "revenue_manufacturing_per_person", "tourism_guest_nights_per_person", "area_non_agriculture_per_person"]])
# NOTE(review): `pd` and `register_matplotlib_converters` are not imported in
# the visible part of this script -- presumably imported above; confirm.
register_matplotlib_converters()
import matplotlib.pyplot as plt
import datetime
from sklearn.linear_model import LinearRegression
import sklearn.model_selection as model_selection
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.decomposition import PCA
from sqlalchemy import create_engine
from pm_helper import get_ids, get_data

# Features requested from the database.
select_x = [
    'population',
    'area_agriculture',
    'revenue_manufacturing',
    'revenue_construction',
    'tourism_guest_nights',
]

# %% connect and get data
engine = create_engine("postgresql://dorowiemann@localhost:5432/power_maps")
id_dict = get_ids()
data_df = get_data(select_x, id_dict)

# %% get total area of provinces
# The feature id comes from the project's own id lookup (not user input), so
# it is spliced into the statement directly.
sql = (
    'select value, province_id from data where feature_id = '
    + str(id_dict['feature']['area'])
    + ' and spatial_resolution = \'province\';'
)
total_area_df = pd.read_sql_query(sql, engine)

# Mapping province_id -> total area ('value' column of the query result).
total_area_dict = total_area_df.set_index('province_id')['value'].to_dict()

#%% get non-agricultural area
# Look up the total area of each row's province. A province missing from
# `total_area_dict` raises KeyError instead of silently producing NaN.
# The column is assigned in one step rather than element by element through
# chained indexing (`df[col][i] = ...`), which pandas does not guarantee to
# write back to the frame and which depends on a default integer index.
province_total_area = pd.Series(
    [total_area_dict[prov] for prov in data_df['province_id']],
    index=data_df.index,
)
data_df['non-agricultural_area'] = (
    province_total_area - data_df['area_agriculture_value']
)
data_df['non-agricultural_area_per_person'] = (
    data_df['non-agricultural_area'] / data_df['population_value']
)