def get_data(test, sql=False, query='throughput_gas_monthly.sql'): csvName = query.split(".")[0]+'.csv' if sql: print('reading sql '+query.split(".")[0]) df = execute_sql(path=os.path.join(script_dir, "/queries"), query_name=query, db='EnergyData') df.to_csv('raw_data/'+csvName, index=False) elif test: print('reading test '+query.split(".")[0]) df = pd.read_csv('raw_data/test_data/'+csvName) else: print('reading local '+query.split(".")[0]) df = pd.read_csv('raw_data/'+csvName, encoding='latin-1') # inital processing for key points if query == 'key_points.sql': # add extra key points that dont appear in database new = range(5) others = pd.DataFrame.from_dict({"Key Point": ["Calgary", "Edmonton", "Saturn", "OSDA Kirby", "OSDA Liege"], "Corporate Entity": ["NOVA Gas Transmission Ltd." for x in new], "Latitude": [51.22022, 51.80478, 55.99558, 53.31907, 56.9473], "Longitude": [-114.4925, -113.59329, -121.1104, -111.35386, -111.80979]}) df = fixKeyPoint(df) df = df.append(others, ignore_index=True) df = normalize_text(df, ['Key Point', 'Corporate Entity']) df = normalize_numeric(df, ['Latitude', 'Longitude'], 3) df = fixCorporateEntity(df) df = df[df['Key Point'] != "FortisBC Lower Mainland"] return df
def orca_regdocs_links(sql=False): if sql: df = execute_sql(script_dir, 'projects_regdocs.sql') df.to_csv('raw_data/projects_regdocs.csv', index=False) else: df = pd.read_csv('raw_data/projects_regdocs.csv') return df
def getSql(sql=False, query='projects_regdocs.sql'): csvName = query.replace(".sql", ".csv") if sql: df = execute_sql(os.path.join(script_dir, "queries"), query) df.to_csv('raw_data/'+csvName, index=False) else: df = pd.read_csv('raw_data/'+csvName) return df
def get_sql(sql=False, query='projects_regdocs.sql'): csv_name = query.replace(".sql", ".csv") if sql: print('reading sql ' + query) df = execute_sql(os.path.join(os.getcwd(), "queries"), query) df.to_csv('raw_data/' + csv_name, index=False) else: print('reading local csv ' + csv_name) df = pd.read_csv('raw_data/' + csv_name) return df
def get_traffic_data(sql=False, query='throughput_gas_monthly.sql', db="PipelineInformation"): csv_name = query.split(".")[0] + '.csv' if sql: print('reading sql ' + query.split(".")[0]) df = execute_sql(path=os.path.join(os.getcwd(), "queries"), query_name=query, db=db) df.to_csv('raw_data/' + csv_name, index=False) else: print('reading local ' + query.split(".")[0]) df = pd.read_csv('raw_data/' + csv_name, encoding='utf-8') # inital processing for key points if query == 'key_points.sql': df = normalize_text(df, ['Key Point', 'Pipeline Name']) df = normalize_numeric(df, ['Latitude', 'Longitude'], 3) return df