Ejemplo n.º 1
0
def get_data(test, sql=False, query='throughput_gas_monthly.sql'):

    csvName = query.split(".")[0]+'.csv'
    if sql:
        print('reading sql '+query.split(".")[0])
        df = execute_sql(path=os.path.join(script_dir, "/queries"), query_name=query, db='EnergyData')
        df.to_csv('raw_data/'+csvName, index=False)
    elif test:
        print('reading test '+query.split(".")[0])
        df = pd.read_csv('raw_data/test_data/'+csvName)
    else:
        print('reading local '+query.split(".")[0])
        df = pd.read_csv('raw_data/'+csvName, encoding='latin-1')

    # inital processing for key points
    if query == 'key_points.sql':
        # add extra key points that dont appear in database
        new = range(5)
        others = pd.DataFrame.from_dict({"Key Point": ["Calgary", "Edmonton", "Saturn", "OSDA Kirby", "OSDA Liege"],
                                         "Corporate Entity": ["NOVA Gas Transmission Ltd." for x in new],
                                         "Latitude": [51.22022, 51.80478, 55.99558, 53.31907, 56.9473],
                                         "Longitude": [-114.4925, -113.59329, -121.1104, -111.35386, -111.80979]})
        df = fixKeyPoint(df)
        df = df.append(others, ignore_index=True)
        df = normalize_text(df, ['Key Point', 'Corporate Entity'])
        df = normalize_numeric(df, ['Latitude', 'Longitude'], 3)
        df = fixCorporateEntity(df)
        df = df[df['Key Point'] != "FortisBC Lower Mainland"]

    return df
Ejemplo n.º 2
0
def orca_regdocs_links(sql=False):
    if sql:
        df = execute_sql(script_dir, 'projects_regdocs.sql')
        df.to_csv('raw_data/projects_regdocs.csv', index=False)
    else:
        df = pd.read_csv('raw_data/projects_regdocs.csv')
    return df
Ejemplo n.º 3
0
def getSql(sql=False, query='projects_regdocs.sql'):
    csvName = query.replace(".sql", ".csv")
    if sql:
        df = execute_sql(os.path.join(script_dir, "queries"), query)
        df.to_csv('raw_data/'+csvName, index=False)
    else:
        df = pd.read_csv('raw_data/'+csvName)
    return df
Ejemplo n.º 4
0
def get_sql(sql=False, query='projects_regdocs.sql'):
    csv_name = query.replace(".sql", ".csv")
    if sql:
        print('reading sql ' + query)
        df = execute_sql(os.path.join(os.getcwd(), "queries"), query)
        df.to_csv('raw_data/' + csv_name, index=False)
    else:
        print('reading local csv ' + csv_name)
        df = pd.read_csv('raw_data/' + csv_name)
    return df
Ejemplo n.º 5
0
def get_traffic_data(sql=False,
                     query='throughput_gas_monthly.sql',
                     db="PipelineInformation"):

    csv_name = query.split(".")[0] + '.csv'
    if sql:
        print('reading sql ' + query.split(".")[0])
        df = execute_sql(path=os.path.join(os.getcwd(), "queries"),
                         query_name=query,
                         db=db)
        df.to_csv('raw_data/' + csv_name, index=False)

    else:
        print('reading local ' + query.split(".")[0])
        df = pd.read_csv('raw_data/' + csv_name, encoding='utf-8')

    # inital processing for key points
    if query == 'key_points.sql':
        df = normalize_text(df, ['Key Point', 'Pipeline Name'])
        df = normalize_numeric(df, ['Latitude', 'Longitude'], 3)

    return df