def extract():
    """Extract all data from the respective web sources.

    Builds an engine via ``etl_engine()`` and hands it, in order, to the
    ``reginterests`` and ``regaccredit`` extractors.
    """
    # Imports stay function-local and interleaved with the calls, so each
    # extractor module is loaded only right before it is run.
    from lobbyfacts.data import etl_engine
    db = etl_engine()

    from lobbyfacts.data.extract.reginterests import extract as run_reginterests
    run_reginterests(db)

    from lobbyfacts.data.extract.regaccredit import extract as run_regaccredit
    run_regaccredit(db)
def load():
    """Load the data from ETL into the production database.

    Runs the ``common`` loader, then the ``reginterests`` loader, each with
    the engine returned by ``etl_engine()``, and finally calls
    ``update_index()``.
    """
    # Imports stay function-local and interleaved with the calls, so each
    # loader module is loaded only right before it is run.
    from lobbyfacts.data import etl_engine
    db = etl_engine()

    from lobbyfacts.data.load.common import load as load_common
    load_common(db)

    from lobbyfacts.data.load.reginterests import load as load_reginterests
    load_reginterests(db)

    from lobbyfacts.model import update_index
    update_index()
def transform():
    """Apply simple data transformation and cleansing operations.

    Runs the ``categories``, ``names`` and ``geocode`` transforms, in that
    order, each with the engine returned by ``etl_engine()``.
    """
    # Imports stay function-local and interleaved with the calls, so each
    # transform module is loaded only right before it is run.
    from lobbyfacts.data import etl_engine
    db = etl_engine()

    from lobbyfacts.data.transform.categories import transform as run_categories
    run_categories(db)

    # Disabled step, kept in place for reference:
    #from lobbyfacts.data.transform.pbnetworks import transform
    #transform(engine)

    from lobbyfacts.data.transform.names import transform as run_names
    run_names(db)

    # Disabled step, kept in place for reference:
    #from lobbyfacts.data.transform.dedup import transform
    #transform(engine)

    from lobbyfacts.data.transform.geocode import transform as run_geocode
    run_geocode(db)
def extract():
    """Extract all data from the respective web sources.

    Builds an engine via ``etl_engine()`` and hands it, in order, to the
    ``reginterests``, ``regaccredit``, ``unreginterest``, ``tag`` and
    ``meetings`` extractors.
    """
    # Imports stay function-local and interleaved with the calls, so each
    # extractor module is loaded only right before it is run.
    from lobbyfacts.data import etl_engine
    db = etl_engine()

    from lobbyfacts.data.extract.reginterests import extract as run_reginterests
    run_reginterests(db)

    from lobbyfacts.data.extract.regaccredit import extract as run_regaccredit
    run_regaccredit(db)

    # Disabled step, kept in place for reference:
    #from lobbyfacts.data.extract.regexpert import extract
    #extract(engine)

    from lobbyfacts.data.extract.unreginterest import extract as run_unreginterest
    run_unreginterest(db)

    from lobbyfacts.data.extract.tag import extract as run_tag
    run_tag(db)

    from lobbyfacts.data.extract.meetings import extract as run_meetings
    run_meetings(db)
if out is None: d['etl_clean'] = False else: d[out_column] = out except ValueError, ve: d['etl_clean'] = False sl.upsert(engine, table, d, [source_column]) def transform(engine): countries_func = lambda v, c: country_by_name(v).get('iso2') map_names(countries_func, engine, 'representative', 'contact_country', 'country_code') map_names(countries_func, engine, 'country_of_member', 'country', 'country_code') map_names(countries_func, engine, 'expertgroup_member_country', 'country', 'country_code') names_func = lambda v, c: canonical(DATASET, v, context=c) map_names(names_func, engine, 'representative') #map_names(names_func, engine, 'person') map_names(names_func, engine, 'financial_data_turnover') map_names(names_func, engine, 'organisation') map_names(names_func, engine, 'network_entity') map_names(names_func, engine, 'expertgroup_member') if __name__ == '__main__': engine = etl_engine() transform(engine)