Exemple #1
0
def extract():
    """Extract all data from the respective web sources.

    Obtains an engine from ``etl_engine()`` and hands it, in order, to the
    ``extract`` entry points of the ``reginterests`` and ``regaccredit``
    modules.
    """
    from lobbyfacts.data import etl_engine
    etl_db = etl_engine()

    # Each stage is imported right before it runs, under an alias so the
    # local name does not shadow this function's own name.
    from lobbyfacts.data.extract.reginterests import extract as run_reginterests
    run_reginterests(etl_db)

    from lobbyfacts.data.extract.regaccredit import extract as run_regaccredit
    run_regaccredit(etl_db)
Exemple #2
0
def load():
    """Load the data from ETL into the production database.

    Obtains an engine from ``etl_engine()``, passes it to the ``common`` and
    then the ``reginterests`` loaders, and finally calls ``update_index()``.
    """
    from lobbyfacts.data import etl_engine
    etl_db = etl_engine()

    # Each loader is imported right before it runs, under an alias so the
    # local name does not shadow this function's own name.
    from lobbyfacts.data.load.common import load as load_common
    load_common(etl_db)

    from lobbyfacts.data.load.reginterests import load as load_reginterests
    load_reginterests(etl_db)

    # Index refresh happens only after both loaders have returned.
    from lobbyfacts.model import update_index as refresh_index
    refresh_index()
Exemple #3
0
def transform():
    """Apply simple data transformation and cleansing operations.

    Obtains an engine from ``etl_engine()`` and hands it, in order, to the
    ``categories``, ``names`` and ``geocode`` transform modules.
    """
    from lobbyfacts.data import etl_engine
    etl_db = etl_engine()

    # Each stage is imported right before it runs, under an alias so the
    # local name does not shadow this function's own name.
    from lobbyfacts.data.transform.categories import transform as run_categories
    run_categories(etl_db)

    # Disabled stage: lobbyfacts.data.transform.pbnetworks

    from lobbyfacts.data.transform.names import transform as run_names
    run_names(etl_db)

    # Disabled stage: lobbyfacts.data.transform.dedup

    from lobbyfacts.data.transform.geocode import transform as run_geocode
    run_geocode(etl_db)
Exemple #4
0
def extract():
    """Extract all data from the respective web sources.

    Obtains an engine from ``etl_engine()`` and hands it, in order, to the
    ``reginterests``, ``regaccredit``, ``unreginterest``, ``tag`` and
    ``meetings`` extract modules.
    """
    from lobbyfacts.data import etl_engine
    etl_db = etl_engine()

    # Each stage is imported right before it runs, under an alias so the
    # local name does not shadow this function's own name.
    from lobbyfacts.data.extract.reginterests import extract as run_reginterests
    run_reginterests(etl_db)

    from lobbyfacts.data.extract.regaccredit import extract as run_regaccredit
    run_regaccredit(etl_db)

    # Disabled stage: lobbyfacts.data.extract.regexpert

    from lobbyfacts.data.extract.unreginterest import extract as run_unreginterest
    run_unreginterest(etl_db)

    from lobbyfacts.data.extract.tag import extract as run_tag
    run_tag(etl_db)

    from lobbyfacts.data.extract.meetings import extract as run_meetings
    run_meetings(etl_db)
Exemple #5
0
            if out is None:
                d['etl_clean'] = False
            else:
                d[out_column] = out
        except ValueError, ve:
            d['etl_clean'] = False
        sl.upsert(engine, table, d, [source_column])

def transform(engine):
    """Run the country-code and canonical-name mapping passes.

    First calls ``map_names`` with a country lookup for three
    table/column pairs, writing to ``'country_code'``; then calls it with a
    ``canonical(DATASET, ...)`` lookup for five tables.

    :param engine: passed through unchanged to every ``map_names`` call.
    """
    def countries_func(v, c):
        # The second argument is accepted but not used by this lookup.
        return country_by_name(v).get('iso2')

    def names_func(v, c):
        return canonical(DATASET, v, context=c)

    # (third, fourth) positional arguments of map_names -- presumably the
    # table and its source column; confirm against map_names' signature.
    country_targets = (
        ('representative', 'contact_country'),
        ('country_of_member', 'country'),
        ('expertgroup_member_country', 'country'),
    )
    for target, column in country_targets:
        map_names(countries_func, engine, target, column, 'country_code')

    # 'person' is intentionally left out (disabled in the original list).
    name_targets = (
        'representative',
        'financial_data_turnover',
        'organisation',
        'network_entity',
        'expertgroup_member',
    )
    for target in name_targets:
        map_names(names_func, engine, target)


# Script entry point: when this module is executed directly, obtain an engine
# from etl_engine() and run transform() against it.
if __name__ == '__main__':
    engine = etl_engine()
    transform(engine)