コード例 #1
0
def load_finances(financialData, childBase, engine):
    """Upsert a representative's financial data and its child rows.

    The record's ETL id is built from the ISO-formatted ``start_date`` and
    ``end_date``. The ``other_customized`` / ``public_customized`` sources
    and the ``turnover_breakdown`` entries are popped off ``financialData``
    and upserted into their own tables; what is left of ``financialData``
    is upserted into ``financial_data``. ``childBase`` is merged into every
    row that is written.

    ``financialData`` and the dicts nested in it are modified in place.
    An empty (or ``None``) ``financialData`` is a no-op.
    """
    if not financialData:
        # Without a financial section there are no dates to build the ETL
        # id from, so there is nothing to store.
        return

    etlId = '%s//%s' % (financialData['start_date'].isoformat(),
                        financialData['end_date'].isoformat())

    # Tag each custom source with the list it came from.
    financial_sources = \
        [(s, 'other') for s in financialData.pop("other_customized")] + \
        [(s, 'public') for s in financialData.pop("public_customized")]
    for financial_source, type_ in financial_sources:
        financial_source['type'] = type_
        financial_source['financial_data_etl_id'] = etlId
        financial_source.update(childBase)
        sl.upsert(engine, sl.get_table(engine, 'financial_data_custom_source'),
                  financial_source, ['representative_etl_id',
                      'financial_data_etl_id', 'type', 'name'])

    for turnover in financialData.pop("turnover_breakdown"):
        turnover['financial_data_etl_id'] = etlId
        # The name is part of the upsert key, so normalise its whitespace.
        turnover['name'] = turnover['name'].strip()
        turnover.update(childBase)
        sl.upsert(engine, sl.get_table(engine, 'financial_data_turnover'),
                  turnover, ['representative_etl_id', 'financial_data_etl_id',
                             'name'])

    financialData['etl_id'] = etlId
    financialData.update(childBase)
    sl.upsert(engine, sl.get_table(engine, 'financial_data'),
              financialData, ['representative_etl_id', 'etl_id'])
コード例 #2
0
ファイル: unreginterest.py プロジェクト: stef/lobbyfacts
def load_rep(line, engine, unregtag):
    rep={}
    rep['original_name'] = line[0].strip()
    rep['name'] = line[0].strip()
    rep['identification_code'] = line[1] or hashlib.sha512(line[0].strip()).hexdigest()[:16]
    rep['etl_id'] = "%s//ALL" % rep['identification_code']
    rep['web_site_url'] = line[2] or ''

    if line[3].strip():
        rep['contact_street'] = line[3]
    if line[4].strip():
        tmp=line[4].split()
        if tmp[0][0] == 'B':
            rep['contact_country'] = 'Belgium'
        elif tmp[0][0] == 'F':
            rep['contact_country'] = 'France'
        else:
            print 'bad zipcode country code', line[4]

        rep['contact_post_code'] = tmp[0][2:]
        rep['contact_town'] = ' '.join(tmp[1:])

    rep['network_extracted'] = False
    sl.upsert(engine, sl.get_table(engine, 'representative'), rep,
              ['etl_id'])

    inserted=sl.find_one(engine,sl.get_table(engine, 'representative'),**rep)
    if inserted:
        sl.upsert(engine, sl.get_table(engine, 'tags'),
                  {'representative_id': inserted['id'], 'tag_id': unregtag['id']},
                  ['representative_id', 'tag_id'])
コード例 #3
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load_finances(financialData, childBase, engine):
    """Upsert a representative's financial data and its child rows.

    Does nothing for an empty dict. Otherwise the custom sources and the
    turnover breakdown are popped off ``financialData`` and upserted into
    their own tables, each row extended with ``childBase`` and the ETL id
    built from the start and end dates; the remainder of ``financialData``
    is upserted into ``financial_data``. The dicts are modified in place.
    """
    if financialData == {}:
        return

    period_start = financialData["start_date"].isoformat()
    period_end = financialData["end_date"].isoformat()
    etlId = "%s//%s" % (period_start, period_end)

    # Pair every custom source with the kind of list it was taken from.
    other_sources = financialData.pop("other_customized")
    public_sources = financialData.pop("public_customized")
    tagged_sources = [(entry, "other") for entry in other_sources]
    tagged_sources.extend((entry, "public") for entry in public_sources)

    source_keys = ["representative_etl_id", "financial_data_etl_id", "type", "name"]
    for source_row, source_kind in tagged_sources:
        source_row["type"] = source_kind
        source_row["financial_data_etl_id"] = etlId
        source_row.update(childBase)
        source_table = sl.get_table(engine, "financial_data_custom_source")
        sl.upsert(engine, source_table, source_row, source_keys)

    turnover_keys = ["representative_etl_id", "financial_data_etl_id", "name"]
    for turnover_row in financialData.pop("turnover_breakdown"):
        turnover_row["financial_data_etl_id"] = etlId
        turnover_row["name"] = turnover_row["name"].strip()
        turnover_row.update(childBase)
        turnover_table = sl.get_table(engine, "financial_data_turnover")
        sl.upsert(engine, turnover_table, turnover_row, turnover_keys)

    # Whatever is left after the pops is the financial record itself.
    financialData["etl_id"] = etlId
    financialData.update(childBase)
    sl.upsert(
        engine,
        sl.get_table(engine, "financial_data"),
        financialData,
        ["representative_etl_id", "etl_id"],
    )
コード例 #4
0
ファイル: unreginterest.py プロジェクト: stef/lobbyfacts
def extract_data(engine):
    """Load the bundled CSV of unregistered companies into the ETL tables.

    Makes sure the 'situation:unregistered' tag row exists, reads it back,
    then passes every CSV row to ``load_rep`` together with that tag.
    """
    log.info("Extracting unregistered interests data...")
    label = 'situation:unregistered'
    sl.upsert(engine, sl.get_table(engine, 'tag'), {'tag': label}, ['tag'])
    # Re-read the tag so the row handed to load_rep carries its id.
    unregtag = sl.find_one(engine, sl.get_table(engine, 'tag'), tag=label)

    with app.open_resource('resources/unregistered-companies.csv') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        for count, row in enumerate(rows):
            load_rep(row, engine, unregtag)
            if not count % 100:
                log.info("Extracted: %s...", count)
コード例 #5
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load(engine):
    """Pass every row of the 'representative' table to load_representative."""
    representatives = sl.all(engine, sl.get_table(engine, 'representative'))
    for index, rep in enumerate(representatives):
        log.info("Loading(%s): %s", index, rep.get('name'))
        # Rows are loaded regardless of their 'etl_clean' flag.
        load_representative(engine, rep)
コード例 #6
0
def create_tasks(engine):
    """Create or update one pyBossa task per representative.

    Uploads the task presenter template to the app, then walks the
    'representative' table. Rows whose ``networking`` text is missing or
    shorter than 3 characters after stripping are skipped. Each remaining
    row gets a signature (SHA-1 of identification code + networking text,
    ASCII-encoded with other characters ignored); an existing task with
    that signature is updated, otherwise a new task is created.
    """
    log.info("Updating tasks on pyBossa...")
    app = setup()
    with flask_app.open_resource('resources/pbnetworks_template.html') as f:
        app.info['task_presenter'] = f.read()
        pbclient.update_app(app)

    # Index the tasks already on the server by their signature.
    existing = {}
    for known_task in pbclient.get_tasks(app.id, limit=30000):
        existing[known_task.data.get('info').get('signature')] = known_task

    for rep in sl.all(engine, sl.get_table(engine, 'representative')):
        networking = rep.get('networking')
        if networking is None:
            continue
        if len(networking.strip()) < 3:
            continue

        raw_signature = rep.get('identification_code') + networking
        signature = sha1(raw_signature.encode('ascii', 'ignore')).hexdigest()
        rep['signature'] = signature
        print [rep.get('name')]
        log.debug("Task: %s", rep['name'])
        # Dates are replaced by ISO strings before the row is sent.
        for date_field in ('last_update_date', 'registration_date'):
            rep[date_field] = rep[date_field].isoformat()

        if signature not in existing:
            pbclient.create_task(app.id, rep)
            continue
        task = existing.get(signature)
        task.data['info'] = rep
        pbclient.update_task(task)
コード例 #7
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load_person(person, role, childBase, engine):
    """Upsert one person row for a representative.

    The row is ``childBase`` overlaid with ``person``, plus the given
    ``role`` and a ``name`` made of title, first name and last name joined
    by single spaces (missing parts become empty strings, so the name is
    not stripped).
    """
    table = sl.get_table(engine, "person")
    record = childBase.copy()
    record.update(person)
    record["role"] = role
    name_parts = [person[part] or "" for part in ("title", "first_name", "last_name")]
    record["name"] = " ".join(name_parts)
    key_columns = ["representative_etl_id", "role", "name"]
    sl.upsert(engine, table, record, key_columns)
コード例 #8
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load_contact(contact, childBase, engine):
    """Upsert one contact row for a representative; an empty dict is skipped.

    The stored row is ``childBase`` overlaid with ``contact``; neither input
    dict is modified.
    """
    if contact != {}:
        table = sl.get_table(engine, "contact")
        record = childBase.copy()
        record.update(contact)
        key_columns = ["representative_etl_id", "country", "type"]
        sl.upsert(engine, table, record, key_columns)
コード例 #9
0
ファイル: categories.py プロジェクト: tttp/lobbyfacts
def code_categories(engine):
    """Fill in ``main_category_id`` for every distinct main category.

    Category labels are first translated through ``newcats`` (labels not
    found there are used as-is) and then looked up in ``CATEGORIES``.
    Rows without a main category are left alone.
    """
    table = sl.get_table(engine, 'representative')
    for cat in sl.distinct(engine, table, 'main_category'):
        label = cat['main_category']
        if label:
            current_label = newcats.get(label, label)
            cat['main_category_id'] = CATEGORIES[current_label]
            sl.upsert(engine, table, cat, ['main_category'])
コード例 #10
0
ファイル: geocode.py プロジェクト: tttp/lobbyfacts
def transform(engine):
    """Geocode representatives that have no longitude stored yet.

    For each such row the contact town, street, country and post code are
    sent to the geocoding service at ``URL``; the first result's display
    name, longitude and latitude are written back onto the row (matched by
    ``id``). Rows for which the service returns no result or an
    undecodable body are skipped.
    """
    log.info("Geo-coding representatives...")
    table = sl.get_table(engine, 'representative')
    for row in sl.all(engine, table):
        out = {'id': row['id']}
        if row.get('contact_lon'):
            # Already geocoded.
            continue
        query = {
            'format': 'json',
            'limit': 1,
            'city': row.get('contact_town'),
            'street': row.get('contact_street'),
            'country': row.get('contact_country'),
            'postalcode': row.get('contact_post_code')
            }
        response = requests.get(URL, params=query)
        try:
            results = response.json()
        except ValueError:
            # Only a body that is not valid JSON is tolerated here; a bare
            # except would also hide KeyboardInterrupt and real bugs.
            continue
        if not results:
            continue
        geo = results[0]
        log.info("%s @ %s", row.get('name'), geo.get('display_name'))
        out['contact_geoname'] = geo.get('display_name')
        out['contact_lon'] = geo.get('lon')
        out['contact_lat'] = geo.get('lat')
        sl.upsert(engine, table, out, ['id'])
コード例 #11
0
def load(engine):
    """Load every representative row whose ``etl_clean`` is not ``False``."""
    table = sl.get_table(engine, 'representative')
    for rep in sl.all(engine, table):
        log.info("Loading: %s", rep.get('name'))
        if rep['etl_clean'] is not False:
            load_representative(engine, rep)
        else:
            log.debug("Skipping!")
コード例 #12
0
ファイル: regexpert.py プロジェクト: erikwesselius/lobbyfacts
 def _s(data):
     """Upsert one expert-group member plus its policy areas and countries.

     ``engine`` and ``etlId`` are not parameters; they are taken from the
     enclosing scope. ``data`` is modified in place: 'subgroup_status',
     'policy_area' and both spellings of the countries key are removed
     before the remaining fields are stored as the member row.
     """
     data.pop('subgroup_status', None)

     for area in data.pop('policy_area', []):
         area_table = sl.get_table(engine, 'expertgroup_member_policy_area')
         area_row = {'expertgroup_etl_id': etlId, 'member': data['name'],
                     'policy_area': area, 'subgroup': data['subgroup']}
         sl.upsert(engine, area_table, area_row,
                   ['expertgroup_etl_id', 'policy_area', 'member', 'subgroup'])

     # Both spellings are always removed; the singular 'area' one wins
     # when both are present.
     plural_form = data.pop('countries/areas_represented', [])
     countries = data.pop('countries/area_represented', plural_form)
     for country in countries:
         country_table = sl.get_table(engine, 'expertgroup_member_country')
         country_row = {'expertgroup_etl_id': etlId, 'member': data['name'],
                        'country': country, 'subgroup': data['subgroup']}
         sl.upsert(engine, country_table, country_row,
                   ['expertgroup_etl_id', 'country', 'member', 'subgroup'])

     data['expertgroup_etl_id'] = etlId
     member_table = sl.get_table(engine, 'expertgroup_member')
     sl.upsert(engine, member_table, data,
               ['expertgroup_etl_id', 'name', 'subgroup'])
コード例 #13
0
def save(person, engine):
    """Attach an accredited person to a representative and upsert it.

    The representative is looked up by ``org_identification_code``; when
    several rows match, the one with the greatest ``last_update_date`` is
    used. If none matches, a warning is logged and nothing is stored.
    ``person`` is modified in place.
    """
    table = sl.get_table(engine, 'person')
    rep_table = sl.get_table(engine, 'representative')
    orgs = list(sl.find(engine, rep_table,
                        identification_code=person['org_identification_code']))
    if not len(orgs):
        log.warn("Cannot associate with a registered interest: %r", person)
        return

    latest = max(orgs, key=lambda o: o['last_update_date'])
    person['representative_etl_id'] = latest['etl_id']
    person['role'] = 'accredited'
    title = person['title'] or ''
    first_name = person['first_name'] or ''
    last_name = person['last_name'] or ''
    name = '%s %s %s' % (title, first_name, last_name)
    person['name'] = name.strip()
    # The log line shows the name as assembled, before stripping.
    log.debug("Accreditation: %s", name)
    sl.upsert(engine, table, person,
              ['representative_etl_id', 'role', 'name'])
コード例 #14
0
ファイル: tag.py プロジェクト: stef/lobbyfacts
def load_tag(rec, engine):
    """Make sure the tags in ``rec`` exist and link them to a representative.

    The representative is found by identification code when ``rec['id']``
    is set, otherwise by its original name. If it cannot be found, a
    message is written to stderr and no links are created (the tag rows
    themselves have already been upserted by then).
    """
    tag_rows = []
    for label in rec['tags']:
        sl.upsert(engine, sl.get_table(engine, 'tag'), {'tag': label}, ['tag'])
        # Read the tag back so its id is available for the link rows.
        tag_rows.append(sl.find_one(engine, sl.get_table(engine, 'tag'), tag=label))

    if rec['id']:
        criteria = {'identification_code': rec['id']}
    else:
        criteria = {'original_name': rec['name']}
    rep = sl.find_one(engine, sl.get_table(engine, 'representative'), **criteria)

    if not rep:
        print >>sys.stderr, "couldn't find", rec['id'] or rec['name'].encode('utf8')
        return

    for tag_row in tag_rows:
        link = {'representative_id': rep['id'], 'tag_id': tag_row['id']}
        sl.upsert(engine, sl.get_table(engine, 'tags'), link,
                  ['representative_id', 'tag_id'])
コード例 #15
0
ファイル: meetings.py プロジェクト: tttp/lobbyfacts
def extract(engine):
    """Scrape every (title, url) pair in ``uuids`` and upsert the meetings.

    Progress is logged after every 100 meetings stored.
    """
    table = sl.get_table(engine, 'meeting')

    # Flatten the per-URL scrape results into one lazy stream.
    meetings = (meeting
                for title, url in uuids
                for meeting in scrape(url, title))
    for count, meeting in enumerate(meetings, 1):
        sl.upsert(engine, table, meeting, ['meetid', 'identification_code'])
        if not count % 100:
            log.info("Extracted: %s...", count)
コード例 #16
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load_rep(rep, engine):
    """Upsert one scraped representative and all of its child records.

    Contacts, persons, action fields, interests, member countries,
    organisations and financial data are popped off ``rep`` and written to
    their own tables, each row carrying the representative's ETL id, its
    last update date and the status "active". What remains of ``rep`` is
    upserted into ``representative``. A representative without an
    original name is logged as an error and not stored.
    """
    # The ETL id is derived from the identification code alone, not from
    # the last update date.
    etlId = "%s//ALL" % rep["identification_code"]
    rep["etl_id"] = etlId
    childBase = dict(
        representative_etl_id=etlId,
        representative_update_date=rep["last_update_date"],
        status="active",
    )
    if not rep["original_name"]:
        log.error("Unnamed representative: %r", rep)
        return

    for contact_key in ("head_contact", "be_contact"):
        load_contact(rep.pop(contact_key, {}), childBase, engine)

    for person_key, role in (("legal_person", "legal"), ("head_person", "head")):
        load_person(rep.pop(person_key), role, childBase, engine)

    # Plain value lists: (key in rep, target table, column holding the value).
    value_lists = (
        ("action_fields", "action_field", "action_field"),
        ("interests", "interest", "interest"),
        ("country_of_members", "country_of_member", "country"),
    )
    for source_key, table_name, column in value_lists:
        for value in rep.pop(source_key):
            row = childBase.copy()
            row[column] = value
            sl.upsert(engine, sl.get_table(engine, table_name), row, ["representative_etl_id", column])

    for organisation in rep.pop("organisations"):
        row = childBase.copy()
        row.update(organisation)
        row["name"] = organisation["name"].strip()
        sl.upsert(engine, sl.get_table(engine, "organisation"), row, ["representative_etl_id", "name"])

    load_finances(rep.pop("fd"), childBase, engine)

    rep["name"] = rep["original_name"].strip()
    rep["network_extracted"] = False
    sl.upsert(engine, sl.get_table(engine, "representative"), rep, ["etl_id"])
コード例 #17
0
ファイル: reginterests.py プロジェクト: tttp/lobbyfacts
def load_rep(rep, engine):
    """Upsert one scraped representative and all of its child records.

    Persons, action fields, interests, member countries, organisations and
    financial data are popped off ``rep`` and written to their own tables
    together with the representative's ETL id, last update date and the
    status 'active'. The rest of ``rep`` goes into ``representative``.
    A representative without an original name is logged and skipped.
    """
    # Only the identification code goes into the ETL id; the last update
    # date is deliberately left out of it.
    etlId = "%s//ALL" % rep['identification_code']
    rep['etl_id'] = etlId
    childBase = {}
    childBase['representative_etl_id'] = etlId
    childBase['representative_update_date'] = rep['last_update_date']
    childBase['status'] = 'active'

    if not rep['original_name']:
        log.error("Unnamed representative: %r", rep)
        return

    load_person(rep.pop('legal_person'), 'legal', childBase, engine)
    load_person(rep.pop('head_person'), 'head', childBase, engine)

    def upsert_values(source_key, table_name, column):
        # One row per value of a plain list stored under rep[source_key].
        for value in rep.pop(source_key):
            row = childBase.copy()
            row[column] = value
            sl.upsert(engine, sl.get_table(engine, table_name), row,
                      ['representative_etl_id', column])

    upsert_values('action_fields', 'action_field', 'action_field')
    upsert_values('interests', 'interest', 'interest')
    upsert_values('country_of_members', 'country_of_member', 'country')

    for organisation in rep.pop('organisations'):
        row = childBase.copy()
        row.update(organisation)
        row['name'] = organisation['name'].strip()
        sl.upsert(engine, sl.get_table(engine, 'organisation'), row,
                  ['representative_etl_id', 'name'])

    load_finances(rep.pop('fd'), childBase, engine)

    rep['name'] = rep['original_name'].strip()
    rep['network_extracted'] = False
    sl.upsert(engine, sl.get_table(engine, 'representative'), rep,
              ['etl_id'])
コード例 #18
0
ファイル: reginterests.py プロジェクト: tttp/lobbyfacts
def load_person(person, role, childBase, engine):
    """Upsert one person row for a representative.

    The row combines ``childBase`` and ``person`` with the given ``role``
    and a ``name`` built from title, first name and last name separated by
    single spaces; missing parts are replaced by empty strings.
    """
    table = sl.get_table(engine, 'person')
    record = childBase.copy()
    record.update(person)
    record['role'] = role
    title = person['title'] or ''
    first_name = person['first_name'] or ''
    last_name = person['last_name'] or ''
    record['name'] = ' '.join([title, first_name, last_name])
    key_columns = ['representative_etl_id', 'role', 'name']
    sl.upsert(engine, table, record, key_columns)
コード例 #19
0
ファイル: dedup.py プロジェクト: erikwesselius/lobbyfacts
def dedup_fields(engine, field):
    """Give representatives that share a value of ``field`` a name suffix.

    For every representative, all rows with the same ``field`` value are
    fetched; when there is more than one, each of them gets
    "(Duplicate N)" as ``name_suffix``, numbered from 1 in the order the
    rows are returned.
    """
    table = sl.get_table(engine, 'representative')
    for rep in sl.all(engine, table):
        matches = list(sl.find(engine, table, **{field: rep[field]}))
        if len(matches) <= 1:
            continue
        log.info("Duplicates for: %s", rep['name'])
        for position, duplicate in enumerate(matches, 1):
            update = {
                'name_suffix': "(Duplicate %s)" % position,
                'identification_code': duplicate['identification_code'],
            }
            sl.upsert(engine, table, update, ['identification_code'])
コード例 #20
0
ファイル: meetings.py プロジェクト: stef/lobbyfacts
def extract(engine):
    """Mark stored meetings inactive, then scrape and upsert all meetings.

    Progress is logged after every 100 meetings stored.
    """
    table = sl.get_table(engine, 'meeting')
    try:
        for table_name in ('meeting', 'meeting_participants'):
            sl.update(engine, table_name, {}, {'status': 'inactive'}, ensure=False)
    except sqlalchemy.exc.CompileError:
        # Presumably raised when the status column does not exist yet
        # (e.g. on a first run) -- TODO confirm. Ignored on purpose.
        pass

    # Flatten the per-URL scrape results into one lazy stream.
    meetings = (meeting
                for url, org, title in get_urls()
                for meeting in scrape(url, title, org))
    for count, meeting in enumerate(meetings, 1):
        sl.upsert(engine, table, meeting, ['meetid', 'identification_code'])
        if not count % 100:
            log.info("Extracted: %s...", count)
コード例 #21
0
def fetch_taskruns(engine):
    """Store network entity names that enough pyBossa task runs agree on.

    The matches of all task runs are collected per ``etl_id``. Names are
    grouped case-insensitively, ignoring surrounding whitespace; a group
    with at least ``QUORUM`` entries is stored in 'network_entity' under
    the stripped spelling of its first entry.
    """
    log.info("Fetching responses from pyBossa...")
    net = sl.get_table(engine, 'network_entity')
    app = setup()

    matches_by_etl_id = defaultdict(list)
    for taskrun in _iterate(pbclient.find_taskruns, app_id=app.id):
        etl_id = taskrun.info.get('etl_id')
        matches_by_etl_id[etl_id].extend(taskrun.info.get('matches'))

    for etl_id, matches in matches_by_etl_id.items():
        spellings = defaultdict(list)
        for match in matches:
            normalised = match.strip().lower()
            spellings[normalised].append(match)
        for variants in spellings.values():
            if len(variants) >= QUORUM:
                entity = {'etl_id': etl_id, 'name': variants[0].strip()}
                sl.upsert(engine, net, entity, ['etl_id', 'name'])
コード例 #22
0
ファイル: dedup.py プロジェクト: stef/lobbyfacts
def dedup_fields(engine, field):
    table = sl.get_table(engine, 'representative')
    seen=set([])
    for n, rep in enumerate(sl.all(engine, table)):
        if n % 100 == 0:
            print n, 'done'
        if not rep[field] or not rep[field].strip() or rep[field] in seen: continue
        seen.update(rep[field])
        others = list(sl.find(engine, table, **{field: rep[field]}))
        if len(others) > 1:
            log.info("Duplicates for: %s", rep['name'])
            for i, re in enumerate(others):
                if re == rep: continue
                text = "(Duplicate %s)" % (i+1)
                sl.upsert(engine, table,
                    {'name_suffix': text,
                     'identification_code': re['identification_code']},
                    ['identification_code'])
コード例 #23
0
ファイル: names.py プロジェクト: erikwesselius/lobbyfacts
def map_names(map_func, engine, table_name, source_column='name',
        out_column='canonical_name'):
    """Write a normalised form of ``source_column`` into ``out_column``.

    ``map_func(value, row)`` is called once per distinct source value (with
    the first row carrying it). Its result is stored in ``out_column`` and
    ``etl_clean`` is set to True; if it returns ``None`` or raises
    ``ValueError``, ``out_column`` is set to ``None`` and ``etl_clean`` to
    False. The update is upserted using the source column as the key.
    """
    table = sl.get_table(engine, table_name)
    seen_values = set()
    log.info("Normalising names on '%s', column '%s'...", table_name,
             source_column)
    for row in sl.find(engine, table):
        value = row.get(source_column)
        if value in seen_values:
            continue
        seen_values.add(value)
        d = {source_column: value, 'etl_clean': True,
             out_column: None}
        try:
            out = map_func(value, row)
        except ValueError:
            # The mapper rejected the value; flag the rows as unclean.
            d['etl_clean'] = False
        else:
            if out is None:
                d['etl_clean'] = False
            else:
                d[out_column] = out
        sl.upsert(engine, table, d, [source_column])
コード例 #24
0
ファイル: regexpert.py プロジェクト: erikwesselius/lobbyfacts
def save(engine, group):
    """Upsert one expert group together with all of its child records.

    Policy areas, tasks, compositions, directorates, members and subgroups
    are popped off ``group`` and written to their own tables under the
    group's ETL id. 'additional_info' and 'link_to_website' are dropped;
    the remaining fields are stored as the 'expertgroup' row.
    """
    # The ETL id is built from the identifier only, not the update date.
    etlId = "%s//ALL" % group['identifier']

    # Plain value lists: (key in group, target table, column for the value).
    value_lists = (
        ('policy_area', 'expertgroup_policy_area', 'policy_area'),
        ('task', 'expertgroup_task', 'task'),
        ('composition', 'expertgroup_composition', 'composition'),
        ('associated_dg', 'expertgroup_directorate', 'directorate'),
    )
    for source_key, table_name, column in value_lists:
        for value in group.pop(source_key, []):
            sl.upsert(engine, sl.get_table(engine, table_name),
                      {'expertgroup_etl_id': etlId, column: value},
                      ['expertgroup_etl_id', column])

    # Lead directorates share the table with associated ones, flagged 'lead'.
    for lead_dg in group.pop('lead_dg', []):
        row = {'expertgroup_etl_id': etlId, 'directorate': lead_dg, 'lead': True}
        sl.upsert(engine, sl.get_table(engine, 'expertgroup_directorate'),
                  row, ['expertgroup_etl_id', 'directorate'])

    for member in group.pop('members'):
        save_member(engine, etlId, member)

    for subgroup in group.pop('subgroups'):
        subgroup['expertgroup_etl_id'] = etlId
        for sub_member in subgroup.pop('members'):
            sub_member['subgroup'] = subgroup['name']
            save_member(engine, etlId, sub_member)
        sl.upsert(engine, sl.get_table(engine, 'expertgroup_subgroup'),
                  subgroup, ['expertgroup_etl_id', 'name'])

    # Required key, discarded: it is not stored with the group.
    group.pop('additional_info')

    group['etl_id'] = etlId
    group.pop('link_to_website', '')
    sl.upsert(engine, sl.get_table(engine, 'expertgroup'),
              group, ['etl_id'])
コード例 #25
0
ファイル: meetings.py プロジェクト: stef/lobbyfacts
def load(engine):
    """Pass every row of the 'meeting' table to load_meeting."""
    meetings = sl.all(engine, sl.get_table(engine, 'meeting'))
    for index, meet in enumerate(meetings):
        log.info("Loading(%s): %s", index, meet.get('name'))
        load_meeting(engine, meet)
コード例 #26
0
ファイル: reginterests.py プロジェクト: stef/lobbyfacts
def load_representative(engine, rep):
    """Load one ETL representative row and its child rows into the models.

    ``rep`` is a row of the ETL 'representative' table; it is modified in
    place (numeric fields converted, related model objects attached) and
    used to create or update the ``Representative``. The ETL child tables
    (person, contact, financial data with custom sources and turnovers,
    organisation, country_of_member, action_field, interest, tags) are then
    read by ``representative_etl_id`` (tags by ``representative_id``) and
    the matching model objects are created or updated. The session is
    committed at the end.
    """
    entity = upsert_entity(rep.get('canonical_name'),
                name=rep.get('original_name'),
                suffix=rep.get('name_suffix'),
                acronym=rep.get('acronym'))
    assert entity is not None, entity
    assert entity.id is not None, entity
    rep['entity'] = entity
    rep['members_25'] = to_integer(rep['members_25'])
    rep['members_50'] = to_integer(rep['members_50'])
    rep['members_75'] = to_integer(rep['members_75'])
    rep['members_100'] = to_integer(rep['members_100'])
    rep['members_fte'] = to_float(rep['members_fte'])
    rep['members'] = to_integer(rep['members'])
    rep['number_of_natural_persons'] = to_integer(rep['number_of_natural_persons'])

    # A sub category is only resolved when there is a main category.
    if rep.get('main_category'):
        main_category = upsert_category(rep.get('main_category_id'),
                                        rep.get('main_category'))
        rep['main_category'] = main_category
        if rep.get('sub_category'):
            rep['sub_category'] = upsert_category(rep.get('sub_category_id'),
                                                  rep.get('sub_category'),
                                                  main_category)

    # Persons: head and legal are attached to the representative itself;
    # accredited persons are kept for later, once the representative exists.
    accreditations = []
    for person_data in sl.find(engine, sl.get_table(engine, 'person'),
            representative_etl_id=rep['etl_id']):
        person = upsert_person(person_data)
        if person_data.get('role') == 'head':
            rep['head'] = person
        if person_data.get('role') == 'legal':
            rep['legal'] = person
        if person_data.get('role') == 'accredited':
            accreditations.append((person, person_data))

    representative = Representative.by_identification_code(rep['identification_code'])
    if representative is None:
        representative = Representative.create(rep)
    else:
        representative.update(rep)

    for contact_data in sl.find(engine, sl.get_table(engine, 'contact'),
            representative_etl_id=rep['etl_id'], status='active'):
        # Rows with fewer than 7 non-empty fields are skipped -- presumably
        # too incomplete to be a usable address; threshold not explained here.
        if len([x for x in contact_data.values() if x])<7: continue
        contact_ = {
            'town': contact_data['town'],
            'street': contact_data['street'],
            'post_code': contact_data['post_code'],
            'postbox': contact_data['postbox'],
            'lat': to_float(contact_data['lat']),
            'lon': to_float(contact_data['lon']),
            'phone': " ".join((contact_data.get('indic_phone') or '', contact_data.get('phone') or '')).strip(),
            'country': Country.by_code(contact_data['country_code']),
            }

        # Type 'head' is the head office; any other type is stored as the
        # 'be' office.
        if contact_data['type'] == 'head':
            if representative.head_office_id is None:
                contact = Contact.create(contact_)
                representative.head_office=contact
                representative.contact_country=contact.country
            else:
                representative.head_office.update(contact_)
        else:
            if representative.be_office_id is None:
                contact = Contact.create(contact_)
                representative.be_office=contact
            else:
                representative.be_office.update(contact_)

    for person, data_ in accreditations:
        data_['person'] = person
        data_['representative'] = representative
        accreditation = Accreditation.by_rp(person, representative)
        if accreditation is None:
            accreditation = Accreditation.create(data_)
        else:
            accreditation.update(data_)

    # Every amount is converted from its own column. Converting key by key
    # in a loop rules out reading one column into another by mistake.
    fd_integer_fields = (
        'turnover_min', 'turnover_max', 'turnover_absolute',
        'cost_min', 'cost_max', 'cost_absolute',
        'direct_rep_costs_min', 'direct_rep_costs_max',
        'total_budget',
        'public_financing_total', 'public_financing_infranational',
        'public_financing_national',
        'eur_sources_grants', 'eur_sources_procurement',
        'other_sources_donation', 'other_sources_contributions',
        'other_sources_total',
    )
    # These are passed through unchanged; the assignment only makes sure
    # the key exists (as None) on the row.
    fd_plain_fields = (
        'eur_sources_procurement_src', 'eur_sources_grants_src',
        'other_financial_information', 'new_organisation',
    )
    for fd in sl.find(engine, sl.get_table(engine, 'financial_data'),
            representative_etl_id=rep['etl_id']):
        for field in fd_integer_fields:
            fd[field] = to_integer(fd.get(field))
        for field in fd_plain_fields:
            fd[field] = fd.get(field)
        fd['representative'] = representative
        financial_data = FinancialData.by_rsd(representative, fd.get('start_date'))
        if financial_data is None:
            financial_data = FinancialData.create(fd)
        else:
            financial_data.update(fd)

        for src_ in sl.find(engine, sl.get_table(engine, 'financial_data_custom_source'),
                representative_etl_id=rep['etl_id'], financial_data_etl_id=fd['etl_id']):
            src_['financial_data'] = financial_data
            src_['amount'] = to_integer(src_.get('amount'))
            src = CustomIncome.by_fdn(financial_data, src_['name'])
            if src is None:
                src = CustomIncome.create(src_)
            else:
                src.update(src_)

        for turnover_ in sl.find(engine, sl.get_table(engine, 'financial_data_turnover'),
                representative_etl_id=rep['etl_id'], financial_data_etl_id=fd['etl_id']):
            # Turnover rows are loaded regardless of their 'etl_clean' flag.
            turnover_['entity'] = upsert_entity(turnover_.get('canonical_name'),
                                                turnover_.get('name'))
            assert turnover_['entity'] is not None, turnover_['entity']
            turnover_['financial_data'] = financial_data
            turnover_['min'] = to_integer(turnover_.get('min'))
            turnover_['max'] = to_integer(turnover_.get('max'))
            turnover = FinancialTurnover.by_fde(financial_data, turnover_['entity'])
            if turnover is None:
                turnover = FinancialTurnover.create(turnover_)
            else:
                turnover.update(turnover_)

    for org in sl.find(engine, sl.get_table(engine, 'organisation'),
            representative_etl_id=rep['etl_id']):
        # Organisation rows are loaded regardless of their 'etl_clean' flag.
        org['number_of_members'] = to_integer(org['number_of_members'])
        organisation = upsert_organisation(org)
        omdata = {'representative': representative,
                  'status': org.get('status'),
                  'organisation': organisation}
        om = OrganisationMembership.by_rpo(representative, organisation)
        if om is None:
            om = OrganisationMembership.create(omdata)
        else:
            om.update(omdata)

    for country_ in sl.find(engine, sl.get_table(engine, 'country_of_member'),
            representative_etl_id=rep['etl_id']):
        if not country_.get('country_code'): continue
        cdata = {'representative': representative,
                 'status': country_.get('status'),
                 'country': Country.by_code(country_.get('country_code'))}
        cm = CountryMembership.by_rpc(representative, cdata.get('country'))
        if cm is None:
            cm = CountryMembership.create(cdata)
        else:
            cm.update(cdata)

    for action_ in sl.find(engine, sl.get_table(engine, 'action_field'),
            representative_etl_id=rep['etl_id']):
        if not action_.get('action_field'): continue
        af = ActionField.by_action(action_.get('action_field'))
        if af is None:
            af = ActionField.create({'action': action_.get('action_field')})
            db.session.commit()
        adata = {'representative': representative,
                 'status': action_.get('status'),
                 'action': af}

        am = AssociatedAction.by_rpa(representative, af)
        if am is None:
            am = AssociatedAction.create(adata)
            db.session.commit()
        else:
            am.update(adata)

    for interest_ in sl.find(engine, sl.get_table(engine, 'interest'),
            representative_etl_id=rep['etl_id']):
        if not interest_.get('interest'): continue
        interest = Interest.by_interest(interest_.get('interest'))
        if interest is None:
            interest = Interest.create({'interest': interest_.get('interest')})
            db.session.commit()
        # The status comes from the interest row itself, not from whatever
        # row the action-field loop above happened to end on.
        idata = {'representative': representative,
                 'status': interest_.get('status'),
                 'interest': interest}

        ai = AssociatedInterest.by_rpi(representative, interest)
        if ai is None:
            ai = AssociatedInterest.create(idata)
            db.session.commit()
        else:
            ai.update(idata)

    for taglink in sl.find(engine, sl.get_table(engine, 'tags'),
            representative_id=rep['id']):
        etltag=sl.find_one(engine, sl.get_table(engine, 'tag'), id=taglink['tag_id'])
        tag = upsert_tag(etltag['tag'])
        if tag not in representative.tags:
            representative.tags.append(tag)
    db.session.commit()
コード例 #27
0
def code_categories(engine):
    """Store the numeric id of every distinct main category.

    For each distinct ``main_category`` value found in the
    ``representative`` table, the id is looked up in ``CATEGORIES`` and
    written back as ``main_category_id``, keyed on ``main_category``.
    A value missing from ``CATEGORIES`` raises ``KeyError``.
    """
    rep_table = sl.get_table(engine, 'representative')
    for row in sl.distinct(engine, rep_table, 'main_category'):
        category_name = row['main_category']
        row['main_category_id'] = CATEGORIES[category_name]
        sl.upsert(engine, rep_table, row, ['main_category'])
コード例 #28
0
ファイル: categories.py プロジェクト: stef/lobbyfacts
def remap_subcategories(engine):
    """Replace sub-category names that have an entry in ``newsubcats``.

    Every distinct ``sub_category`` value of the ``representative`` table
    is looked up in ``newsubcats``; values without a (truthy) replacement
    are left untouched.
    """
    table = sl.get_table(engine, 'representative')
    for row in sl.distinct(engine, table, 'sub_category'):
        current = row['sub_category']
        replacement = newsubcats.get(current)
        if not replacement:
            continue
        # NOTE(review): the table *name* is passed here rather than the
        # ``table`` object fetched above -- confirm sl.update accepts both.
        # The two dicts are presumably (criteria, new values) -- verify
        # against the sqlaload signature.
        sl.update(engine, 'representative',
                  {'sub_category': current},
                  {'sub_category': replacement},
                  ensure=False)
コード例 #29
0
def load_representative(engine, rep):
    """Load one representative and its related ETL rows into the models.

    ``rep`` is a dict-like record (presumably a row of the ETL
    ``representative`` table -- confirm against the caller). It is modified
    in place: numeric fields are converted, phone/fax are joined with their
    prefix fields, and the related entity, country, category and person
    objects are attached. It is then used to create or update the
    ``Representative`` found via ``rep['identification_code']``.

    Rows of the ``person``, ``financial_data``, ``financial_data_turnover``,
    ``organisation`` and ``country_of_member`` ETL tables that share
    ``rep['etl_id']`` are upserted as well. Turnover, organisation and
    country rows whose ``etl_clean`` is ``False`` are skipped. The database
    session is committed once, at the end.
    """
    entity = upsert_entity(rep.get('canonical_name'),
                           name=rep.get('original_name'),
                           suffix=rep.get('name_suffix'),
                           acronym=rep.get('acronym'))
    assert entity is not None, entity
    assert entity.id is not None, entity
    rep['entity'] = entity
    rep['members'] = to_integer(rep['members'])
    rep['number_of_natural_persons'] = to_integer(rep['number_of_natural_persons'])
    rep['number_of_organisations'] = to_integer(rep['number_of_organisations'])

    rep['contact_lat'] = to_float(rep['contact_lat'])
    rep['contact_lon'] = to_float(rep['contact_lon'])

    # Prefix and number are stored separately in the ETL row; join them and
    # strip so a missing half does not leave stray whitespace.
    rep['contact_phone'] = " ".join((rep.get('contact_indic_phone') or '', rep.get('contact_phone') or '')).strip()
    rep['contact_fax'] = " ".join((rep.get('contact_indic_fax') or '', rep.get('contact_fax') or '')).strip()
    rep['contact_country'] = Country.by_code(rep['country_code'])

    # The main category must be resolved first: it is passed on when
    # upserting the sub-category.
    main_category = upsert_category(rep.get('main_category_id'),
                                    rep.get('main_category'))
    rep['main_category'] = main_category
    rep['sub_category'] = upsert_category(rep.get('sub_category_id'),
                                          rep.get('sub_category'),
                                          main_category)

    # Persons are upserted before the representative so that head/legal can
    # be attached to ``rep``; accreditations need the representative object
    # and are therefore deferred.
    accreditations = []
    for person_data in sl.find(engine, sl.get_table(engine, 'person'),
            representative_etl_id=rep['etl_id']):
        person = upsert_person(person_data)
        role = person_data.get('role')
        if role == 'head':
            rep['head'] = person
        elif role == 'legal':
            rep['legal'] = person
        elif role == 'accredited':
            accreditations.append((person, person_data))

    representative = Representative.by_identification_code(rep['identification_code'])
    if representative is None:
        representative = Representative.create(rep)
    else:
        representative.update(rep)

    for person, data_ in accreditations:
        data_['person'] = person
        data_['representative'] = representative
        accreditation = Accreditation.by_rp(person, representative)
        if accreditation is None:
            accreditation = Accreditation.create(data_)
        else:
            accreditation.update(data_)

    # Every financial field that is passed through to_integer(). Driving the
    # conversion from one list keeps source and target key identical
    # (previously 'other_sources_contributions' was filled from
    # 'other_sources_donation' by mistake).
    financial_int_fields = (
        'turnover_min', 'turnover_max', 'turnover_absolute',
        'cost_min', 'cost_max', 'cost_absolute',
        'direct_rep_costs_min', 'direct_rep_costs_max',
        'total_budget',
        'public_financing_total',
        'public_financing_infranational',
        'public_financing_national',
        'eur_sources_grants', 'eur_sources_procurement',
        'other_sources_donation', 'other_sources_contributions',
        'other_sources_total',
    )

    for fd in sl.find(engine, sl.get_table(engine, 'financial_data'),
            representative_etl_id=rep['etl_id']):
        for field in financial_int_fields:
            fd[field] = to_integer(fd.get(field))
        fd['representative'] = representative
        financial_data = FinancialData.by_rsd(representative, fd.get('start_date'))
        if financial_data is None:
            financial_data = FinancialData.create(fd)
        else:
            financial_data.update(fd)

        # Turnover breakdown rows belonging to this financial data record.
        for turnover_ in sl.find(engine, sl.get_table(engine, 'financial_data_turnover'),
                representative_etl_id=rep['etl_id'], financial_data_etl_id=fd['etl_id']):
            if turnover_.get('etl_clean') is False:
                continue
            turnover_['entity'] = upsert_entity(turnover_.get('canonical_name'),
                                                turnover_.get('name'))
            assert turnover_['entity'] is not None, turnover_['entity']
            turnover_['financial_data'] = financial_data
            turnover_['min'] = to_integer(turnover_.get('min'))
            turnover_['max'] = to_integer(turnover_.get('max'))
            turnover = FinancialTurnover.by_fde(financial_data, turnover_['entity'])
            if turnover is None:
                turnover = FinancialTurnover.create(turnover_)
            else:
                turnover.update(turnover_)

    for org in sl.find(engine, sl.get_table(engine, 'organisation'),
            representative_etl_id=rep['etl_id']):
        if org.get('etl_clean') is False:
            continue
        org['number_of_members'] = to_integer(org['number_of_members'])
        organisation = upsert_organisation(org)
        omdata = {'representative': representative, 'organisation': organisation}
        om = OrganisationMembership.by_rpo(representative, organisation)
        if om is None:
            om = OrganisationMembership.create(omdata)
        else:
            om.update(omdata)

    for country_ in sl.find(engine, sl.get_table(engine, 'country_of_member'),
            representative_etl_id=rep['etl_id']):
        if country_.get('etl_clean') is False:
            continue
        cdata = {'representative': representative,
                 'country': Country.by_code(country_.get('country_code'))}
        cm = CountryMembership.by_rpc(representative, cdata.get('country'))
        if cm is None:
            cm = CountryMembership.create(cdata)
        else:
            cm.update(cdata)

    db.session.commit()
コード例 #30
0
ファイル: categories.py プロジェクト: stef/lobbyfacts
def code_subcategories(engine):
    """Store the numeric id of every distinct, non-empty sub-category.

    For each distinct ``sub_category`` value found in the
    ``representative`` table, the id is looked up in ``SUBCATEGORIES``
    (``None`` when the name is unknown) and written back as
    ``sub_category_id``, keyed on ``sub_category``. Empty values are
    skipped.
    """
    rep_table = sl.get_table(engine, 'representative')
    for row in sl.distinct(engine, rep_table, 'sub_category'):
        sub_name = row['sub_category']
        if sub_name:
            row['sub_category_id'] = SUBCATEGORIES.get(sub_name)
            sl.upsert(engine, rep_table, row, ['sub_category'])