def load_rep(line, engine, unregtag):
    """Upsert one unregistered-company CSV row and link it to a tag.

    Args:
        line: sequence of CSV columns. Column 0 is the name, 1 the
            identification code (may be empty), 2 the web site URL, 3 the
            street and 4 a combined zipcode/town field.
        engine: database engine handed to the ``sl`` helpers.
        unregtag: row of the ``tag`` table; its ``id`` is linked to the
            representative in the ``tags`` table.
    """
    name = line[0].strip()
    # Rows without a code get a stable pseudo-code derived from the name.
    code = line[1] or hashlib.sha512(name).hexdigest()[:16]
    rep = {
        'original_name': name,
        'name': name,
        'identification_code': code,
        'etl_id': "%s//ALL" % code,
        'web_site_url': line[2] or '',
        'network_extracted': False,
    }
    if line[3].strip():
        rep['contact_street'] = line[3]
    if line[4].strip():
        parts = line[4].split()
        zipcode = parts[0]
        # The first character of the zipcode selects the country.
        # NOTE(review): the [2:] slice below presumes a one-letter prefix
        # plus a separator (e.g. "B-1000") -- confirm against the CSV.
        country = {'B': 'Belgium', 'F': 'France'}.get(zipcode[0])
        if country is not None:
            rep['contact_country'] = country
        else:
            # Reported through the module logger, like extract_data does.
            log.warning('bad zipcode country code %s', line[4])
        rep['contact_post_code'] = zipcode[2:]
        rep['contact_town'] = ' '.join(parts[1:])

    table = sl.get_table(engine, 'representative')
    sl.upsert(engine, table, rep, ['etl_id'])
    # Read the row back by the upsert key instead of matching every field.
    inserted = sl.find_one(engine, table, etl_id=rep['etl_id'])
    if inserted:
        link = {'representative_id': inserted['id'],
                'tag_id': unregtag['id']}
        sl.upsert(engine, sl.get_table(engine, 'tags'), link,
                  ['representative_id', 'tag_id'])
def load_tag(rec, engine):
    """Attach every tag listed in ``rec['tags']`` to its representative.

    The representative is looked up by ``identification_code`` when
    ``rec['id']`` is truthy, otherwise by ``original_name`` using
    ``rec['name']``. Tag rows are upserted before that lookup, so they are
    stored even when no representative is found.

    Args:
        rec: mapping with the keys ``tags``, ``id`` and ``name``.
        engine: database engine handed to the ``sl`` helpers.
    """
    # Fetch each table handle once instead of once per loop iteration.
    tag_table = sl.get_table(engine, 'tag')
    tags = []
    for tag in rec['tags']:
        sl.upsert(engine, tag_table, {'tag': tag}, ['tag'])
        tags.append(sl.find_one(engine, tag_table, tag=tag))

    rep_table = sl.get_table(engine, 'representative')
    if rec['id']:
        rep = sl.find_one(engine, rep_table, identification_code=rec['id'])
    else:
        rep = sl.find_one(engine, rep_table, original_name=rec['name'])
    if not rep:
        # Reported through the module logger, like extract_data does.
        log.warning("couldn't find %s",
                    rec['id'] or rec['name'].encode('utf8'))
        return

    link_table = sl.get_table(engine, 'tags')
    for tag in tags:
        link = {'representative_id': rep['id'], 'tag_id': tag['id']}
        sl.upsert(engine, link_table, link, ['representative_id', 'tag_id'])
def extract_data(engine):
    """Load the bundled CSV of unregistered companies via ``load_rep``.

    Makes sure the ``situation:unregistered`` tag row exists, then feeds
    every CSV row to ``load_rep`` together with that tag row. Progress is
    logged on every 100th row, starting with row 0.
    """
    log.info("Extracting unregistered interests data...")
    label = 'situation:unregistered'
    sl.upsert(engine, sl.get_table(engine, 'tag'), {'tag': label}, ['tag'])
    # Read the row back so the tag's database id is available.
    tag_row = sl.find_one(engine, sl.get_table(engine, 'tag'), tag=label)
    with app.open_resource('resources/unregistered-companies.csv') as handle:
        rows = csv.reader(handle, delimiter=',', quotechar='"')
        for index, row in enumerate(rows):
            load_rep(row, engine, tag_row)
            if not index % 100:
                log.info("Extracted: %s...", index)
def load_representative(engine, rep):
    """Load one ETL representative row and its dependent rows into the models.

    Creates or updates, in this order: the entity, categories, persons, the
    representative itself, its contacts, accreditations, financial data
    (with custom income sources and turnovers), organisation memberships,
    country memberships, associated action fields, associated interests and
    tags. The session is committed at the end.

    Args:
        engine: database engine handed to the ``sl`` helpers.
        rep: mutable mapping for one row of the ETL ``representative``
            table. It is modified in place (values converted, related
            objects attached) before being passed to ``Representative``.

    Raises:
        AssertionError: if ``upsert_entity`` yields no entity or one
            without an ``id``.
        KeyError: if ``rep`` lacks one of the directly indexed keys.
    """
    entity = upsert_entity(rep.get('canonical_name'),
                           name=rep.get('original_name'),
                           suffix=rep.get('name_suffix'),
                           acronym=rep.get('acronym'))
    assert entity is not None, entity
    assert entity.id is not None, entity
    rep['entity'] = entity

    for key in ('members_25', 'members_50', 'members_75', 'members_100'):
        rep[key] = to_integer(rep[key])
    rep['members_fte'] = to_float(rep['members_fte'])
    rep['members'] = to_integer(rep['members'])
    rep['number_of_natural_persons'] = to_integer(
        rep['number_of_natural_persons'])

    if rep.get('main_category'):
        main_category = upsert_category(rep.get('main_category_id'),
                                        rep.get('main_category'))
        rep['main_category'] = main_category
        # A sub-category is only resolved below its main category.
        if rep.get('sub_category'):
            rep['sub_category'] = upsert_category(
                rep.get('sub_category_id'), rep.get('sub_category'),
                main_category)

    etl_id = rep['etl_id']

    # Persons: head and legal are attached to the representative data;
    # accredited persons are kept until the representative object exists.
    accreditations = []
    for person_data in sl.find(engine, sl.get_table(engine, 'person'),
                               representative_etl_id=etl_id):
        person = upsert_person(person_data)
        role = person_data.get('role')
        if role == 'head':
            rep['head'] = person
        elif role == 'legal':
            rep['legal'] = person
        elif role == 'accredited':
            accreditations.append((person, person_data))

    representative = Representative.by_identification_code(
        rep['identification_code'])
    if representative is None:
        representative = Representative.create(rep)
    else:
        representative.update(rep)

    # Contacts: type 'head' is the head office, any other type the BE office.
    for contact_data in sl.find(engine, sl.get_table(engine, 'contact'),
                                representative_etl_id=etl_id,
                                status='active'):
        # Skip contact rows with fewer than 7 non-empty values.
        if sum(1 for value in contact_data.values() if value) < 7:
            continue
        phone = " ".join((contact_data.get('indic_phone') or '',
                          contact_data.get('phone') or '')).strip()
        contact_ = {
            'town': contact_data['town'],
            'street': contact_data['street'],
            'post_code': contact_data['post_code'],
            'postbox': contact_data['postbox'],
            'lat': to_float(contact_data['lat']),
            'lon': to_float(contact_data['lon']),
            'phone': phone,
            'country': Country.by_code(contact_data['country_code']),
        }
        if contact_data['type'] == 'head':
            if representative.head_office_id is None:
                contact = Contact.create(contact_)
                representative.head_office = contact
                representative.contact_country = contact.country
            else:
                representative.head_office.update(contact_)
        else:
            if representative.be_office_id is None:
                contact = Contact.create(contact_)
                representative.be_office = contact
            else:
                representative.be_office.update(contact_)

    for person, data_ in accreditations:
        data_['person'] = person
        data_['representative'] = representative
        accreditation = Accreditation.by_rp(person, representative)
        if accreditation is None:
            accreditation = Accreditation.create(data_)
        else:
            accreditation.update(data_)

    # Financial data columns converted with to_integer.
    integer_fields = (
        'turnover_min', 'turnover_max', 'turnover_absolute',
        'cost_min', 'cost_max', 'cost_absolute',
        'direct_rep_costs_min', 'direct_rep_costs_max',
        'total_budget',
        'public_financing_total', 'public_financing_infranational',
        'public_financing_national',
        'eur_sources_grants', 'eur_sources_procurement',
        'other_sources_donation',
        # Previously filled from 'other_sources_donation' (copy-paste bug).
        'other_sources_contributions',
        'other_sources_total',
    )
    # Columns copied unchanged; the assignment makes sure the key exists.
    plain_fields = (
        'eur_sources_procurement_src', 'eur_sources_grants_src',
        'other_financial_information', 'new_organisation',
    )
    for fd in sl.find(engine, sl.get_table(engine, 'financial_data'),
                      representative_etl_id=etl_id):
        for key in integer_fields:
            fd[key] = to_integer(fd.get(key))
        for key in plain_fields:
            fd[key] = fd.get(key)
        fd['representative'] = representative
        financial_data = FinancialData.by_rsd(representative,
                                              fd.get('start_date'))
        if financial_data is None:
            financial_data = FinancialData.create(fd)
        else:
            financial_data.update(fd)

        for src_ in sl.find(
                engine,
                sl.get_table(engine, 'financial_data_custom_source'),
                representative_etl_id=etl_id,
                financial_data_etl_id=fd['etl_id']):
            src_['financial_data'] = financial_data
            src_['amount'] = to_integer(src_.get('amount'))
            src = CustomIncome.by_fdn(financial_data, src_['name'])
            if src is None:
                src = CustomIncome.create(src_)
            else:
                src.update(src_)

        for turnover_ in sl.find(
                engine, sl.get_table(engine, 'financial_data_turnover'),
                representative_etl_id=etl_id,
                financial_data_etl_id=fd['etl_id']):
            turnover_['entity'] = upsert_entity(
                turnover_.get('canonical_name'), turnover_.get('name'))
            assert turnover_['entity'] is not None, turnover_['entity']
            turnover_['financial_data'] = financial_data
            turnover_['min'] = to_integer(turnover_.get('min'))
            turnover_['max'] = to_integer(turnover_.get('max'))
            turnover = FinancialTurnover.by_fde(financial_data,
                                                turnover_['entity'])
            if turnover is None:
                turnover = FinancialTurnover.create(turnover_)
            else:
                turnover.update(turnover_)

    for org in sl.find(engine, sl.get_table(engine, 'organisation'),
                       representative_etl_id=etl_id):
        org['number_of_members'] = to_integer(org['number_of_members'])
        organisation = upsert_organisation(org)
        omdata = {'representative': representative,
                  'status': org.get('status'),
                  'organisation': organisation}
        om = OrganisationMembership.by_rpo(representative, organisation)
        if om is None:
            om = OrganisationMembership.create(omdata)
        else:
            om.update(omdata)

    for country_ in sl.find(engine,
                            sl.get_table(engine, 'country_of_member'),
                            representative_etl_id=etl_id):
        if not country_.get('country_code'):
            continue
        cdata = {'representative': representative,
                 'status': country_.get('status'),
                 'country': Country.by_code(country_.get('country_code'))}
        cm = CountryMembership.by_rpc(representative, cdata.get('country'))
        if cm is None:
            cm = CountryMembership.create(cdata)
        else:
            cm.update(cdata)

    for action_ in sl.find(engine, sl.get_table(engine, 'action_field'),
                           representative_etl_id=etl_id):
        if not action_.get('action_field'):
            continue
        af = ActionField.by_action(action_.get('action_field'))
        if af is None:
            af = ActionField.create({'action': action_.get('action_field')})
            db.session.commit()
        adata = {'representative': representative,
                 'status': action_.get('status'),
                 'action': af}
        am = AssociatedAction.by_rpa(representative, af)
        if am is None:
            am = AssociatedAction.create(adata)
            db.session.commit()
        else:
            am.update(adata)

    for interest_ in sl.find(engine, sl.get_table(engine, 'interest'),
                             representative_etl_id=etl_id):
        if not interest_.get('interest'):
            continue
        i = Interest.by_interest(interest_.get('interest'))
        if i is None:
            i = Interest.create({'interest': interest_.get('interest')})
            db.session.commit()
        # The status comes from the interest row itself. It used to be read
        # from the leftover ``action_`` variable of the previous loop.
        adata = {'representative': representative,
                 'status': interest_.get('status'),
                 'interest': i}
        ai = AssociatedInterest.by_rpi(representative, i)
        if ai is None:
            ai = AssociatedInterest.create(adata)
            db.session.commit()
        else:
            ai.update(adata)

    for taglink in sl.find(engine, sl.get_table(engine, 'tags'),
                           representative_id=rep['id']):
        etltag = sl.find_one(engine, sl.get_table(engine, 'tag'),
                             id=taglink['tag_id'])
        tag = upsert_tag(etltag['tag'])
        if tag not in representative.tags:
            representative.tags.append(tag)

    db.session.commit()
def load_representative(engine, rep):
    """Load one ETL representative row and its dependent rows into the models.

    Creates or updates, in this order: the entity, categories, persons, the
    representative itself, accreditations, financial data (with turnovers),
    organisation memberships, country memberships and tags. The session is
    committed at the end.

    Args:
        engine: database engine handed to the ``sl`` helpers.
        rep: mutable mapping for one row of the ETL ``representative``
            table. It is modified in place (values converted, related
            objects attached) before being passed to ``Representative``.

    Raises:
        AssertionError: if ``upsert_entity`` yields no entity or one
            without an ``id``.
        KeyError: if ``rep`` lacks one of the directly indexed keys.
    """
    entity = upsert_entity(rep.get('canonical_name'),
                           name=rep.get('original_name'),
                           suffix=rep.get('name_suffix'),
                           acronym=rep.get('acronym'))
    assert entity is not None, entity
    assert entity.id is not None, entity
    rep['entity'] = entity

    # NOTE(review): 'members_fte' is converted with to_integer here; FTE
    # figures may be fractional -- confirm the target column type.
    for key in ('members_25', 'members_50', 'members_75', 'members_100',
                'members_fte', 'number_of_natural_persons'):
        rep[key] = to_integer(rep[key])
    rep['contact_lat'] = to_float(rep['contact_lat'])
    rep['contact_lon'] = to_float(rep['contact_lon'])
    # Phone and fax are stored as "<dial prefix> <number>", trimmed.
    rep['contact_phone'] = " ".join((rep.get('contact_indic_phone') or '',
                                     rep.get('contact_phone') or '')).strip()
    rep['contact_fax'] = " ".join((rep.get('contact_indic_fax') or '',
                                   rep.get('contact_fax') or '')).strip()
    rep['contact_country'] = Country.by_code(rep['country_code'])

    if rep.get('main_category'):
        main_category = upsert_category(rep.get('main_category_id'),
                                        rep.get('main_category'))
        rep['main_category'] = main_category
        rep['sub_category'] = upsert_category(rep.get('sub_category_id'),
                                              rep.get('sub_category'),
                                              main_category)

    etl_id = rep['etl_id']

    # Persons: head and legal are attached to the representative data;
    # accredited persons are kept until the representative object exists.
    accreditations = []
    for person_data in sl.find(engine, sl.get_table(engine, 'person'),
                               representative_etl_id=etl_id):
        person = upsert_person(person_data)
        role = person_data.get('role')
        if role == 'head':
            rep['head'] = person
        elif role == 'legal':
            rep['legal'] = person
        elif role == 'accredited':
            accreditations.append((person, person_data))

    representative = Representative.by_identification_code(
        rep['identification_code'])
    if representative is None:
        representative = Representative.create(rep)
    else:
        representative.update(rep)

    for person, data_ in accreditations:
        data_['person'] = person
        data_['representative'] = representative
        accreditation = Accreditation.by_rp(person, representative)
        if accreditation is None:
            accreditation = Accreditation.create(data_)
        else:
            accreditation.update(data_)

    # Financial data columns converted with to_integer.
    integer_fields = (
        'turnover_min', 'turnover_max', 'turnover_absolute',
        'cost_min', 'cost_max', 'cost_absolute',
        'direct_rep_costs_min', 'direct_rep_costs_max',
        'total_budget',
        'public_financing_total', 'public_financing_infranational',
        'public_financing_national',
        'eur_sources_grants', 'eur_sources_procurement',
        'other_sources_donation',
        # Previously filled from 'other_sources_donation' (copy-paste bug).
        'other_sources_contributions',
        'other_sources_total',
    )
    # Columns copied unchanged; the assignment makes sure the key exists.
    plain_fields = (
        'eur_sources_procurement_src', 'eur_sources_grants_src',
        'other_financial_information',
    )
    for fd in sl.find(engine, sl.get_table(engine, 'financial_data'),
                      representative_etl_id=etl_id):
        for key in integer_fields:
            fd[key] = to_integer(fd.get(key))
        for key in plain_fields:
            fd[key] = fd.get(key)
        fd['representative'] = representative
        financial_data = FinancialData.by_rsd(representative,
                                              fd.get('start_date'))
        if financial_data is None:
            financial_data = FinancialData.create(fd)
        else:
            financial_data.update(fd)

        for turnover_ in sl.find(
                engine, sl.get_table(engine, 'financial_data_turnover'),
                representative_etl_id=etl_id,
                financial_data_etl_id=fd['etl_id']):
            turnover_['entity'] = upsert_entity(
                turnover_.get('canonical_name'), turnover_.get('name'))
            assert turnover_['entity'] is not None, turnover_['entity']
            turnover_['financial_data'] = financial_data
            turnover_['min'] = to_integer(turnover_.get('min'))
            turnover_['max'] = to_integer(turnover_.get('max'))
            turnover = FinancialTurnover.by_fde(financial_data,
                                                turnover_['entity'])
            if turnover is None:
                turnover = FinancialTurnover.create(turnover_)
            else:
                turnover.update(turnover_)

    for org in sl.find(engine, sl.get_table(engine, 'organisation'),
                       representative_etl_id=etl_id):
        org['number_of_members'] = to_integer(org['number_of_members'])
        organisation = upsert_organisation(org)
        omdata = {'representative': representative,
                  'status': org.get('status'),
                  'organisation': organisation}
        om = OrganisationMembership.by_rpo(representative, organisation)
        if om is None:
            om = OrganisationMembership.create(omdata)
        else:
            om.update(omdata)

    for country_ in sl.find(engine,
                            sl.get_table(engine, 'country_of_member'),
                            representative_etl_id=etl_id):
        if not country_.get('country_code'):
            continue
        cdata = {'representative': representative,
                 'status': country_.get('status'),
                 'country': Country.by_code(country_.get('country_code'))}
        cm = CountryMembership.by_rpc(representative, cdata.get('country'))
        if cm is None:
            cm = CountryMembership.create(cdata)
        else:
            cm.update(cdata)

    for taglink in sl.find(engine, sl.get_table(engine, 'tags'),
                           representative_id=rep['id']):
        etltag = sl.find_one(engine, sl.get_table(engine, 'tag'),
                             id=taglink['tag_id'])
        tag = upsert_tag(etltag['tag'])
        if tag not in representative.tags:
            representative.tags.append(tag)

    db.session.commit()