def my_put(resource, item, vpapi):
    """Store ``item`` in ``resource``, replacing any record with the same id.

    The replacement is emulated with DELETE followed by POST because
    ``vpapi.put`` did not work for the original author.

    Args:
        resource: name of the API collection (passed straight to ``vpapi``).
        item: the document to store; must contain an ``'id'`` key.
        vpapi: API client object providing ``get``, ``delete`` and ``post``.

    Returns:
        Whatever ``vpapi.post`` returns, so callers can inspect the outcome
        (the original version discarded it and returned ``None``).
    """
    existing = vpapi.get(resource, where={"id": item['id']})
    if existing['_items']:
        # Remove the stale copy first; the POST below then recreates it.
        vpapi.delete(resource, item['id'])
    return vpapi.post(resource, item)
def savevoteevent(self):
    """Create the vote event on the API unless its identifier already exists.

    Args:
        self: the vote-event document itself (it is subscripted and posted
            as the payload, i.e. used as a dict); must contain
            ``'identifier'``.

    Returns:
        The POST response when the vote event was created, otherwise the
        lookup response returned by ``vpapi.get``.

    Raises:
        Exception: ``(identifier, response)`` when the response's
            ``'_status'`` is not ``'OK'``.
    """
    r = vpapi.get('vote-events', where={'identifier': self["identifier"]})
    if not r['_items']:
        r = vpapi.post("vote-events", self)
    # An already existing vote event is not updated.
    # NOTE(review): in that case ``r`` is still the lookup response; confirm
    # it carries '_status', otherwise the check below raises KeyError.
    if r['_status'] != 'OK':
        # The document is subscripted like a dict, so the former
        # ``self.name`` would raise AttributeError and hide the API
        # response; report the identifier instead.
        raise Exception(self["identifier"], r)
    return r
def savemotion(self):
    """Create the motion on the API unless its first identifier already exists.

    Args:
        self: the motion document itself (used as a dict and posted as the
            payload); must contain a non-empty ``'identifiers'`` list.

    Returns:
        The POST response when the motion was created, otherwise the lookup
        response returned by ``vpapi.get``.

    Raises:
        Exception: ``(first identifier, response)`` when the response's
            ``'_status'`` is not ``'OK'``.
    """
    first_identifier = self["identifiers"][0]
    r = vpapi.get('motions', where={'identifiers': {'$elemMatch': first_identifier}})
    if not r['_items']:
        r = vpapi.post("motions", self)
    # An already existing motion is not updated.
    # NOTE(review): in that case ``r`` is still the lookup response; confirm
    # it carries '_status', otherwise the check below raises KeyError.
    if r['_status'] != 'OK':
        # The document is subscripted like a dict, so the former
        # ``self.name`` would raise AttributeError and hide the API response.
        raise Exception(first_identifier, r)
    return r
def test(self):
    """Debug helper: show the last page of ``votes`` and the resume index.

    Fetches page 1 of ``votes``, pretty-prints its ``_links``, extracts the
    number of the last page from the ``last`` link, fetches that page and
    prints the ``vote_event_id`` of its final item, then prints
    ``index_start``.

    ``self`` is not used.
    """
    first_page = vpapi.get("votes", page="1")
    pprint.pprint(first_page["_links"])

    last_motion_page = None
    if first_page["_items"]:
        last_href = first_page["_links"]["last"]["href"]
        # Everything after "page=" is taken as the page number.
        last_motion_page = last_href[last_href.index("page=") + len("page="):]
        pprint.pprint(last_motion_page.encode("utf-8"))

    if last_motion_page:
        last_page = vpapi.get("votes", page=last_motion_page)
        last_ids = [vote["vote_event_id"] for vote in last_page["_items"]]
        if last_ids:
            print(last_ids[-1])
        # The lookup that derived the resume index from the last stored id
        # was commented out in the original, which left ``index_start``
        # unbound here and made the final print fail. ``None`` marks
        # "not determined".
        index_start = None
    else:
        index_start = 0
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(index_start)
def save(scraped):
    """Create or replace an organization on the API.

    The organization is looked up by its first identifier. A missing one is
    POSTed; an existing one is replaced with PUT (preferred over PATCH so
    that properties that no longer exist are removed).

    Args:
        scraped: the organization document (dict) with a non-empty
            ``'identifiers'`` list.

    Returns:
        The ``'id'`` field of the write response.

    Raises:
        Exception: ``(name, response)`` when the write response's
            ``'_status'`` is not ``'OK'``.
    """
    r = vpapi.get("organizations", where={"identifiers": {"$elemMatch": scraped["identifiers"][0]}})
    if not r["_items"]:
        r = vpapi.post("organizations", scraped)
    else:
        existing = r["_items"][0]
        r = vpapi.put("organizations/%s" % existing["id"], scraped)
    if r["_status"] != "OK":
        # The original referenced the undefined names ``self`` and ``resp``
        # here, which raised NameError instead of reporting the failure.
        raise Exception(scraped.get("name"), r)
    return r["id"]
def get_or_create(self, endpoint, item, refresh=False, where_keys=None):
    """Create ``item`` at ``endpoint`` or replace the matching existing record.

    The existing record is searched with a filter that depends on the
    endpoint, unless ``where_keys`` names the item keys to match on.

    Args:
        endpoint: API collection name, e.g. ``'memberships'`` or ``'votes'``.
        item: the document to store.
        refresh: when true, re-fetch the stored record (with the endpoint's
            sort and embed settings) and return that instead of the write
            response.
        where_keys: optional iterable of keys of ``item`` whose values form
            the lookup filter; overrides the per-endpoint defaults.

    Returns:
        The response dict with an added ``'_created'`` flag telling whether
        the record was newly created.

    Raises:
        Exception: carrying the response when its ``'_status'`` is not
            ``'OK'``.
    """
    sort = []
    embed = []
    if where_keys:
        where = {key: item[key] for key in where_keys}
    elif endpoint == 'memberships':
        where = {
            'person_id': item['person_id'],
            'organization_id': item['organization_id'],
            # Match records without a start date when the item has none.
            'start_date': item.get('start_date', {"$exists": False}),
        }
        sort = [('start_date', -1)]
    elif endpoint in ('motions', 'speeches'):
        where = {'sources.url': item['sources'][0]['url']}
    elif endpoint == 'vote-events':
        embed = ['votes']
        if 'motion_id' in item:
            where = {'motion_id': item['motion_id']}
        else:
            where = {'start_date': item['start_date']}
    elif endpoint == 'votes':
        where = {
            'vote_event_id': item['vote_event_id'],
            'voter_id': item['voter_id'],
        }
    elif endpoint == 'events':
        where = {'identifier': item['identifier']}
    else:
        where = {'identifiers': {'$elemMatch': item['identifiers'][0]}}

    existing = vpapi.getfirst(endpoint, where=where, sort=sort)
    created = not existing
    if created:
        resp = vpapi.post(endpoint, item)
    else:
        resp = vpapi.put("%s/%s" % (endpoint, existing['id']), item)

    # Check the status before touching '_links': the original logged first,
    # so a failed write could surface as a KeyError on '_links' instead of
    # the API error.
    if resp['_status'] != 'OK':
        raise Exception(resp)
    action = 'Created' if created else 'Updated'
    self.log('%s %s' % (action, resp['_links']['self']['href']), DEBUG)

    if refresh:
        resp = vpapi.get(
            resp['_links']['self']['href'], sort=sort, embed=embed)
    resp['_created'] = created
    return resp
def saveperson(scraped):
    """Create or replace a person on the API, matched by ``psp.cz/osoby`` id.

    An existing person is replaced by DELETE followed by POST, because
    ``vpapi.put`` did not work for the original author.

    Args:
        scraped: the person document (dict); its ``'identifiers'`` list must
            contain an entry whose ``'scheme'`` is ``'psp.cz/osoby'``.

    Returns:
        The ``'id'`` field of the POST response.

    Raises:
        ValueError: when no ``psp.cz/osoby`` identifier is present (the
            original failed later with an unbound-variable error).
        Exception: ``(name, response)`` when the POST response's
            ``'_status'`` is not ``'OK'``.
    """
    for ident in scraped["identifiers"]:
        if ident["scheme"] == "psp.cz/osoby":
            identifier = ident
            break
    else:
        raise ValueError(
            "person %r has no 'psp.cz/osoby' identifier" % (scraped.get("id"),))

    r = vpapi.get('people', where={'identifiers': {'$elemMatch': identifier}})
    if r['_items']:
        # Replace rather than patch, so that properties that no longer exist
        # are removed.
        existing = r['_items'][0]
        vpapi.delete("people", existing['id'])
    r = vpapi.post('people', scraped)
    if r['_status'] != 'OK':
        # The original referenced the undefined names ``self`` and ``resp``
        # here, which raised NameError instead of reporting the failure.
        raise Exception(scraped.get("name"), r)
    return r['id']
def save_organization(scraped):
    """Create or replace an organization on the API.

    The organization is looked up by its first identifier. An existing one
    is replaced by DELETE followed by POST, because ``vpapi.put`` did not
    work for the original author. Progress is printed as ``POST <id>`` for
    a new record and ``PUT <id>`` for a replaced one.

    Args:
        scraped: the organization document (dict) with ``'id'`` and a
            non-empty ``'identifiers'`` list.

    Returns:
        The ``'id'`` field of the POST response.

    Raises:
        Exception: ``(name, response)`` when the POST response's
            ``'_status'`` is not ``'OK'``.
    """
    r = vpapi.get('organizations', where={'identifiers': {'$elemMatch': scraped["identifiers"][0]}})
    if not r['_items']:
        r = vpapi.post('organizations', scraped)
        print ("POST " + scraped['id'])
    else:
        # Replace rather than patch, so that properties that no longer exist
        # are removed.
        existing = r['_items'][0]
        vpapi.delete("organizations", existing['id'])
        r = vpapi.post('organizations', scraped)
        print ("PUT " + scraped['id'])
    if r['_status'] != 'OK':
        # ``scraped`` is subscripted like a dict, so the former
        # ``scraped.name`` would raise AttributeError and hide the response.
        raise Exception(scraped.get('name'), r)
    return r['id']
def get_remote_id(self, scheme, identifier):
    """Return the API id of the entity carrying the given identifier.

    Results are memoised in ``self._ids`` under the key
    ``"<scheme>/<identifier>"``; only successful lookups are cached.

    Args:
        scheme: identifier scheme of the form ``"<domain>/<category>"``
            (exactly one slash, otherwise the unpacking below raises
            ValueError). Categories ``committees``, ``parties`` and
            ``chamber`` are looked up in ``organizations``; any other
            category is used as the endpoint name itself.
        identifier: the identifier value within that scheme.

    Returns:
        The ``'id'`` of the first matching item, or ``None`` when nothing
        matches.
    """
    key = "%s/%s" % (scheme, identifier)
    if key in self._ids:
        return self._ids[key]

    # Only the category part selects the endpoint; the domain is unused.
    _, category = scheme.split('/')
    if category in ('committees', 'parties', 'chamber'):
        endpoint = 'organizations'
    else:
        endpoint = category

    resp = vpapi.get(endpoint, where={
        'identifiers': {
            '$elemMatch': {'scheme': scheme, 'identifier': identifier}
        }
    })
    if not resp['_items']:
        return None
    remote_id = resp['_items'][0]['id']
    self._ids[key] = remote_id
    return remote_id
def savemembership(self):
    """Create the membership on the API or replace it when its end date changed.

    A membership is matched on person, organization, role ``"member"`` and
    start date. An existing record whose ``end_date`` equals the new one is
    left alone; otherwise it is deleted and re-posted under the same id,
    because ``vpapi.put`` did not work for the original author.

    Args:
        self: the membership document itself (used as a dict and posted as
            the payload); must contain ``person_id``, ``organization_id``
            and ``start_date``. Its ``'id'`` is set to the existing record's
            id when that record is replaced.

    Raises:
        Exception: ``((person_id, organization_id), response)`` when the
            response's ``'_status'`` is not ``'OK'``.
    """
    r = vpapi.get('memberships', where={
        'person_id': self["person_id"],
        'organization_id': self["organization_id"],
        "role": "member",
        "start_date": self["start_date"],
    })
    if not r['_items']:
        r = vpapi.post("memberships", self)
    else:
        existing = r['_items'][0]
        try:
            unchanged = existing["end_date"] == self["end_date"]
        except KeyError:
            # Either side has no end_date: treat the record as changed, as
            # the original bare ``except`` did.
            unchanged = False
        if unchanged:
            print("not updating: " + existing['id'])
        else:
            vpapi.delete("memberships", existing['id'])
            self['id'] = existing['id']
            r = vpapi.post('memberships', self)
            print("updating: " + self['id'])
    # NOTE(review): when nothing was updated ``r`` is still the lookup
    # response; confirm it carries '_status'.
    if r['_status'] != 'OK':
        # The document is subscripted like a dict, so the former
        # ``self.name`` would raise AttributeError and hide the API response.
        raise Exception((self["person_id"], self["organization_id"]), r)
def saveallmotionsandvoteevents(hl_hlasovani, min_motion_id=60536):
    """Save a motion and its vote event for every row above a threshold id.

    Columns read from each row: 0 = voting id, 1 = organ identifier
    (scheme ``psp.cz/organy``), 5 and 6 = date and time, 11 and 12 = inputs
    of ``guess_majority``, 14 = result code, 15 = motion text.

    Args:
        hl_hlasovani: iterable of rows (sequences of strings).
        min_motion_id: only rows whose numeric voting id is strictly greater
            than this are saved. Defaults to the value that was previously
            hard-coded.

    Side effects:
        Prints each motion id, records saved ids in the module-level
        ``test`` mapping and logs each saved pair.
    """
    global test
    organizations = {}  # organ identifier -> cached API lookup response
    for row in hl_hlasovani:
        organ_key = row[1].strip()
        if organ_key not in organizations:
            organizations[organ_key] = vpapi.get('organizations', where={'identifiers': {'$elemMatch': {"identifier": organ_key, "scheme": "psp.cz/organy"}}})
        r_org = organizations[organ_key]

        voting_id = row[0].strip()
        motion = {
            "id": voting_id,
            "organization_id": r_org["_items"][0]['id'],
            "requirement": guess_majority(row[12], row[11]),
            "result": result2result(row[14].strip()),
            "text": row[15].strip(),
            "sources": [{'url': "http://www.psp.cz/sqw/hlasy.sqw?g=" + voting_id}]
        }
        print("motion: " + motion['id'])
        if int(motion['id']) <= min_motion_id:
            continue

        r_motion = savemotion(motion)
        if r_motion["_status"] == "OK":
            vote_event = {
                "id": voting_id,
                "motion_id": r_motion['id'],
                'identifier': voting_id,
                # "legislative_session_id" (row[2]) is not implemented in the API yet
                "start_date": vpapi.local_to_utc(scrapeutils.cs2iso(row[5].strip() + "T" + row[6].strip())),
                "result": result2result(row[14].strip()),
            }
            savevoteevent(vote_event)
            test[voting_id] = {"id": voting_id, "ve": True}
            logging.info('Motion and vote-event saved: ' + str(r_motion['id']))
def saveallmotionsandvoteevents(hl_hlasovani):
    """Save a motion and its vote event for every row of the voting table.

    Columns read from each row: 0 = voting id, 1 = organ identifier
    (scheme ``psp.cz/organy``), 2 = legislative session, 5 and 6 = date and
    time, 11 and 12 = inputs of ``guess_majority``, 14 = result code,
    15 = motion text.

    Args:
        hl_hlasovani: iterable of rows (sequences of strings).

    Side effects:
        Prints every motion document before saving it.
    """
    organizations = {}  # organ identifier -> cached API lookup response
    for row in hl_hlasovani:
        organ_key = row[1].strip()
        if organ_key not in organizations:
            organizations[organ_key] = vpapi.get(
                "organizations",
                where={"identifiers": {"$elemMatch": {"identifier": organ_key, "scheme": "psp.cz/organy"}}},
            )
        r_org = organizations[organ_key]

        voting_id = row[0].strip()
        motion = {
            "id": voting_id,
            "organization_id": r_org["_items"][0]["id"],
            "requirement": guess_majority(row[12], row[11]),
            "result": result2result(row[14].strip()),
            "text": row[15].strip(),
            "sources": [{"url": "http://www.psp.cz/sqw/hlasy.sqw?g=" + voting_id}],
        }
        print(motion)
        r_motion = savemotion(motion)
        if r_motion["_status"] == "OK":
            # No explicit "id" is sent for the vote event (it was commented
            # out in the original).
            vote_event = {
                "motion_id": r_motion["id"],
                "identifier": voting_id,
                "legislative_session": row[2].strip(),
                "start_date": scrapeutils.cs2iso(row[5].strip() + " " + row[6].strip()),
                "result": result2result(row[14].strip()),
            }
            savevoteevent(vote_event)
def savemotion(self):
    """Post the motion to the API when no motion with the same id exists.

    ``self`` is the motion document itself: it is subscripted for ``'id'``
    and passed as the POST payload.

    NOTE(review): this excerpt appears to end right after the POST; the
    response ``r2`` is not used in the visible part and nothing is returned
    here. Confirm against the full file.
    """
    # Look the motion up by its own id.
    r = vpapi.get('motions', where={'id': self['id']})
    if not r['_items']:
        #print(self)
        r2 = vpapi.post("motions",self)
'''creates people from API
see people-example.json
'''
import vpapi
import json

vpapi.parliament("cz/psp")

# Collect a trimmed-down record for every person, keyed by person id.
people = {}
next = True
page = 1
# NOTE(review): nothing in this excerpt advances `page` or clears `next`;
# presumably that follows below - confirm against the full script.
while next:
    peo = vpapi.get("people", page=page)
    for p in peo["_items"]:
        people[p["id"]] = {
            "id": p["id"],
            "name": p["name"],
            "birth_date": p['birth_date'],
            "gender": p["gender"],
            "sort_name": p["sort_name"],
            "given_name": p["given_name"],
            "identifiers": p["identifiers"],
            "family_name": p["family_name"]
        }
        # honorific_prefix is optional: copy it only when present.
        if "honorific_prefix" in p:
            people[p["id"]]["honorific_prefix"] = p["honorific_prefix"]
self['id'] = r['_items'][0]['id'] r = vpapi.post('memberships', self) print("updating: " + self['id']) # r = vpapi.put('memberships/%s' % r['_items'][0]['id'],self) if r['_status'] != 'OK': raise Exception(self.name, r) zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/poslanci.zip',zipped=True) zarazeni = scrapeutils.zipfile2rows(zfile,'zarazeni.unl') from datetime import datetime i = 0 for row in zarazeni: r_org = vpapi.get('organizations', where={'identifiers': {'$elemMatch': {"identifier": row[1].strip(), "scheme": "psp.cz/organy"}}}) if len(r_org["_items"]) > 0: r_pers = vpapi.get('people', where={'identifiers': {'$elemMatch': {"identifier": row[0].strip(), "scheme": "psp.cz/osoby"}}}) if len(r_pers["_items"]) > 0: membership = { "label": "Člen", "role": "member", "person_id": r_pers["_items"][0]['id'], "organization_id": r_org["_items"][0]['id'], # "id": str(i), "start_date": datetime.strptime(row[3].strip(), '%Y-%m-%d %H').strftime('%Y-%m-%d') } if row[4].strip() != "": membership["end_date"] = datetime.strptime(row[4].strip(), '%Y-%m-%d %H').strftime('%Y-%m-%d') savemembership(membership)
"onah": { "name": "Občanské národní hnutí", "color": "#000", "position": 30, "groups": ["Poslanecký klub Občanského národního hnutí"], "abbreviation": "ONAH" } } # organizations organizations = {} next = True page = 1 while next: orgs = vpapi.get("organizations", where={"classification": "political group"}, page=page) for org in orgs["_items"]: organizations[org["id"]] = {"id": org["id"], "name": org["name"]} page = page + 1 try: orgs["_links"]["next"] except: next = False i = 0 for key in parties: party = parties[key] party['children'] = [] party['image'] = key + '.png' for keyo in organizations:
return 1 if vote == 'no': return -1 if vote == 'abstain': return -1 else: return 0 answers = {} groups = {} mps = {} vpapi.parliament('sk/nrsr') for ve in ves: print(ve) vedb = vpapi.get("vote-events", where={"sources.url":{"$regex":"ID="+ve+"$"}}) idd = vedb['_items'][0]['id'] r = vpapi.getall("votes",where={"vote_event_id":idd}) for row in r: try: answers[row['voter_id']] except: answers[row['voter_id']] = {"vote":{}} answers[row['voter_id']]['vote'][ve] = vote2vote(row['option']) if row['group_id'] is not None: try: groups[row['group_id']] except: group = vpapi.get("organizations/"+row['group_id']) groups[row['group_id']] = {"name": group['name'].replace('Klub ','')} groups[row['group_id']]['slug'] = slugify.slugify(groups[row['group_id']]['name'])
osoby = scrapeutils.zipfile2rows(zfile, 'osoby.unl') poslanec = scrapeutils.zipfile2rows(zfile, 'poslanec.unl') oosoby = {} for row in osoby: oosoby[row[0].strip()] = row persons = {} terms = {} for row in poslanec: oid = row[1].strip() try: terms[row[4].strip()] except Exception: r_t = vpapi.get("organizations", where={'identifiers': {'$elemMatch': {"identifier": row[4].strip(), "scheme": "psp.cz/organy"}}}) for ident in r_t["_items"][0]["identifiers"]: if ident["scheme"] == "psp.cz/term": terms[row[4].strip()] = ident["identifier"] try: persons[row[1].strip()] except Exception: person = { "id": row[1].strip(), "name": oosoby[oid][3].strip() + " " + oosoby[oid][2].strip(), "sort_name": oosoby[oid][2].strip() + ", " + oosoby[oid][3].strip(), "family_name": oosoby[oid][2].strip(), "given_name": oosoby[oid][3].strip(), "birth_date": scrapeutils.cs2iso(oosoby[oid][5].strip()), "identifiers": [ {"identifier": row[0].strip(), "scheme": "psp.cz/poslanec/" + terms[row[4].strip()]},
cvote_events = {} cmotions = {} # with open(directory + "/data/votes.csv", "w") as fout: # csvw = csv.writer(fout) # row = ["vote_event_id","voter_id","option","group_id"] # csvw.writerow(row) for k in sorted(vote_events): print(k + " " + getid(vote_events[k]['sources'][0]['url'])) with open(directory + "/data/votes.csv", "a") as fout: csvw = csv.writer(fout) if k > last: m_id = vote_events[k]["motion_id"] motions[m_id] = vpapi.get("motions",where={"id":m_id}) cmotions[m_id] = motions[m_id] vos = vpapi.getall("votes",where={"vote_event_id":k}) for vote in vos: # votes.append(vote) cvotes.append(vote) try: cpeople[vote["voter_id"]] = people[vote["voter_id"]] corganizations[vote["group_id"]] = organizations[vote["group_id"]] cvote_events[vote["vote_event_id"]] = vote_events[vote["vote_event_id"]] # m_id = vote_events[vote["vote_event_id"]]["motion_id"] # cmotions[m_id] = motions[m_id] vote_event_id = getid(cvote_events[vote["vote_event_id"]]['sources'][0]['url']) voter_id = cpeople[vote["voter_id"]]["identifiers"][0]["identifier"]
"onah": { "name": "Občanské národní hnutí", "color": "#000", "position": 30, "groups": ["Poslanecký klub Občanského národního hnutí"], "abbreviation": "ONAH" } } # organizations organizations = {} next = True page = 1 while next: orgs = vpapi.get("organizations",where={"classification": "political group"},page=page) for org in orgs["_items"]: organizations[org["id"]] = {"id": org["id"], "name": org["name"]} page = page + 1 try: orgs["_links"]["next"] except: next = False i = 0 for key in parties: party = parties[key] party['children'] = [] party['image'] = key + '.png' for keyo in organizations: if organizations[keyo]['name'] in party['groups']:
''' import vpapi import json vpapi.parliament("cz/psp") vote_events = {} next = True page = 1 with open('../www/json/issue.json') as data_file: issue = json.load(data_file) for key in issue["vote_events"]: rve = vpapi.get("vote-events",where={"identifier":key},embed=["motion"]) try: ve = rve["_items"][0] issue["vote_events"][key]['available_vote_event'] = True vote_event = { "id": ve["id"], "motion": { "text": ve["motion"]["text"], "requirement": ve["motion"]["requirement"], "id": ve["motion"]["id"] }, "start_date": ve["start_date"], "identifier": ve["identifier"], "result": ve["result"], } next = True
return -1 if vote == 'abstain': return -1 else: return 0 answers = {} groups = {} mps = {} vpapi.parliament('sk/nrsr') for ve in ves: print(ve) vedb = vpapi.get("vote-events", where={"sources.url": { "$regex": "ID=" + ve + "$" }}) idd = vedb['_items'][0]['id'] r = vpapi.getall("votes", where={"vote_event_id": idd}) for row in r: try: answers[row['voter_id']] except: answers[row['voter_id']] = {"vote": {}} answers[row['voter_id']]['vote'][ve] = vote2vote(row['option']) if row['group_id'] is not None: try: groups[row['group_id']] except: group = vpapi.get("organizations/" + row['group_id']) groups[row['group_id']] = {
def savevoteevent(self):
    """Post the vote event to the API when its identifier is not stored yet.

    ``self`` is the vote-event document itself: it is subscripted for
    ``'identifier'`` and passed as the POST payload.

    NOTE(review): this excerpt appears to end right after the POST; no
    status check or return is visible here. Confirm against the full file.
    """
    # Look the vote event up by its identifier.
    r = vpapi.get('vote-events', where={'identifier':self["identifier"]})
    if not r['_items']:
        #print(self)
        r = vpapi.post("vote-events",self)
for v in votes: try: data[p['code'] + '_' + v['voter_id']] except: data[p['code'] + '_' + v['voter_id']] = {} data[p['code'] + '_' + v['voter_id']]['votes'] = {} data[p['code'] + '_' + v['voter_id']]['chamber'] = p['code'] data[p['code'] + '_' + v['voter_id']]['chamber_name'] = p['name'] data[p['code'] + '_' + v['voter_id']]['id'] = v['voter_id'] data[p['code'] + '_' + v['voter_id']]['votes'][ve['id']] = o2o[v['option']] * int(ve[p['code_csv'] + '_polarity']) data[p['code'] + '_' + v['voter_id']]['group_id'] = v['group_id'] os = {} for k in data: if data[k]['chamber'] == p['code']: mpapi = vpapi.get("people",where={"id":data[k]['id']}) mp = mpapi["_items"][0] data[k]['family_name'] = mp['family_name'] data[k]['name'] = mp['family_name'] + ' ' + mp['given_name'] data[k]['given_name'] = mp['given_name'] try: o = os[data[k]["group_id"]] except: oapi = vpapi.get("organizations",where={"id":data[k]["group_id"]}) o = oapi["_items"][0] os[data[k]["group_id"]] = o data[k]['group'] = o['name'] if data[k]['chamber'] == 'senat': data[k]['party_abbreviaton'] = g2g[o['name']] if data[k]['id'] == '253': data[k]['picture'] = 'http://senat.cz/images/senatori/' + slugify.slugify(data[k]['family_name']) + slugify.slugify(data[k]['given_name'])[0:1] + '_295.jpg'
def savevotes(hl_poslanec):
    """Build vote records from individual voting rows and post them to the API.

    Columns read from each row: 0 = MP identifier (matched against schemes
    matching ``psp.cz/poslanec/*``), 1 = vote-event identifier, 2 = raw
    vote option (translated by ``option2option``).

    For every row the voter's political group is the first membership whose
    organization is classified ``"political group"`` and whose period
    covers the vote event's ``start_date`` (a missing ``end_date`` means
    the membership is still open).

    Args:
        hl_poslanec: iterable of rows (sequences of strings).

    Side effects:
        Issues API lookups (cached per identifier for the duration of the
        call) and one final ``vpapi.post("votes", ...)``.
    """
    votes = {}          # vote-event id -> list of vote documents
    voteevents = {}     # vote-event identifier -> cached lookup response
    people = {}         # MP identifier -> cached lookup response
    organizations = {}  # person id -> cached memberships response
    for rowp in hl_poslanec:
        # if rowp[0] == 0: faulty vote in the db, see http://www.psp.cz/sqw/hlasy.sqw?g=58297
        voter_key = rowp[0].strip()
        event_key = rowp[1].strip()

        if event_key not in voteevents:
            voteevents[event_key] = vpapi.get("vote-events", where={"identifier": event_key})
        r_voteevent = voteevents[event_key]

        if voter_key not in people:
            people[voter_key] = vpapi.get(
                "people",
                where={
                    "identifiers": {
                        "$elemMatch": {
                            "identifier": voter_key,
                            "scheme": {"$regex": "psp.cz/poslanec/*", "$options": "i"},
                        }
                    }
                },
            )
        person = people[voter_key]["_items"][0]

        if person["id"] not in organizations:
            organizations[person["id"]] = vpapi.get(
                "memberships", where={"person_id": person["id"]}, embed=["organization"]
            )
        memberships = organizations[person["id"]]

        vote_event = r_voteevent["_items"][0]
        voted_at = vote_event["start_date"]

        # Reset per row: the original kept the previous row's group (or hit
        # an unbound name on the first row) when no membership matched.
        organization = None
        for membership in memberships["_items"]:
            if membership["organization"]["classification"] != "political group":
                continue
            if membership["start_date"] > voted_at:
                continue
            if "end_date" not in membership or membership["end_date"] >= voted_at:
                organization = membership["organization"]
                break

        vote = {
            "voter_id": person["id"],
            "option": option2option(rowp[2].strip()),
            "vote_event_id": vote_event["id"],
        }
        if organization is not None:
            vote["group_id"] = organization["id"]
        votes.setdefault(vote_event["id"], []).append(vote)

    # NOTE(review): this posts the whole mapping in one request; the original
    # also contained a commented-out loop posting each list separately -
    # confirm which payload shape the API expects.
    vpapi.post("votes", votes)
''' import vpapi import json vpapi.parliament("cz/psp") vote_events = {} next = True page = 1 with open('../www/json/issue.json') as data_file: issue = json.load(data_file) for key in issue["vote_events"]: rve = vpapi.get("vote-events", where={"identifier": key}, embed=["motion"]) try: ve = rve["_items"][0] issue["vote_events"][key]['available_vote_event'] = True vote_event = { "id": ve["id"], "motion": { "text": ve["motion"]["text"], "requirement": ve["motion"]["requirement"], "id": ve["motion"]["id"] }, "start_date": ve["start_date"], "identifier": ve["identifier"], "result": ve["result"], } next = True
li = td.text.strip().split('\xa0') vote = { "vote_event_id": iid, "voter_id": pp2id(" ".join([li[2], li[3]]), vote_event['start_date'], p2id), "option": option2option(li[0]), "group_id": o2id[h2s[j].text.strip()] } votes.append(vote) j += 1 ex = vpapi.get("motions", where={"id": iid}) if len(ex['_items']) < 1: vpapi.post("motions", motion) ex = vpapi.get("vote-events", where={"id": iid}) if len(ex['_items']) < 1: vpapi.post("vote-events", vote_event) ex = vpapi.get("votes", where={"vote_event_id": iid}) if len(ex['_items']) < 1: vpapi.post("votes", votes) else: if int(iid) < 14900: break except: print("XXX:" + iid) nothing = 0 # "Zmatečné hlasování"
'''creates people from API
see people-example.json
'''
import vpapi
import json

vpapi.parliament("cz/psp")

# Collect a trimmed-down record for every person, keyed by person id.
people = {}
next = True
page = 1
# NOTE(review): nothing in this excerpt advances `page` or clears `next`;
# presumably that follows below - confirm against the full script.
while next:
    peo = vpapi.get("people", page=page)
    for p in peo["_items"]:
        people[p["id"]] = {
            "id": p["id"],
            "name": p["name"],
            "birth_date": p['birth_date'],
            "gender": p["gender"],
            "sort_name": p["sort_name"],
            "given_name": p["given_name"],
            "identifiers": p["identifiers"],
            "family_name": p["family_name"]
        }
        # honorific_prefix is optional: copy it only when present.
        if "honorific_prefix" in p:
            people[p["id"]]["honorific_prefix"] = p["honorific_prefix"]
print('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip') zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip',zipped=True) #hl_hlasovani = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'s.unl') for i in range(1,4): try: hl_poslanec = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'h'+str(i)+'.unl') #savevotes(hl_poslanec) votes = {} votesli = [] # terms = {} for rowp in hl_poslanec: try: voteevents[rowp[1].strip()] except: voteevents[rowp[1].strip()] = vpapi.get('vote-events', where={'identifier': rowp[1].strip()}) r_voteevent = voteevents[rowp[1].strip()] try: people[rowp[0].strip()] except: people[rowp[0].strip()] = vpapi.get('people', where={"identifiers": {"$elemMatch": {"identifier": rowp[0].strip(), "scheme": {"$regex": "psp.cz/poslanec/*", "$options": "i"} }}}) r_pers = people[rowp[0].strip()] try: organizations[r_pers["_items"][0]["id"]] except: organizations[r_pers["_items"][0]["id"]] = vpapi.get('memberships',where={"person_id":r_pers["_items"][0]["id"]},embed=["organization"]) r_org = organizations[r_pers["_items"][0]["id"]] for rowo in r_org["_items"]:
{'name': 'PSP','note':'abbreviation'} ], 'founding_date': scrapeutils.cs2iso(row[6].strip()) } if (row[7].strip() != ''): org["dissolution_date"] = scrapeutils.cs2iso(row[7].strip()) save_organization(org) # political groups for row in organy: if row[2] == '1': org = { "name": row[4].strip(), 'classification': 'political group', 'id': row[0].strip(), 'identifiers': [ {"identifier": row[0].strip(), "scheme": 'psp.cz/organy'} ], 'other_names': [ {'name': row[3].strip(), 'note':'abbreviation'} ], 'founding_date': scrapeutils.cs2iso(row[6].strip()) } if (row[7].strip() != ''): org["dissolution_date"] = scrapeutils.cs2iso(row[7].strip()) # get parent r_parent = vpapi.get('organizations', where={'identifiers': {'$elemMatch': {"identifier": row[1].strip(), "scheme": "psp.cz/organy"}}}) org["parent_id"] = r_parent["_items"][0]["id"] save_organization(org)
iid += 1 gid = re.search('par_2=(\d{1,})', a.xpath('@href')[0]).group(1).strip() o = re.search('O=(\d{1,})', a.xpath('@href')[0]).group(1).strip() groups[a.text]["identifiers"][gid] = { "scheme": "senat.cz/" + o, "identifier": gid } # save it j = 0 for person in people: print(j) j += 1 ex = vpapi.get("people", where={"id": person['id']}) if len(ex['_items']) < 1: vpapi.post("people", person) #vpapi.post("people",people) group = {"name": "Senát Parlamentu ČR", "classification": "chamber", "id": "1"} # some are not available by the algorithm above: vpapi.post("organizations", group) group = { "name": "Nezařazení", "classification": "political group", "parent_id": "1", "id": str(iid) } iid += 1 vpapi.post("organizations", group) group = {