def saveallmotionsandvoteevents(hl_hlasovani):
    """Save a motion and its vote event for each row of ``hl_hlasovani``.

    Rows are read by position: ``row[0]`` is the vote id, ``row[1]`` the
    organization identifier (scheme ``psp.cz/organy``), ``row[5]`` and
    ``row[6]`` are joined with ``"T"`` into the start timestamp
    (presumably date and time), ``row[11]``/``row[12]`` feed
    ``guess_majority``, ``row[14]`` is the raw result and ``row[15]`` the
    motion text.

    Organization look-ups are cached per identifier, so ``vpapi.get`` is
    called once per distinct organization. Only votes whose id is greater
    than 60536 are saved. For every motion saved with status ``"OK"`` a
    vote event is saved as well, the vote id is recorded in the global
    ``test`` mapping, and an info message is logged.

    Raises whatever the helpers raise; in particular an ``IndexError`` if
    the organization query returns no items.
    """
    global test

    # Votes up to this id are skipped.
    # NOTE(review): looks like a resume point left from an interrupted
    # run -- confirm before relying on it.
    min_saved_id = 60536

    organizations = {}
    for row in hl_hlasovani:
        vote_id = row[0].strip()
        org_key = row[1].strip()

        # Explicit cache-miss test instead of a bare ``except:`` so that
        # unrelated errors (and Ctrl-C) are not mistaken for a missing key.
        if org_key not in organizations:
            organizations[org_key] = vpapi.get(
                'organizations',
                where={'identifiers': {'$elemMatch': {"identifier": org_key, "scheme": "psp.cz/organy"}}},
            )
        r_org = organizations[org_key]

        # The vote id is not stored under 'identifiers' on the motion; it
        # is carried by the id and by the source URL.
        motion = {
            "id": vote_id,
            "organization_id": r_org["_items"][0]['id'],
            "requirement": guess_majority(row[12], row[11]),
            "result": result2result(row[14].strip()),
            "text": row[15].strip(),
            "sources": [{'url': "http://www.psp.cz/sqw/hlasy.sqw?g=" + vote_id}],
        }
        print("motion: " + motion['id'])

        if int(vote_id) <= min_saved_id:
            continue

        r_motion = savemotion(motion)
        if r_motion["_status"] != "OK":
            continue

        # "legislative_session_id" (row[2]) is not sent: not implemented
        # in the API yet.
        vote_event = {
            "id": vote_id,
            "motion_id": r_motion['id'],
            'identifier': vote_id,
            "start_date": vpapi.local_to_utc(
                scrapeutils.cs2iso(row[5].strip() + "T" + row[6].strip())
            ),
            "result": result2result(row[14].strip()),
        }
        savevoteevent(vote_event)
        test[vote_id] = {"id": vote_id, "ve": True}
        logging.info('Motion and vote-event saved: %s', r_motion['id'])
def saveallmotionsandvoteevents(hl_hlasovani):
    """Save a motion and its vote event for each row of ``hl_hlasovani``.

    Rows are read by position: ``row[0]`` is the vote id, ``row[1]`` the
    organization identifier (scheme ``psp.cz/organy``), ``row[2]`` the
    legislative session, ``row[5]`` and ``row[6]`` are joined with a space
    into the start timestamp (presumably date and time),
    ``row[11]``/``row[12]`` feed ``guess_majority``, ``row[14]`` is the raw
    result and ``row[15]`` the motion text.

    Organization look-ups are cached per identifier, so ``vpapi.get`` is
    called once per distinct organization. Every motion is printed and
    saved; when the save reports status ``"OK"`` the matching vote event
    is saved as well.

    Raises whatever the helpers raise; in particular an ``IndexError`` if
    the organization query returns no items.
    """
    organizations = {}
    for row in hl_hlasovani:
        vote_id = row[0].strip()
        org_key = row[1].strip()

        # Explicit cache-miss test instead of a bare ``except:`` so that
        # unrelated errors (and Ctrl-C) are not mistaken for a missing key.
        if org_key not in organizations:
            organizations[org_key] = vpapi.get(
                "organizations",
                where={"identifiers": {"$elemMatch": {"identifier": org_key, "scheme": "psp.cz/organy"}}},
            )
        r_org = organizations[org_key]

        # The vote id is not stored under 'identifiers' on the motion; it
        # is carried by the id and by the source URL.
        motion = {
            "id": vote_id,
            "organization_id": r_org["_items"][0]["id"],
            "requirement": guess_majority(row[12], row[11]),
            "result": result2result(row[14].strip()),
            "text": row[15].strip(),
            "sources": [{"url": "http://www.psp.cz/sqw/hlasy.sqw?g=" + vote_id}],
        }
        print(motion)

        r_motion = savemotion(motion)
        if r_motion["_status"] != "OK":
            continue

        # The vote event deliberately carries no "id" of its own; it is
        # tied to the vote through "identifier" and "motion_id".
        vote_event = {
            "motion_id": r_motion["id"],
            "identifier": vote_id,
            "legislative_session": row[2].strip(),
            "start_date": scrapeutils.cs2iso(row[5].strip() + " " + row[6].strip()),
            "result": result2result(row[14].strip()),
        }
        savevoteevent(vote_event)
terms[row[4].strip()] except Exception: r_t = vpapi.get("organizations", where={'identifiers': {'$elemMatch': {"identifier": row[4].strip(), "scheme": "psp.cz/organy"}}}) for ident in r_t["_items"][0]["identifiers"]: if ident["scheme"] == "psp.cz/term": terms[row[4].strip()] = ident["identifier"] try: persons[row[1].strip()] except Exception: person = { "id": row[1].strip(), "name": oosoby[oid][3].strip() + " " + oosoby[oid][2].strip(), "sort_name": oosoby[oid][2].strip() + ", " + oosoby[oid][3].strip(), "family_name": oosoby[oid][2].strip(), "given_name": oosoby[oid][3].strip(), "birth_date": scrapeutils.cs2iso(oosoby[oid][5].strip()), "identifiers": [ {"identifier": row[0].strip(), "scheme": "psp.cz/poslanec/" + terms[row[4].strip()]}, {"identifier": row[1].strip(), "scheme": "psp.cz/osoby"} ] } if oosoby[oid][6].strip() == "M": person['gender'] = 'male' else: person['gender'] = 'female' if oosoby[oid][1].strip() != "": person['honorific_prefix'] = oosoby[oid][1].strip() if oosoby[oid][4].strip() != "": person['honorific_suffix'] = oosoby[oid][4].strip() if oosoby[oid][8].strip() != "": person['death_date'] = scrapeutils.cs2iso(oosoby[oid][8].strip())
print(iid) try: motion = { "text": tds[2].xpath('span//text()')[0], "result": result2result(tds[5].xpath('text()')[0]), "id": iid } except: motion = {"id": iid} vote_event = { "result": result2result(tds[5].xpath('text()')[0]), "id": iid, "motion_id": iid, "start_date": scrapeutils.cs2iso(tds[3].xpath('text()')[0]) + "T12:00:00" } url1 = "http://www.senat.cz/xqw/xervlet/pssenat/hlasy?G=" + iid domtree1 = html.fromstring(scrapeutils.download(url1)) tables = domtree1.xpath('//table') try: quorum = int( re.search('BA=(\d{1,})', tables[0].xpath('tr/td')[1].text).group(1).strip()) present = int( re.search('MNO=(\d{1,})', tables[0].xpath('tr/td')[0].text).group(1).strip()) motion['requirement'] = guess_majority(quorum, present)
# chamber: for row in organy: if row[2] == '11': term = row[3][3:] org = { "name": row[4].strip(), 'classification': 'chamber', 'id': row[0].strip(), 'identifiers': [ {"identifier": term, 'scheme': 'psp.cz/term'}, {"identifier": row[0].strip(), "scheme": 'psp.cz/organy'} ], 'other_names': [ {'name': 'PSP','note':'abbreviation'} ], 'founding_date': scrapeutils.cs2iso(row[6].strip()) } if (row[7].strip() != ''): org["dissolution_date"] = scrapeutils.cs2iso(row[7].strip()) save_organization(org) # political groups for row in organy: if row[2] == '1': org = { "name": row[4].strip(), 'classification': 'political group', 'id': row[0].strip(), 'identifiers': [ {"identifier": row[0].strip(), "scheme": 'psp.cz/organy'}
# For every row of the first table in ``domtree``, build a motion and a vote
# event, then download the per-vote detail page to derive the motion's
# requirement.
table = domtree.xpath('//table')[0]
trs = table.xpath('tr')
for tr in trs:
    tds = tr.xpath('td')
    # The vote id is the numeric ``G`` query parameter of the link in the
    # seventh cell; it is used as the id of both records.
    # Patterns are raw strings so that ``\d`` reaches the regex engine
    # without relying on an invalid string escape.
    iid = re.search(r'G=(\d{1,})', tds[6].xpath('a/@href')[0]).group(1).strip()
    motion = {
        "text": tds[2].xpath('span//text()')[0],
        "result": result2result(tds[5].xpath('text()')[0]),
        "id": iid
    }
    vote_event = {
        "result": result2result(tds[5].xpath('text()')[0]),
        "id": iid,
        "motion_id": iid,
        "start_date": scrapeutils.cs2iso(tds[3].xpath('text()')[0])
    }
    url1 = "http://www.senat.cz/xqw/xervlet/pssenat/hlasy?G=" + iid
    domtree1 = html.fromstring(scrapeutils.download(url1))
    tables = domtree1.xpath('//table')
    # ``BA=`` and ``MNO=`` are read from the text of the second and first
    # cell of the detail page's first table, respectively.
    quorum = int(
        re.search(r'BA=(\d{1,})', tables[0].xpath('tr/td')[1].text).group(1).strip())
    present = int(
        re.search(r'MNO=(\d{1,})', tables[0].xpath('tr/td')[0].text).group(1).strip())
    motion['requirement'] = guess_majority(quorum, present)