# Resolve the log file to an absolute path under LOGS_DIR and route every
# DEBUG-level message of this run into it; silence the chatty `requests` logger.
logname = os.path.join(LOGS_DIR, logname)
logname = os.path.abspath(logname)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(message)s',
    handlers=[logging.FileHandler(logname, 'w', 'utf-8')],
)
logging.getLogger('requests').setLevel(logging.ERROR)
logging.info('Started')

# Register this scraper run in the API's `logs` collection.
db_log = vpapi.post('logs', {'status': 'running', 'file': logname, 'params': []})

# All electoral terms of the chamber — immediately overridden below, so only
# the 2013 term is actually processed on this run.
terms = [1993, 1996, 1998, 2002, 2006, 2010, 2013]
terms = [2013]
test = {}
#terms = [2010]

# For each selected term: fetch the open-data voting archive and store all
# motions and vote-events it contains.
for term in terms:
    archive_url = 'http://www.psp.cz/eknih/cdrom/opendata/hl-' + str(term) + 'ps.zip'
    zfile = scrapeutils.download(archive_url, zipped=True)
    hl_hlasovani = scrapeutils.zipfile2rows(zfile, 'hl' + str(term) + 's.unl')
    saveallmotionsandvoteevents(hl_hlasovani)

#j = 0
#last_ve_id = 0
#voteevents = {}
#people = {}
#organizations = {}
#for term in terms:
#    logging.info('Started year ' + str(term))
#    print('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip')
#    zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip',zipped=True)
#    #hl_hlasovani = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'s.unl')
#    for i in range(1,4):
# --- truncated fragment: the opening `try:` and the enclosing function/method
# --- are cut off before this point; `r`, `update` and `self` come from that
# --- missing context. NOTE(review): `self` is indexed like a dict below, so
# --- this presumably belongs to a dict-like membership record's save method.
        print("not updating: " + r['_items'][0]['id'])
    except:
        # BUG(review): bare `except:` silently swallows every error raised
        # above (including KeyError/IndexError); `nothing = 0` is a no-op
        # placeholder. Should catch a specific exception and at least log it.
        nothing = 0
    if update:
        # Replace the stored membership: delete the old record and re-post it
        # under the same id (PUT reportedly did not work — see comment below).
        vpapi.delete("memberships", r['_items'][0]['id'])
        self['id'] = r['_items'][0]['id']
        r = vpapi.post('memberships', self)
        print("updating: " + self['id'])
        # r = vpapi.put('memberships/%s' % r['_items'][0]['id'],self)
    if r['_status'] != 'OK':
        raise Exception(self.name, r)


# Download the MPs archive and load the `zarazeni` (assignment) table, which
# links persons to the bodies they served in.
zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/poslanci.zip', zipped=True)
zarazeni = scrapeutils.zipfile2rows(zfile, 'zarazeni.unl')
from datetime import datetime
i = 0
for row in zarazeni:
    # Find the organization referenced in column 1 via its psp.cz identifier.
    r_org = vpapi.get('organizations', where={'identifiers': {'$elemMatch': {"identifier": row[1].strip(), "scheme": "psp.cz/organy"}}})
    if len(r_org["_items"]) > 0:
        # Find the person referenced in column 0 via its psp.cz identifier.
        r_pers = vpapi.get('people', where={'identifiers': {'$elemMatch': {"identifier": row[0].strip(), "scheme": "psp.cz/osoby"}}})
        if len(r_pers["_items"]) > 0:
            # Build a Popolo-style membership linking the person to the body.
            membership = {
                "label": "Člen",
                "role": "member",
                "person_id": r_pers["_items"][0]['id'],
                "organization_id": r_org["_items"][0]['id'],
                # "id": str(i),
                # Column 3 holds a '%Y-%m-%d %H' timestamp; keep the date part.
                "start_date": datetime.strptime(row[3].strip(), '%Y-%m-%d %H').strftime('%Y-%m-%d')
# --- truncated fragment: the membership dict continues past this point.
# --- truncated fragment: the enclosing function (which produced `r` and
# --- `scraped`) starts before this point.
    if not r['_items']:
        # No existing person matched the query: create a new record.
        r = vpapi.post('people', scraped)
    else:
        # update by PUT is preferred over PATCH to correctly remove properties that no longer exist now
        existing = r['_items'][0]
        # somehow vpapi.put does not work for me, so delete and post
        # r = vpapi.put('people', existing['id'], scraped)
        vpapi.delete("people", existing['id'])
        r = vpapi.post('people', scraped)
    if r['_status'] != 'OK':
        # BUG(review): `resp` is not defined anywhere in this fragment — this
        # raise would itself fail with NameError; `r` was probably intended.
        raise Exception(self.name, resp)
    return r['id']


# Download the MPs archive and load the person (`osoby`) and MP (`poslanec`)
# tables.
zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/poslanci.zip', zipped=True)
osoby = scrapeutils.zipfile2rows(zfile, 'osoby.unl')
poslanec = scrapeutils.zipfile2rows(zfile, 'poslanec.unl')

# Index the person rows by their psp.cz id (column 0) for quick lookup.
oosoby = {}
for row in osoby:
    oosoby[row[0].strip()] = row

persons = {}
terms = {}  # cache: psp.cz organ identifier -> previously looked-up term body
for row in poslanec:
    oid = row[1].strip()
    # EAFP-style cache probe: only hit the API for a term not yet cached.
    try:
        terms[row[4].strip()]
    except Exception:
        r_t = vpapi.get("organizations", where={'identifiers': {'$elemMatch': {"identifier": row[4].strip(), "scheme": "psp.cz/organy"}}})
# --- truncated fragment: the except-branch continues past this point.
# Name the log file after the current UTC timestamp, resolve it to an absolute
# path under LOGS_DIR, and route all DEBUG messages of this run into it.
logname = datetime.utcnow().strftime('%Y-%m-%d-%H%M%S') + '.log'
logname = os.path.join(LOGS_DIR, logname)
logname = os.path.abspath(logname)
logging.basicConfig(level=logging.DEBUG, format='%(message)s', handlers=[logging.FileHandler(logname, 'w', 'utf-8')])
logging.getLogger('requests').setLevel(logging.ERROR)
logging.info('Started')

# Register this scraper run in the API's `logs` collection.
db_log = vpapi.post('logs', {'status': 'running', 'file': logname, 'params': []})

# Electoral terms of the chamber to scrape.
terms = [1993, 1996, 1998, 2002, 2006, 2010, 2013]
test = {}
#terms = [2010]
for term in terms:
    # Fetch the term's voting archive and store all motions and vote-events.
    zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip',zipped=True)
    hl_hlasovani = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'s.unl')
    saveallmotionsandvoteevents(hl_hlasovani)

j = 0
last_ve_id = 0
voteevents = {}  # caches keyed by identifier to avoid repeated API lookups
people = {}
organizations = {}
for term in terms:
    logging.info('Started year ' + str(term))
    print('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip')
    # Re-download the same archive for the per-MP vote files (hl<term>h<i>.unl).
    zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/hl-'+str(term)+'ps.zip',zipped=True)
    #hl_hlasovani = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'s.unl')
    for i in range(1,4):
# --- truncated fragment: the body of this inner loop continues past this point.
# --- truncated fragment: the enclosing function and the `if` branch matching
# --- this `else` (which produced `r` and `scraped`) are cut off before here.
    else:
        # update by PUT is preferred over PATCH to correctly remove properties that no longer exist now
        # outid = r['_items'][0]['id']
        existing = r['_items'][0]
        # r = vpapi.put('organizations', existing['id'], scraped)
        #somehow vpapi.put does not work for me, so delete and post
        #vpapi.put(resource,item['id'],item)
        vpapi.delete("organizations",existing['id'])
        r = vpapi.post('organizations', scraped)
        print ("PUT " + scraped['id'])
    if r['_status'] != 'OK':
        # NOTE(review): `scraped` is subscripted like a dict above, so
        # `scraped.name` here would raise AttributeError — probably
        # `scraped['name']` or `scraped['id']` was meant; confirm.
        raise Exception(scraped.name, r)
    return r['id']


# Download the MPs archive and load the `organy` (bodies) table.
zfile = scrapeutils.download('http://www.psp.cz/eknih/cdrom/opendata/poslanci.zip',zipped=True)
organy = scrapeutils.zipfile2rows(zfile,'organy.unl')
# chamber:
for row in organy:
    # Rows with type code '11' are treated as chamber records (see the
    # 'chamber' classification below); the term is the suffix of column 3.
    if row[2] == '11':
        term = row[3][3:]
        # Popolo-style organization record for one chamber term.
        org = {
            "name": row[4].strip(),
            'classification': 'chamber',
            'id': row[0].strip(),
            'identifiers': [
                {"identifier": term, 'scheme': 'psp.cz/term'},
                {"identifier": row[0].strip(), "scheme": 'psp.cz/organy'}
            ],
            'other_names': [
                {'name': 'PSP','note':'abbreviation'}
            ],
# --- truncated fragment: the organization dict continues past this point.
# --- truncated fragment: begins inside a loop; `votes`, `r_voteevent` and
# --- `vote` come from the missing context. The first line presumably sits in
# --- a branch that initializes the per-vote-event list on first sight.
        votes[r_voteevent["_items"][0]["id"]] = []
    votes[r_voteevent["_items"][0]["id"]].append(vote.copy())
# for k in votes:
#     vpapi.post("votes",votes[k])
# Push all accumulated votes (dict keyed by vote-event id) in one request.
vpapi.post("votes", votes)

j = 0
for term in terms:
    print(term)
    # Fetch the term's voting archive; the per-MP vote tables live in files
    # named hl<term>h<i>.unl inside it.
    zfile = scrapeutils.download("http://www.psp.cz/eknih/cdrom/opendata/hl-" + str(term) + "ps.zip", zipped=True)
    # hl_hlasovani = scrapeutils.zipfile2rows(zfile,'hl'+str(term)+'s.unl')
    for i in range(1, 4):
        print(i)
        try:
            hl_poslanec = scrapeutils.zipfile2rows(zfile, "hl" + str(term) + "h" + str(i) + ".unl")
            # savevotes(hl_poslanec)
            # savevotes(hl_poslanec)
            # Fresh per-file accumulators / lookup caches.
            votes = {}
            votesli = []
            voteevents = {}
            people = {}
            organizations = {}
            # NOTE(review): this rebinds `terms` (the list the outer loop
            # iterates) to a dict; Python keeps iterating the original list
            # object, but the shadowing is fragile — confirm it is intended.
            terms = {}
            for rowp in hl_poslanec:
                if rowp[1] in rosnicka_vote_events:
                    # EAFP cache probe: look the vote-event up only once per
                    # identifier. NOTE(review): the membership test above uses
                    # rowp[1] unstripped while the cache key is stripped —
                    # verify the column carries no padding.
                    try:
                        voteevents[rowp[1].strip()]
                    except:
                        voteevents[rowp[1].strip()] = vpapi.get("vote-events", where={"identifier": rowp[1].strip()})
# --- truncated fragment: continues past this point.