District_Vacancy.create( district = member['district'] , session = session.id , chamber = member['chamber'] ) except Exception as e: if 'duplicate' in e.message: pass else: print e print 'Line #{0}'.format(inspect.currentframe().f_lineno) else: # parse the member's raw name, save the results to the name_dict member['name_dict'].update(parse_name(member['raw_name'])['name_dict']) member['party'] = get_party(d_p[1].strip()) if member['party'] == '': member['party'] = None # the urls for H 2009 and 2010 are like later years, though everything else is like earlier years if session.year in [2009, 2010]: member['url'] = 'http://house.mo.gov/member.aspx?year={0}&district={1}'.format(session.year, member['district']) else: member['url'] = tds[0].find('a')['href'] members.append(member) # now, deal with the senate
try: with db.atomic(): race.save() except Exception as e: if 'duplicate' in e.message: pass else: print 'Error on line #{0}: {1}'.format(inspect.currentframe().f_lineno, e) for candidate in race.candidates: candidate.race = race.id raw_name = candidate.raw_name parsed_name = parse_name(candidate.raw_name.replace(',,', ','))['name_dict'] # set the person attribute candidate.person = get_or_create_person(parsed_name)['person'] for k, v in candidate._data.iteritems(): print ' {0}: {1}'.format(k, repr(v)) # now save try: with db.atomic(): candidate.save() except Exception as e: if 'duplicate' in e.message: pass else:
# First attempt: look the member up by district, assembly and chamber 'H'.
member_query = (Assembly_Member
    .select()
    .where(
        (Assembly_Member.district == district) &
        (Assembly_Member.assembly == bill.session.assembly) &
        (Assembly_Member.chamber == 'H')
    )
)

# Exactly one member matched: use that member.
if member_query.count() == 1:
    bill_co_sponsor.sponsor = member_query.get()
# Zero or several members matched. A plain `else` is used because the
# condition is simply the complement of the one above; re-running the COUNT
# query (as an `elif ... != 1` would) adds nothing.
else:
    parsed_name = parse_name(bill_co_sponsor.raw_name)['name_dict']
    # Second attempt: narrow the same lookup by the parsed last name.
    member_query = (Assembly_Member
        .select()
        .join(Person)
        .where(
            (Person.last_name == parsed_name['last_name']) &
            (Assembly_Member.district == district) &
            (Assembly_Member.assembly == bill.session.assembly) &
            (Assembly_Member.chamber == 'H')
        )
    )
    if member_query.count() == 1:
        bill_co_sponsor.sponsor = member_query.get()
def get_hb_sponsor(bill, sponsor_type):
    """
    Takes a bill and a sponsor type ('S' reads bill.sponsor_string, 'C' reads
    bill.co_sponsor_string). Parses the raw name and queries for the matching
    chamber 'H' member of the bill's assembly.

    The member is looked up in up to three steps, stopping at the first that
    identifies exactly one record:
      1. by the district parsed from '(NN)' in the raw name;
      2. by last name, plus the district when one was parsed;
      3. by fully parsing the name and getting or creating the Person and
         Assembly_Member records.

    Returns a Bill_Sponsor object, ready to save (it is not saved here).
    """
    bill_sponsor = Bill_Sponsor(
          bill = bill
        , sponsor_type = sponsor_type
    )

    if sponsor_type == 'S':
        bill_sponsor.raw_name = bill.sponsor_string
    elif sponsor_type == 'C':
        bill_sponsor.raw_name = bill.co_sponsor_string

    # parse the district from the raw_name
    district_match = re.search(r'\((\d+)\)', bill_sponsor.raw_name)
    # Track "was a district found" explicitly. The parsed district is a
    # string, so testing it with `district > 0` only works through Python 2's
    # arbitrary ordering of mismatched types (and raises on Python 3).
    has_district = district_match is not None
    # with no district, use 0 so the first query fails and is dealt with downstream
    district = district_match.group(1) if has_district else 0

    # query for member by district, assembly and chamber
    member_query = (Assembly_Member
        .select()
        .where(
            (Assembly_Member.district == district) &
            (Assembly_Member.assembly == bill.session.assembly) &
            (Assembly_Member.chamber == 'H')
        )
    )

    # if there's only one result in the member query, use that member
    if member_query.count() == 1:
        bill_sponsor.sponsor = member_query.get()
        return bill_sponsor

    # zero or several members matched, so try to narrow by last name.
    # for some reason, these two legislator names are sometimes formatted differently
    if 'Stacey Newman' in bill_sponsor.raw_name:
        last = 'Newman'
    elif 'Scharnhorst' in bill_sponsor.raw_name:
        last = 'Scharnhorst'
    else:
        last_match = re.search(r'^([\w-]+)(?:\s\w+)?,', bill_sponsor.raw_name)
        # a raw name without the expected 'Last,' prefix yields no last name;
        # skip the last-name query and go straight to the full parse below
        last = last_match.group(1) if last_match is not None else None

    if last is not None:
        if has_district:
            # query for member by last_name, district, assembly and chamber
            member_query = (Assembly_Member
                .select()
                .join(Person)
                .where(
                    (Person.last_name == last) &
                    (Assembly_Member.district == district) &
                    (Assembly_Member.assembly == bill.session.assembly) &
                    (Assembly_Member.chamber == 'H')
                )
            )
        else:
            # query for member by last_name, assembly and chamber
            member_query = (Assembly_Member
                .select()
                .join(Person)
                .where(
                    (Person.last_name == last) &
                    (Assembly_Member.assembly == bill.session.assembly) &
                    (Assembly_Member.chamber == 'H')
                )
            )

        if member_query.count() == 1:
            bill_sponsor.sponsor = member_query.get()
            return bill_sponsor

    # last resort: build the member from the fully parsed name
    # remove the 'etal' from the raw_name
    name_string = re.sub(r'[\.\s]+et\s*al\.*', '', bill_sponsor.raw_name).strip()
    # parse the name
    parsed_name = parse_name(name_string)
    # remove the district (it is passed to Assembly_Member, not to the person lookup)
    district = parsed_name['name_dict'].pop('district')
    # get or create a person
    person = get_or_create_person(parsed_name['name_dict'])['person']
    # get or create an assembly_member
    with db.atomic():
        bill_sponsor.sponsor = Assembly_Member.get_or_create(
              person = person
            , district = district
            , assembly = bill.session.assembly
            , chamber = 'H'
        )[0]

    # hand back the unsaved bill_sponsor record; saving is left to the caller
    return bill_sponsor
try: content = get_content(source_doc, requests_session) except requests.exceptions.ConnectionError as e: print e print ' Connection failed. Retrying...' requests_session = requests.session() except Exception as e: print e # loop over the links extracted from the co-sponsor page for link in extract_links(content, bill.co_sponsor_link): if 'District' in link['name']: # parse the name out of the link text parse_link_name = parse_name(link['name']) if parse_link_name['success']: # find the assembly member record # first, query with the parsed last name, the district, the chamber and the assembly member_query = (Assembly_Member .select() .join(Person) .where( (Person.last_name == parse_link_name['name_dict']['last_name']) & (Assembly_Member.district == parse_link_name['name_dict']['district']) & (Assembly_Member.chamber == 'S') & (Assembly_Member.assembly == bill.session.assembly) ) ) if member_query.count() == 1:
try: content = get_content(source_doc, requests_session) except requests.exceptions.ConnectionError as e: print e print " Connection failed. Retrying..." requests_session = requests.session() except Exception as e: print e # loop over the links extracted from the co-sponsor page for link in extract_links(content, bill.co_sponsor_link): if "District" in link["name"]: # parse the name out of the link text parse_link_name = parse_name(link["name"]) if parse_link_name["success"]: # find the assembly member record # first, query with the parsed last name, the district, the chamber and the assembly member_query = ( Assembly_Member.select() .join(Person) .where( (Person.last_name == parse_link_name["name_dict"]["last_name"]) & (Assembly_Member.district == parse_link_name["name_dict"]["district"]) & (Assembly_Member.chamber == "S") & (Assembly_Member.assembly == bill.session.assembly) ) ) if member_query.count() == 1:
def get_hb_sponsor(bill, sponsor_type):
    """
    Takes a bill and a sponsor type ('S' reads bill.sponsor_string, 'C' reads
    bill.co_sponsor_string). Parses the raw name and queries for the matching
    chamber 'H' member of the bill's assembly.

    The member is looked up in up to three steps, stopping at the first that
    identifies exactly one record:
      1. by the district parsed from '(NN)' in the raw name;
      2. by last name, plus the district when one was parsed;
      3. by fully parsing the name and getting or creating the Person and
         Assembly_Member records.

    Returns a Bill_Sponsor object, ready to save (it is not saved here).
    """
    bill_sponsor = Bill_Sponsor(bill=bill, sponsor_type=sponsor_type)

    if sponsor_type == 'S':
        bill_sponsor.raw_name = bill.sponsor_string
    elif sponsor_type == 'C':
        bill_sponsor.raw_name = bill.co_sponsor_string

    # parse the district from the raw_name
    district_match = re.search(r'\((\d+)\)', bill_sponsor.raw_name)
    # Track "was a district found" explicitly. The parsed district is a
    # string, so testing it with `district > 0` only works through Python 2's
    # arbitrary ordering of mismatched types (and raises on Python 3).
    has_district = district_match is not None
    # with no district, use 0 so the first query fails and is dealt with downstream
    district = district_match.group(1) if has_district else 0

    # query for member by district, assembly and chamber
    member_query = (Assembly_Member.select().where(
        (Assembly_Member.district == district) &
        (Assembly_Member.assembly == bill.session.assembly) &
        (Assembly_Member.chamber == 'H')))

    # if there's only one result in the member query, use that member
    if member_query.count() == 1:
        bill_sponsor.sponsor = member_query.get()
        return bill_sponsor

    # zero or several members matched, so try to narrow by last name.
    # for some reason, these two legislator names are sometimes formatted differently
    if 'Stacey Newman' in bill_sponsor.raw_name:
        last = 'Newman'
    elif 'Scharnhorst' in bill_sponsor.raw_name:
        last = 'Scharnhorst'
    else:
        last_match = re.search(r'^([\w-]+)(?:\s\w+)?,', bill_sponsor.raw_name)
        # a raw name without the expected 'Last,' prefix yields no last name;
        # skip the last-name query and go straight to the full parse below
        last = last_match.group(1) if last_match is not None else None

    if last is not None:
        if has_district:
            # query for member by last_name, district, assembly and chamber
            member_query = (Assembly_Member.select().join(Person).where(
                (Person.last_name == last) &
                (Assembly_Member.district == district) &
                (Assembly_Member.assembly == bill.session.assembly) &
                (Assembly_Member.chamber == 'H')))
        else:
            # query for member by last_name, assembly and chamber
            member_query = (Assembly_Member.select().join(Person).where(
                (Person.last_name == last) &
                (Assembly_Member.assembly == bill.session.assembly) &
                (Assembly_Member.chamber == 'H')))

        if member_query.count() == 1:
            bill_sponsor.sponsor = member_query.get()
            return bill_sponsor

    # last resort: build the member from the fully parsed name
    # remove the 'etal' from the raw_name
    name_string = re.sub(r'[\.\s]+et\s*al\.*', '', bill_sponsor.raw_name).strip()
    # parse the name
    parsed_name = parse_name(name_string)
    # remove the district (it is passed to Assembly_Member, not to the person lookup)
    district = parsed_name['name_dict'].pop('district')
    # get or create a person
    person = get_or_create_person(parsed_name['name_dict'])['person']
    # get or create an assembly_member
    with db.atomic():
        bill_sponsor.sponsor = Assembly_Member.get_or_create(
            person=person,
            district=district,
            assembly=bill.session.assembly,
            chamber='H')[0]

    # hand back the unsaved bill_sponsor record; saving is left to the caller
    return bill_sponsor
# The district is the stripped text of the second cell in `tds`.
district = tds[1].text.strip()

# First attempt: look the member up by district, assembly and chamber 'H'.
member_query = (Assembly_Member.select().where(
    (Assembly_Member.district == district) &
    (Assembly_Member.assembly == bill.session.assembly) &
    (Assembly_Member.chamber == 'H')))

# Exactly one member matched: use that member.
if member_query.count() == 1:
    bill_co_sponsor.sponsor = member_query.get()
# Zero or several members matched. A plain `else` is used because the
# condition is simply the complement of the one above; re-running the COUNT
# query (as an `elif ... != 1` would) adds nothing.
else:
    parsed_name = parse_name(bill_co_sponsor.raw_name)['name_dict']
    # Second attempt: narrow the same lookup by the parsed last name.
    member_query = (Assembly_Member.select().join(Person).where(
        (Person.last_name == parsed_name['last_name']) &
        (Assembly_Member.district == district) &
        (Assembly_Member.assembly == bill.session.assembly) &
        (Assembly_Member.chamber == 'H')))

    if member_query.count() == 1:
        bill_co_sponsor.sponsor = member_query.get()
    else:
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        print('Error: Could not find co-sponsor!')
        print(bill_co_sponsor.raw_name)
        # No sponsor was assigned on this path, so report the last name that
        # was searched for instead of dereferencing bill_co_sponsor.sponsor.
        print(parsed_name['last_name'])
try: with db.atomic(): race.save() except Exception as e: if 'duplicate' in e.message: pass else: print 'Error on line #{0}: {1}'.format(inspect.currentframe().f_lineno, e) # now loop over the candidates for candidate in race.candidates: # set the race attribute candidate.race = race.id parsed_name = parse_name(candidate.raw_name)['name_dict'] # set the person attribute candidate.person = get_or_create_person(parsed_name)['person'] for k, v in candidate._data.iteritems(): print ' {0}: {1}'.format(k, v) print '------' # now save try: with db.atomic(): candidate.save() except Exception as e: if 'duplicate' in e.message: