def process_page(self):
    """Build a ``Person`` from the member detail page held in ``self.root``.

    Name, party, district and image come from the page's selectors.  A
    contact-form link is derived from the source URL, and the capitol office
    is filled in only when the page's first ``.districtheadleft`` element
    reads "Capitol Address:".
    """
    root = self.root

    # Drop the first whitespace-separated token of the heading text.
    full_name = self.name_css.match_one(root).text.split(maxsplit=1)[1]
    member = Person(
        name=full_name,
        state="ok",
        # NOTE(review): the contact URL below is on okhouse.gov -- confirm
        # that "upper" is the intended chamber.
        chamber="upper",
        party=self.party_css.match_one(root).text,
        district=self.district_css.match_one(root).text.split()[1],
    )
    member.image = self.image_selector.match_one(root).get("href")

    contact_form = self.source.url.replace("District.aspx", "Contact.aspx")
    assert contact_form.startswith(
        "https://www.okhouse.gov/Members/Contact.aspx?District="
    )
    member.add_link(contact_form, note="Contact Form")

    # capitol address
    heading = CSS(".districtheadleft").match(root)[0].text_content().strip()
    if heading == "Capitol Address:":
        detail = CSS(".districtheadleft + div").match(root)[0]
        detail_lines = detail.text_content().strip().splitlines()
        # Every line but the last is address text; the last is the phone.
        member.capitol_office.address = "; ".join(
            [entry.strip() for entry in detail_lines[:-1]]
        )
        # NOTE(review): other scrapers set ``capitol_office.voice``; confirm
        # whether ``phone`` is the intended attribute here.
        member.capitol_office.phone = detail_lines[-1].strip()

    return member
def process_item(self, row):
    """Build a ``Person`` for one row of the senator listing.

    Rows without a first name are skipped and ``None`` is returned.  Image,
    profile URL, office phone and email are read from ``self.extra_info``,
    keyed by the legislator's full name.  The capitol office address is set
    only when ``Address2`` mentions one of the listed capitol buildings.

    Note: ``row["Zipcode"]`` is stripped in place.
    """
    if not row["First Name"]:
        return None

    name = "{} {}".format(row["First Name"], row["Last Name"])
    party = PARTIES[row["Party"]]
    extra = self.extra_info[name]
    leg = Person(
        name=name,
        district=row["District"].lstrip("0"),
        party=party,
        state="mn",
        chamber="upper",
        image=extra["image"],
    )

    if "url" in extra:
        leg.add_link(extra["url"])
    if "office_phone" in extra:
        leg.capitol_office.voice = extra["office_phone"]
    if "email" in extra:
        leg.email = extra["email"]

    row["Zipcode"] = row["Zipcode"].strip()

    # The previous condition was a bare generator expression, which is always
    # truthy, so the building check never filtered anything.  any() makes the
    # check effective; a missing Address2 is treated as "no match".
    capitol_buildings = (
        "95 University Avenue W",
        "100 Rev. Dr. Martin Luther King",
    )
    address2 = row["Address2"] or ""
    if any(building in address2 for building in capitol_buildings):
        address = "{Address}\n{Address2}\n{City}, {State} {Zipcode}".format(**row)
        if "Rm. Number" in row:
            address = "{0} {1}".format(row["Rm. Number"], address)
        leg.capitol_office.address = address

    leg.add_source(self.source.url)
    leg.add_source(SEN_HTML_URL)
    return leg
def process_item(self, item):
    """Turn one six-cell row of the senator table into a ``Person``.

    The row's children are, in order: member, party, district, contact link,
    phone and office.
    """
    (
        member_td,
        party_td,
        district_td,
        contact_td,
        phone_td,
        office_td,
    ) = item.getchildren()

    name = member_td.text_content()
    district = district_td.text_content()

    # skip vacant districts
    if "Interim District" in name:
        self.skip()

    # each of these <td> have a single link
    anchor = CSS("a")
    leg_url = anchor.match_one(member_td).get("href")
    contact_url = anchor.match_one(contact_td).get("href")

    # construct this URL based on observation elsewhere on senate.michigan.gov
    image_url = f"https://senate.michigan.gov/_images/{district}{ord_suffix(district)}.jpg"

    senator = Person(
        **split_name(name),
        state="mi",
        chamber="upper",
        district=district,
        party=self.PARTY_MAP[party_td.text],
        image=image_url,
    )

    capitol = senator.capitol_office
    capitol.voice = str(phone_td.text_content())
    capitol.address = str(office_td.text_content())

    senator.add_source(self.source.url)
    senator.add_link(leg_url)
    senator.add_link(contact_url, note="Contact")
    return senator
def process_page(self):
    """Create the member's ``Person`` and attach any offices on the page.

    For each of the "district" and "capitol" office headings, the first
    following-sibling ``<ul>`` is read: its first ``<span>`` supplies the
    address lines and its second the phone number.  Missing headings are
    skipped.
    """
    rep = Person(
        state="fl",
        chamber="lower",
        name=fix_name(self.input.name),
        party=str(self.input.party),
        district=str(self.input.district),
        image=self.input.image,
    )

    for otype in ("district", "capitol"):
        lists = self.root.xpath(
            f"//h3[@id='{otype}-office']/following-sibling::ul")
        if not lists:
            continue

        spans = lists[0].xpath(".//span")
        if otype == "capitol":
            office = rep.capitol_office
        else:
            office = rep.district_office

        # Collapse the multi-line address into one "; "-separated string,
        # dropping blank lines.
        raw_lines = spans[0].text_content().strip().splitlines()
        office.address = "; ".join(
            entry.strip() for entry in raw_lines if entry.strip()
        )
        office.voice = spans[1].text_content().strip()

    return rep
def get_data(self):
    """Build a ``Person`` from the senator biography page in ``self.root``.

    The party is taken from the ``.bSenBio__infoIt`` element whose text
    contains "Party:" (the last such element wins, as before).

    Raises:
        ValueError: if no ``.bSenBio__infoIt`` element contains "Party:".
            Previously this surfaced as an ``UnboundLocalError`` on ``party``.
    """
    party = None
    for bio in CSS(".bSenBio__infoIt").match(self.root):
        bio_text = bio.text_content()
        if "Party:" in bio_text:
            party = bio_text.split(":")[1].strip()

    if party is None:
        raise ValueError("no 'Party:' entry found in .bSenBio__infoIt elements")

    p = Person(
        name=self.name_css.match_one(self.root).text,
        state="ok",
        chamber="upper",
        party=party,
        image=self.image_css.match_one(self.root).get("href"),
        district=self.district_css.match_one(self.root).text.strip().split()[1],
    )
    p.capitol_office.address = self.address_css.match_one(self.root).text
    # NOTE(review): other scrapers set ``capitol_office.voice``; confirm
    # whether ``phone`` is the intended attribute here.
    p.capitol_office.phone = self.phone_css.match_one(self.root).text
    p.add_link(
        self.contact_link_sel.match_one(self.root).get("href"),
        "Contact Form",
    )
    return p
def test_model_class_with_export_data_method(self, query, request_ctx):
    """A view returning the ``Person`` class yields the selected fields.

    With ``select`` limited to ``firstname`` and ``page=3``, the first element
    of the result is the exported row and the second carries the total and
    page values.
    """
    john = Person(id=300, firstname="John", lastname="Connor")
    query.paginate().items = [john]
    query.paginate().total = 1

    expected_rows = [{"firstname": "John"}]
    expected_meta = {"X-Total": 1, "X-Page": 3}

    with request_ctx('/?select={"firstname": 1}&page=3'):
        view = collection()(lambda: Person)
        rv = view()
        assert rv[0] == expected_rows
        assert rv[1] == expected_meta
def process_page(self):
    """Build a Maryland ``Person`` from the member page in ``self.root``.

    The chamber is left as ``None``; the page URL is recorded both as a link
    and as a source.
    """
    # NOTE: the "Annapolis Info" and "Interim Info" <dd> blocks are not
    # extracted here.

    # email is formatted mailto:<addr>?body...
    mailto_href = SimilarLink("mailto:").match_one(self.root).get("href")
    after_scheme = mailto_href.split(":", 1)[1]
    email = after_scheme.split("?")[0]

    # The heading's first space-separated token is dropped from the name.
    heading = CSS("h2").match_one(self.root).text
    member = Person(
        name=heading.split(" ", 1)[1],
        state="md",
        image=self.image_sel.match_one(self.root).get("src"),
        party=self.extract_dd("Party"),
        district=self.extract_dd("District"),
        chamber=None,
        email=email,
    )

    page_url = self.source.url
    member.add_link(page_url)
    member.add_source(page_url)
    return member
def handle_list_item(self, item):
    """Build a ``Person`` from a five-cell roster row.

    The row's children are, in order: email, name, party, seat and phone.
    Returns a ``(person, url)`` tuple, where ``url`` is the first link in the
    name cell.
    """
    email_td, name_td, party_td, seat_td, phone_td = item.getchildren()

    # The seat text splits into a chamber code and a district.
    chamber_code, district = seat_td.text_content().strip().split()
    url = str(name_td.xpath('a/@href')[0])

    person = Person(
        name=clean_name(name_td.text_content()),
        state='mt',
        party=party_td.text_content().strip(),
        chamber=('upper' if chamber_code == 'SD' else 'lower'),
        district=district,
    )
    person.add_link(url)
    person.add_source(url)

    # Only phone strings of exactly 14 characters, or longer than 30, are
    # used; for the long form only the part before the first space is kept.
    phone_text = phone_td.text_content().strip()
    phone_len = len(phone_text)
    if phone_len == 14:
        person.capitol_office.voice = phone_text
    elif phone_len > 30:
        person.capitol_office.voice = phone_text.split(' ')[0]

    mail_hrefs = email_td.xpath('./a/@href')
    if mail_hrefs:
        # Keep what follows the first ':' of the href.
        person.capitol_office.email = mail_hrefs[0].split(':', 1)[1]

    return person, url
def process_item(self, item):
    """Turn one seven-cell row of the representative table into a ``Person``.

    The row's children are, in order: website, district, name, party, office,
    phone and email.  Abbreviations in the office text are expanded using
    ``self.office_names``.
    """
    (
        website_td,
        district_td,
        name_td,
        party_td,
        office_td,
        phone_td,
        email_td,
    ) = item.getchildren()

    # skip header row
    if website_td.tag == "th":
        self.skip()

    office_text = office_td.text_content()
    for short_form, long_form in self.office_names.items():
        office_text = office_text.replace(short_form, long_form)

    rep = Person(
        name=name_td.text_content(),
        state="mi",
        chamber="lower",
        district=district_td.text_content().lstrip("0"),
        party=party_td.text_content(),
        email=email_td.text_content(),
    )
    rep.add_link(CSS("a").match_one(website_td).get("href"))
    rep.add_source(self.source.url)

    capitol = rep.capitol_office
    capitol.voice = phone_td.text_content()
    capitol.address = office_text
    return rep
def process_page(self):
    """Build an Ohio ``Person`` from ``self.input`` and add contact details.

    Contact details are read from the ``.member-info-bar-value`` elements of
    the last ``.member-info-bar-module`` on the page.
    """
    details = self.input
    rep = Person(
        state="oh",
        chamber="lower",
        district=details.district,
        name=details.name,
        party=details.party,
        image=details.image,
    )
    rep.add_source(self.input.url)
    rep.add_link(self.input.url)

    modules = CSS(".member-info-bar-module").match(self.root)
    # last div is contact details
    for value_div in CSS(".member-info-bar-value").match(modules[-1]):
        text = value_div.text_content()
        if ", OH" in text:
            # The address is spread over the first child's text and tail and
            # the second child's tail; stitch the three pieces together.
            children = value_div.getchildren()
            pieces = (
                children[0].text.strip(),
                children[0].tail.strip(),
                children[1].tail.strip(),
            )
            rep.capitol_office.address = "; ".join(pieces)
        elif "Phone:" in text:
            rep.capitol_office.voice = text.split(": ")[1]
        elif "Fax:" in text:
            rep.capitol_office.fax = text.split(": ")[1]

    return rep
def handle_list_item(self, item, session_num):
    """Build an Alaska ``Person`` from one member element.

    Field values are collected for every name in ``ELEMENTS``; the chamber and
    member code come from the element's attributes.  The capitol address is
    set only when the building field is "CAPITOL".
    """
    fields = {}
    for elem in ELEMENTS:
        fields[elem] = _get_if_exists(item, elem)

    chamber_flag = item.attrib["chamber"]
    code = item.attrib["code"].lower()

    person = Person(
        name="{FirstName} {LastName}".format(**fields),
        given_name=fields['FirstName'],
        family_name=fields['LastName'],
        state='ak',
        party=fields["Party"],
        chamber=('upper' if chamber_flag == 'S' else 'lower'),
        district=fields["District"],
        image=f"http://akleg.gov/images/legislators/{code}.jpg",
    )

    detail_url = "http://www.akleg.gov/basis/Member/Detail/{}?code={}".format(
        session_num,
        code,
    )
    person.add_link(detail_url)
    person.add_source("http://w3.akleg.gov/")

    # Prefix "907-" and put a dash after the first three characters.
    raw_phone = fields["Phone"]
    person.capitol_office.voice = "907-" + raw_phone[0:3] + "-" + raw_phone[3:]
    person.capitol_office.email = fields["EMail"]

    if fields["Building"] == "CAPITOL":
        person.capitol_office.address = "State Capitol Room {}; Juneau AK, 99801".format(
            fields["Room"])

    return person
def process_page(self):
    """Build a Missouri ``Person`` from ``self.input`` plus the page photo.

    The single-letter party code in ``self.input.party`` must be "D" or "R".
    """
    member = self.input

    party_names = {"D": "Democratic", "R": "Republican"}
    party = party_names[member.party]

    photo_img = CSS("img#ContentPlaceHolder1_imgPhoto1").match_one(self.root)
    photo = photo_img.get("src")

    rep = Person(
        state="mo",
        party=party,
        image=photo,
        chamber="lower",
        district=member.district,
        name=f"{self.input.first_name} {self.input.last_name}",
        given_name=member.first_name,
        family_name=member.last_name,
    )
    # TODO
    # p.extras["hometown"] = self.input.hometown

    rep.capitol_office.voice = member.voice
    rep.capitol_office.address = (
        "MO House of Representatives; 201 West Capitol Avenue; "
        f"Room {self.input.room}; Jefferson City MO 65101 ")

    rep.add_link(member.url)
    rep.add_source(member.url)
    return rep
def process_item(self, item):
    """Build a ``Person`` from a five-cell roster row.

    The row's children are, in order: email, name, party, seat and phone.
    The first link in the name cell is used as both link and source.
    """
    email_td, name_td, party_td, seat_td, phone_td = item.getchildren()

    # The seat text splits into a chamber code and a district.
    chamber_code, district = seat_td.text_content().strip().split()
    url = str(name_td.xpath("a/@href")[0])

    person = Person(
        name=clean_name(name_td.text_content()),
        state="mt",
        party=party_td.text_content().strip(),
        chamber=("upper" if chamber_code == "SD" else "lower"),
        district=district,
    )
    person.add_link(url)
    person.add_source(url)

    # Only phone strings of exactly 14 characters, or longer than 30, are
    # used; for the long form only the part before the first space is kept.
    phone_text = phone_td.text_content().strip()
    phone_len = len(phone_text)
    if phone_len == 14:
        person.capitol_office.voice = phone_text
    elif phone_len > 30:
        person.capitol_office.voice = phone_text.split(" ")[0]

    mail_hrefs = email_td.xpath("./a/@href")
    if mail_hrefs:
        # Keep what follows the first ':' of the href.
        person.capitol_office.email = mail_hrefs[0].split(":", 1)[1]

    return person
def process_item(self, item):
    """Build a Hawaii ``Person`` from one listing item.

    The second ``<a>`` in the item supplies the last name and profile URL;
    the remaining fields are read from elements whose ``id`` ends with the
    labels in ``self.LABELS``.  Items for which the link selector fails are
    skipped via ``self.skip()``.
    """
    try:
        anchors = CSS("a").match(item)
        link = anchors[1]
    except SelectorError:
        self.skip()

    data = {
        "last_name": link.text_content(),
        "url": link.get("href"),
    }
    for key, label in self.LABELS.items():
        labelled = CSS(f"[id$={label}]").match_one(item)
        data[key] = labelled.text_content().strip()

    party_names = {"(D)": "Democratic", "(R)": "Republican"}
    party = party_names[data["party"]]
    address = "Hawaii State Capitol, Room " + data["room"]
    if data["chamber"] == "S":
        chamber = "upper"
    else:
        chamber = "lower"

    member = Person(
        name=data["first_name"] + " " + data["last_name"],
        state="hi",
        chamber=chamber,
        district=data["district"],
        given_name=data["first_name"],
        family_name=data["last_name"],
        party=party,
        email=data["email"],
    )

    capitol = member.capitol_office
    capitol.address = address
    capitol.voice = data["voice"]
    capitol.fax = data["fax"]

    member.add_source(data["url"])
    member.add_link(data["url"])
    return member
def process_item(self, item):
    """Build a New York ``Person`` from one member listing element.

    The district is the element's ``id`` with leading zeros removed; the
    party is looked up in ``self.party_mapping`` by that district.  Email,
    Twitter and Facebook are optional and default to an empty string.
    """
    # strip leading zero
    district = str(int(item.get("id")))
    image = CSS(".mem-pic a img").match_one(item).get("src")
    name_link = CSS(".mem-name a").match_one(item)
    district_addr, capitol_addr = self.process_addresses(item)

    # email, twitter, facebook are all sometimes present
    try:
        email = CSS(".mem-email a").match_one(item).text.strip()
    except SelectorError:
        email = ""

    try:
        twitter_icon = CSS(".fa-twitter").match_one(item)
        # The handle is the last path segment of the parent link's href.
        twitter = twitter_icon.getparent().get("href").split("/")[-1]
    except SelectorError:
        twitter = ""

    try:
        facebook_icon = CSS(".fa-facebook").match_one(item)
        facebook = facebook_icon.getparent().get("href").split("/")[-1]
    except SelectorError:
        facebook = ""

    party = self.party_mapping[district][1]

    member = Person(
        state="ny",
        chamber="lower",
        image=image,
        party=party,
        district=district,
        name=name_link.text.strip(),
        email=email,
    )
    member.add_link(url=name_link.get("href"))
    member.add_source(url=name_link.get("href"))

    if twitter:
        member.ids["twitter"] = twitter
    if facebook:
        member.ids["facebook"] = facebook

    member.district_office.address = district_addr["address"]
    member.district_office.voice = district_addr["phone"]
    member.district_office.fax = district_addr["fax"]

    member.capitol_office.address = capitol_addr["address"]
    member.capitol_office.voice = capitol_addr["phone"]
    member.capitol_office.fax = capitol_addr["fax"]
    return member
def process_page(self):
    """Build a Virginia ``Person`` from the member page in ``self.root``.

    Party and district are parsed from the first ``//h3/font`` text node; the
    image is attached only when ``self.input.image`` is truthy.  Offices are
    filled in by ``self.get_offices``.
    """
    header_text = self.root.xpath("//h3/font/text()")[0]
    party, district = get_party_district(header_text)

    member = Person(
        name=self.input.name,
        state="va",
        chamber=self.chamber,
        party=party,
        district=district,
    )
    if self.input.image:
        member.image = self.input.image

    member.add_link(self.source.url)
    member.add_source(self.source.url)
    self.get_offices(member)
    return member
def to_object(self, item):
    """Convert a scraped ``item`` mapping into a Maryland ``Person``.

    ``item["link"]`` is recorded both as a link and as a source.
    """
    copied_keys = ("chamber", "name", "party", "image", "district")
    attributes = {key: item[key] for key in copied_keys}

    member = Person(state="md", **attributes)
    member.add_link(item["link"])
    member.add_source(item["link"])
    return member
def process_page(self):
    """Build a Florida senator ``Person`` from the detail page.

    The email is whatever follows the last ':' in the first ``mailto:`` link,
    and the image is the last ``<img>`` source inside ``#sidebar``.  Each
    match of ``self.contact_xpath`` is passed to ``self.handle_office``.
    """
    mail_links = self.root.xpath('//a[contains(@href, "mailto:")]')
    email = mail_links[0].get("href").split(":")[-1]

    senator = Person(
        state="fl",
        chamber="upper",
        name=fix_name(self.input.name),
        party=str(self.input.party),
        district=str(self.input.district),
        email=email,
        image=str(self.root.xpath('//div[@id="sidebar"]//img/@src').pop()),
    )

    for office_node in self.contact_xpath.match(self.root):
        self.handle_office(office_node, senator)

    return senator
def handle_list_item(self, item):
    """Build a Florida representative ``Person`` from one listing card.

    The party is the first word of the card's first paragraph and the
    district is the last word of that paragraph's ``<span>``.  The image URL
    is ``self.IMAGE_BASE`` plus the first image's ``data-src``.
    """
    raw_name = item.xpath("./a/div[@class='team-txt']/h5/text()")[0]
    name = raw_name.strip()

    party_text = item.xpath("./a/div[@class='team-txt']/p[1]/text()")[0]
    party = party_text.split()[0]

    district_text = item.xpath(
        "./a/div[@class='team-txt']/p[1]/span/text()")[0]
    district = district_text.split()[-1]

    image = self.IMAGE_BASE + item.xpath(".//img")[0].attrib["data-src"]
    link = str(item.xpath("./a/@href")[0])

    rep = Person(
        name=fix_name(name),
        state="fl",
        party=str(party),
        district=str(district),
        chamber="lower",
        image=image,
    )
    rep.add_link(link)
    # Both the listing page and the member's own page are sources.
    rep.add_source(self.url)
    rep.add_source(link)
    return rep
def handle_list_item(self, item):
    """Build a Florida senator ``Person`` from one listing link.

    Returns ``None`` when the link text contains "Vacant".  District and
    party are read from the first and second ``<td>`` two levels above the
    link.
    """
    # Join all text nodes, collapse whitespace runs and tidy " ," into ",".
    joined = " ".join(item.xpath(".//text()"))
    name = re.sub(r"\s+", " ", joined).replace(" ,", ",").strip()
    if "Vacant" in name:
        return None

    district = item.xpath("string(../../td[1])")
    party = item.xpath("string(../../td[2])")
    leg_url = item.get("href")

    name = fix_name(name)
    senator = Person(
        name=str(name),
        state="fl",
        party=str(party),
        district=str(district),
        chamber="upper",
    )
    senator.add_link(leg_url)
    # Both the listing page and the member's own page are sources.
    senator.add_source(self.url)
    senator.add_source(leg_url)
    return senator
def insert_persons():
    """Add three sample ``Person`` rows to ``database.session``.

    The rows are only added to the session; nothing is committed here.
    """
    columns = ("id", "firstname", "lastname", "age")
    rows = (
        (1, "Nikola", "Tesla", 24),
        (2, "Albert", "Einstein", 21),
        (3, "Isaac", "Newton", 12),
    )

    people = []
    for row in rows:
        people.append(Person(**dict(zip(columns, row))))
    database.session.add_all(people)
def process_item(self, item):
    """Build a South Dakota ``Person`` from one member record.

    The display name includes the middle initial when one is present.  The
    home address becomes the district office address, formatted as
    "<line 1>; <line 2>; <city>, <state> <zip>" with missing street lines
    omitted.
    """
    first = item["FirstName"]
    last = item["LastName"]
    initial = item["Initial"]

    name = f"{first} {initial}. {last}" if initial else f"{first} {last}"

    p = Person(
        name=name,
        family_name=last,
        given_name=first,
        state="sd",
        district=item["District"].lstrip("0"),
        chamber="upper" if item["MemberType"] == "S" else "lower",
        party=item["Politics"],
        email=item["EmailState"],
        image="https://lawmakerdocuments.blob.core.usgovcloudapi.net/photos/"
        + item["Picture"].lower(),
    )

    # Previously the city line was appended straight onto the street address
    # with no separator, and an empty first address line raised a TypeError.
    # Joining the parts that are present fixes both.
    address_parts = [
        part for part in (item["HomeAddress1"], item["HomeAddress2"]) if part
    ]
    address_parts.append(
        f"{item['HomeCity']}, {item['HomeState']} {item['HomeZip']}"
    )
    p.district_office.address = "; ".join(address_parts)

    p.district_office.voice = item["HomePhone"]
    p.capitol_office.voice = item["CapitolPhone"]
    p.extras["occupation"] = item["Occupation"]

    url = f"https://sdlegislature.gov/Legislators/Profile/{item['SessionMemberId']}/Detail"
    p.add_link(url)
    p.add_source(url)
    return p
def process_page(self):
    """Build a Nebraska ``Person`` from the senator page in ``self.root``.

    The address block is read line by line: lines are collected as address
    text until one starting with "(402)" is seen, which becomes the phone; a
    line starting with "Email:" supplies the email.
    """
    name = self.name_css.match_one(self.root).text.replace("Sen. ", "").strip()
    district = self.district_css.match_one(self.root).text.split()[1]
    image = self.image_css.match_one(self.root).get("src")
    addrlines = self.address_css.match_one(self.root).text_content()

    # example:
    #        Room 11th Floor
    #        P.O. Box 94604
    #        Lincoln, NE 68509
    #        (402) 471-2733
    #        Email: [email protected]
    collecting_address = True
    address = []
    phone = None
    email = None
    for raw_line in addrlines.splitlines():
        entry = raw_line.strip()
        if not entry:
            continue
        if entry.startswith("(402)"):
            # The phone line ends the address portion of the block.
            phone = entry
            collecting_address = False
        if entry.startswith("Email:"):
            email = entry.replace("Email: ", "")
        if collecting_address:
            address.append(entry)

    senator = Person(
        chamber="legislature",
        party="Nonpartisan",
        state="ne",
        district=district,
        image=image,
        name=name,
        email=email,
    )
    senator.capitol_office.address = "; ".join(address)
    senator.capitol_office.voice = phone
    senator.add_source(self.source.url)
    senator.add_link(self.source.url)
    return senator
def process_item(self, item):
    """Build a Minnesota representative ``Person`` from one listing item.

    The bold link text is expected to look like "<name> (<NN><A|B>, <PARTY>)";
    the district has leading zeros removed and the party code is looked up in
    ``PARTIES``.  The first two non-blank text nodes under the item's
    ``<div>`` form the address and the third is the phone.
    """
    photo_url = item.xpath("./img/@src")[0]
    url = item.xpath(".//h5/a/@href")[0]
    heading = item.xpath(".//h5/a/b/text()")[0]

    parsed = re.match(r"^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$", heading)
    name = parsed.group(1).strip()
    district = parsed.group(2).lstrip("0").upper()
    party = PARTIES[parsed.group(3)]

    info_texts = []
    for node in item.xpath("./div/text()[normalize-space()]"):
        if node.strip():
            info_texts.append(node.strip())

    address = "\n".join((info_texts[0], info_texts[1]))
    # No validation is applied to the phone or email text.
    phone = info_texts[2]
    # The email is the item's second link href with "mailto:" removed.
    email = item.xpath(".//a/@href")[1].replace("mailto:", "").strip()

    rep = Person(
        name=name,
        district=district,
        party=party,
        state="mn",
        chamber="lower",
        image=photo_url,
        email=email,
    )
    rep.add_link(url)
    rep.add_source(self.source.url)
    rep.capitol_office.address = address
    # NOTE(review): other scrapers set ``capitol_office.voice``; confirm
    # whether ``phone`` is the intended attribute here.
    rep.capitol_office.phone = phone
    return rep
def process_item(self, item):
    """Build a Georgia ``Person`` from one member record.

    Chamber and party are looked up in ``self.chamber_types`` and
    ``self.party_ids``.  The district office is filled from
    ``item["districtAddress"]``; a single photo, if present, becomes the
    image with its query string removed.

    Raises:
        Exception: if the record carries more than one photo.
    """
    chamber_id = item["district"]["chamberType"]
    name_parts = item["name"]
    member = Person(
        state="ga",
        chamber=self.chamber_types[chamber_id],
        district=str(item["district"]["number"]),
        name=item["fullName"],
        family_name=name_parts["familyName"],
        given_name=name_parts["first"],
        suffix=name_parts["suffix"] or "",
        party=self.party_ids[item["party"]],
    )

    # district address
    da = item["districtAddress"]
    member.email = da["email"]
    district_office = member.district_office
    district_office.voice = da["phone"]
    district_office.fax = da["fax"]
    if da["address1"]:
        # NOTE(review): the city line is assumed to belong inside this guard;
        # confirm against the original layout.
        district_office.address = da["address1"]
        if da["address2"]:
            district_office.address += "; " + da["address2"]
        district_office.address += "; {city}, {state} {zip}".format(**da)
        district_office.address = district_office.address.strip()

    # photos
    photos = item["photos"]
    if photos:
        if len(photos) == 1:
            # strip off ?size=mpSm for full size
            member.image = photos[0]["url"].split("?")[0]
        else:
            raise Exception("unknown photos configuration: " + str(item["photos"]))

    # extras
    member.extras["residence"] = item["residence"]
    member.extras["city"] = item["city"]
    member.extras["georgia_id"] = item["id"]

    if item["dateVacated"]:
        member.end_date = item["dateVacated"]

    url = (
        f"https://www.legis.ga.gov/members/{self.chamber_names[chamber_id]}/"
        f"{item['id']}?session={item['sessionId']}")
    member.add_source(url)
    member.add_link(url)
    return member
b1 = Ball('super')
print(b1)

# Ghost class
gh = Ghost()
print(gh)

# Humans
humans = make_humans()
# Report the first two humans, in order.
for _idx in (0, 1):
    print('human name is {} sex is {}'.format(humans[_idx].name, humans[_idx].sex))

# Person
pe = Person('john', 34)
print(pe.getPersonInfo())


class A:
    """Minimal base class exposing ``func1``."""

    def func1(self) -> None:
        """Print ``func1``."""
        print('func1')


class B(A):
    """Subclass of ``A`` that adds ``func2``."""

    def func2(self) -> None:
        """Call the inherited ``func1``, then print ``func2``."""
        super().func1()
        print('func2')