Example #1
0
    def process_item(self, row):
        """Build a Minnesota senator ``Person`` from one CSV row.

        Rows with an empty "First Name" are ignored and ``None`` is returned.
        Image, URL, office phone and email are looked up in
        ``self.extra_info`` under the "First Last" name built from the row.
        The capitol office address is only recorded when "Address2" names one
        of the two known capitol-complex street addresses.
        """
        if not row["First Name"]:
            return
        name = "{} {}".format(row["First Name"], row["Last Name"])
        party = PARTIES[row["Party"]]
        extra = self.extra_info[name]
        leg = Person(
            name=name,
            district=row["District"].lstrip("0"),
            party=party,
            state="mn",
            chamber="upper",
            image=extra["image"],
        )

        if "url" in extra:
            leg.add_link(extra["url"])
        if "office_phone" in extra:
            leg.capitol_office.voice = extra["office_phone"]
        if "email" in extra:
            leg.email = extra["email"]

        row["Zipcode"] = row["Zipcode"].strip()
        capitol_streets = ("95 University Avenue W", "100 Rev. Dr. Martin Luther King")
        # any() is required here: a bare generator expression is always truthy,
        # which would make this check pass for every row.
        if any(street in row["Address2"] for street in capitol_streets):
            address = "{Address}\n{Address2}\n{City}, {State} {Zipcode}".format(**row)
            if "Rm. Number" in row:
                address = "{0} {1}".format(row["Rm. Number"], address)
            # assigned inside the branch so ``address`` can never be unbound
            leg.capitol_office.address = address
        leg.add_source(self.source.url)
        leg.add_source(SEN_HTML_URL)
        return leg
Example #2
0
    def process_item(self, item):
        """Turn one row of the Michigan House roster table into a ``Person``.

        The row's seven cells are website, district, name, party, office,
        phone and email, in that order.
        """
        cells = item.getchildren()
        website, district, name, party, office, phone, email = cells

        # a row whose first cell is a <th> is the table header, not a member
        if website.tag == "th":
            self.skip()

        # expand the abbreviations listed in self.office_names
        address = office.text_content()
        for short, expanded in self.office_names.items():
            address = address.replace(short, expanded)

        member = Person(
            name=name.text_content(),
            state="mi",
            chamber="lower",
            district=district.text_content().lstrip("0"),
            party=party.text_content(),
            email=email.text_content(),
        )
        profile_url = CSS("a").match_one(website).get("href")
        member.add_link(profile_url)
        member.add_source(self.source.url)
        member.capitol_office.voice = phone.text_content()
        member.capitol_office.address = address
        return member
Example #3
0
    def process_item(self, item):
        """Build a Montana legislator ``Person`` from one roster table row.

        The row's cells are, in order: email, name, party, seat and phone.
        The seat cell holds two whitespace-separated tokens, a chamber code
        ("SD" means upper chamber, anything else lower) and the district.
        """
        email, name, party, seat, phone = item.getchildren()

        chamber, district = seat.text_content().strip().split()
        url = str(name.xpath("a/@href")[0])

        person = Person(
            name=clean_name(name.text_content()),
            state="mt",
            party=party.text_content().strip(),
            chamber=("upper" if chamber == "SD" else "lower"),
            district=district,
        )
        person.add_link(url)
        person.add_source(url)

        phone = phone.text_content().strip()
        if len(phone) == 14:
            person.capitol_office.voice = phone
        elif len(phone) > 30:
            # longer text: keep only what precedes the first run of four spaces
            person.capitol_office.voice = phone.split("    ")[0]

        # The xpath result is a list; store a string either way so a missing
        # link yields "" instead of an empty list.
        email_hrefs = email.xpath("./a/@href")
        if email_hrefs:
            # drop the scheme prefix (text up to the first ":")
            person.capitol_office.email = email_hrefs[0].split(":", 1)[1]
        else:
            person.capitol_office.email = ""

        return person
Example #4
0
    def handle_list_item(self, item):
        """Build a Montana legislator from one roster row.

        Returns a ``(person, url)`` tuple, where ``url`` is the link found in
        the name cell. The row's cells are email, name, party, seat and phone,
        in that order.
        """
        tds = item.getchildren()
        email, name, party, seat, phone = tds

        # the seat cell is "<chamber code> <district>"; "SD" marks the upper chamber
        chamber, district = seat.text_content().strip().split()
        url = str(name.xpath('a/@href')[0])

        person = Person(
            name=clean_name(name.text_content()),
            state='mt',
            party=party.text_content().strip(),
            chamber=('upper' if chamber == 'SD' else 'lower'),
            district=district,
        )
        person.add_link(url)
        person.add_source(url)

        phone = phone.text_content().strip()
        if len(phone) == 14:
            person.capitol_office.voice = phone
        elif len(phone) > 30:
            # longer text: keep only what precedes the first run of four spaces
            person.capitol_office.voice = phone.split('    ')[0]

        # xpath() returns a list; never store that list itself as the email
        hrefs = email.xpath('./a/@href')
        person.capitol_office.email = hrefs[0].split(':', 1)[1] if hrefs else ''

        return person, url
Example #5
0
    def process_item(self, item):
        """Create a Hawaii legislator ``Person`` from one listing element.

        The last name and profile URL come from the second link in the item;
        every other field is read from the element whose id ends with the
        matching label in ``self.LABELS``.
        """
        try:
            link = CSS("a").match(item)[1]
        except SelectorError:
            self.skip()

        data = {"last_name": link.text_content(), "url": link.get("href")}
        for key, label in self.LABELS.items():
            field = CSS(f"[id$={label}]").match_one(item)
            data[key] = field.text_content().strip()

        party_names = {"(D)": "Democratic", "(R)": "Republican"}
        party = party_names[data["party"]]
        address = "Hawaii State Capitol, Room " + data["room"]
        if data["chamber"] == "S":
            chamber = "upper"
        else:
            chamber = "lower"

        member = Person(
            name=data["first_name"] + " " + data["last_name"],
            state="hi",
            chamber=chamber,
            district=data["district"],
            given_name=data["first_name"],
            family_name=data["last_name"],
            party=party,
            email=data["email"],
        )
        office = member.capitol_office
        office.address = address
        office.voice = data["voice"]
        office.fax = data["fax"]
        member.add_source(data["url"])
        member.add_link(data["url"])
        return member
Example #6
0
    def process_page(self):
        """Build an Ohio House member ``Person`` from a member detail page.

        Identity fields come from ``self.input``; address, phone and fax are
        read from the last ``.member-info-bar-module`` element on the page.
        """
        member = Person(
            state="oh",
            chamber="lower",
            district=self.input.district,
            name=self.input.name,
            party=self.input.party,
            image=self.input.image,
        )
        member.add_source(self.input.url)
        member.add_link(self.input.url)

        modules = CSS(".member-info-bar-module").match(self.root)
        # the final module holds the contact details
        for value in CSS(".member-info-bar-value").match(modules[-1]):
            text = value.text_content()
            if ", OH" in text:
                # the address is spread over the text and tails of the first
                # two child nodes; stitch the pieces back together
                kids = value.getchildren()
                pieces = [
                    kids[0].text.strip(),
                    kids[0].tail.strip(),
                    kids[1].tail.strip(),
                ]
                member.capitol_office.address = "; ".join(pieces)
            elif "Phone:" in text:
                member.capitol_office.voice = text.split(": ")[1]
            elif "Fax:" in text:
                member.capitol_office.fax = text.split(": ")[1]

        return member
Example #7
0
    def handle_list_item(self, item, session_num):
        """Build an Alaska legislator ``Person`` from one member element.

        Field values are collected for every name in ``ELEMENTS`` via
        ``_get_if_exists``; the chamber and member code come from the element's
        attributes. ``session_num`` is only used to build the detail-page link.
        """
        fields = {tag: _get_if_exists(item, tag) for tag in ELEMENTS}
        chamber_code = item.attrib["chamber"]
        member_code = item.attrib["code"].lower()

        if chamber_code == 'S':
            chamber = 'upper'
        else:
            chamber = 'lower'

        legislator = Person(
            name="{FirstName} {LastName}".format(**fields),
            given_name=fields['FirstName'],
            family_name=fields['LastName'],
            state='ak',
            party=fields["Party"],
            chamber=chamber,
            district=fields["District"],
            image=f"http://akleg.gov/images/legislators/{member_code}.jpg",
        )
        detail_url = "http://www.akleg.gov/basis/Member/Detail/{}?code={}".format(
            session_num,
            member_code,
        )
        legislator.add_link(detail_url)
        legislator.add_source("http://w3.akleg.gov/")

        # prefix "907-" and put a dash after the first three characters
        digits = fields["Phone"]
        office = legislator.capitol_office
        office.voice = "907-" + digits[0:3] + "-" + digits[3:]
        office.email = fields["EMail"]

        if fields["Building"] == "CAPITOL":
            office.address = "State Capitol Room {}; Juneau AK, 99801".format(
                fields["Room"])

        return legislator
Example #8
0
    def process_page(self):
        """Build an Oklahoma House member ``Person`` from a district page.

        The contact-form link is derived from the page URL by swapping
        "District.aspx" for "Contact.aspx"; the result is asserted to be an
        okhouse.gov member contact URL. When the first ``.districtheadleft``
        block is labelled "Capitol Address:", the following div supplies the
        capitol address lines and, on its last line, the phone number.
        """
        name = self.name_css.match_one(self.root).text.split(maxsplit=1)[1]
        p = Person(
            name=name,
            state="ok",
            # okhouse.gov pages describe House members, i.e. the lower chamber
            chamber="lower",
            party=self.party_css.match_one(self.root).text,
            district=self.district_css.match_one(self.root).text.split()[1],
        )
        p.image = self.image_selector.match_one(self.root).get("href")

        contact_url = self.source.url.replace("District.aspx", "Contact.aspx")
        assert contact_url.startswith(
            "https://www.okhouse.gov/Members/Contact.aspx?District=")
        p.add_link(contact_url, note="Contact Form")

        # capitol address
        heading = CSS(".districtheadleft").match(self.root)[0]
        if heading.text_content().strip() == "Capitol Address:":
            block = CSS(".districtheadleft + div").match(self.root)[0]
            lines = block.text_content().strip().splitlines()
            p.capitol_office.address = "; ".join(ln.strip() for ln in lines[:-1])
            # phone numbers are stored under ``voice``
            p.capitol_office.voice = lines[-1].strip()
        return p
Example #9
0
    def process_page(self):
        """Build a Maryland legislator ``Person`` from a member detail page.

        The chamber is left as ``None`` here. The page URL is recorded as both
        a link and a source.
        """
        # NOTE: the "Annapolis Info" and "Interim Info" <dd> sections of the
        # page are not extracted.

        # the href has the form mailto:<addr>?body...; keep just <addr>
        mailto = SimilarLink("mailto:").match_one(self.root).get("href")
        after_scheme = mailto.split(":", 1)[1]
        email = after_scheme.split("?")[0]

        # drop the first space-separated token of the heading
        heading = CSS("h2").match_one(self.root).text
        full_name = heading.split(" ", 1)[1]

        member = Person(
            name=full_name,
            state="md",
            image=self.image_sel.match_one(self.root).get("src"),
            party=self.extract_dd("Party"),
            district=self.extract_dd("District"),
            chamber=None,
            email=email,
        )
        page_url = self.source.url
        member.add_link(page_url)
        member.add_source(page_url)
        return member
Example #10
0
    def process_item(self, item):
        """Convert one row of the Michigan Senate member table into a ``Person``.

        The row's cells are member, party, district, contact link, phone and
        office, in that order. Rows named "Interim District" are skipped.
        """
        cells = item.getchildren()
        member, party, district, contact_link, phone, office = cells

        name = member.text_content()
        district = district.text_content()

        # vacant districts are listed as "Interim District"
        if "Interim District" in name:
            self.skip()

        # the member cell and the contact cell each contain a single link
        leg_url = CSS("a").match_one(member).get("href")
        contact_url = CSS("a").match_one(contact_link).get("href")
        # image URLs follow a pattern observed elsewhere on senate.michigan.gov
        image_url = f"https://senate.michigan.gov/_images/{district}{ord_suffix(district)}.jpg"

        senator = Person(
            **split_name(name),
            state="mi",
            chamber="upper",
            district=district,
            party=self.PARTY_MAP[party.text],
            image=image_url,
        )
        capitol = senator.capitol_office
        capitol.voice = str(phone.text_content())
        capitol.address = str(office.text_content())
        senator.add_source(self.source.url)
        senator.add_link(leg_url)
        senator.add_link(contact_url, note="Contact")
        return senator
Example #11
0
    def process_page(self):
        """Build a Missouri House member ``Person`` from a member detail page.

        Only the photo is read from the page itself; every other field comes
        from ``self.input``.
        """
        party_names = {"D": "Democratic", "R": "Republican"}
        party = party_names[self.input.party]

        photo_img = CSS("img#ContentPlaceHolder1_imgPhoto1").match_one(self.root)
        photo = photo_img.get("src")

        rep = Person(
            state="mo",
            party=party,
            image=photo,
            chamber="lower",
            district=self.input.district,
            name=f"{self.input.first_name} {self.input.last_name}",
            given_name=self.input.first_name,
            family_name=self.input.last_name,
        )
        # TODO: record the hometown as well
        # p.extras["hometown"] = self.input.hometown
        office = rep.capitol_office
        office.voice = self.input.voice
        office.address = (
            "MO House of Representatives; 201 West Capitol Avenue; "
            f"Room {self.input.room}; Jefferson City MO 65101 ")
        rep.add_link(self.input.url)
        rep.add_source(self.input.url)
        return rep
Example #12
0
 def to_object(self, item):
     """Wrap a scraped Maryland member mapping in a ``Person``.

     ``item`` must provide "chamber", "name", "party", "image", "district"
     and "link"; the link is recorded as both a link and a source.
     """
     person = Person(
         state="md",
         chamber=item["chamber"],
         name=item["name"],
         party=item["party"],
         image=item["image"],
         district=item["district"],
     )
     for record in (person.add_link, person.add_source):
         record(item["link"])
     return person
Example #13
0
    def process_page(self):
        """Build a Virginia legislator ``Person`` from a member detail page.

        Party and district are parsed by ``get_party_district`` from the first
        ``//h3/font`` text node; office details are delegated to
        ``self.get_offices``.
        """
        heading = self.root.xpath("//h3/font/text()")[0]
        party, district = get_party_district(heading)
        member = Person(
            name=self.input.name,
            state="va",
            chamber=self.chamber,
            party=party,
            district=district,
        )
        # only set an image when the input provides one
        if self.input.image:
            member.image = self.input.image

        page_url = self.source.url
        member.add_link(page_url)
        member.add_source(page_url)

        self.get_offices(member)

        return member
Example #14
0
    def process_item(self, item):
        """Build a Georgia legislator ``Person`` from one member record.

        ``item`` is a nested mapping; the chamber and party are translated via
        ``self.chamber_types`` and ``self.party_ids``. Raises ``Exception`` if
        the record lists more than one photo.
        """
        chamber_id = item["district"]["chamberType"]
        member = Person(
            state="ga",
            chamber=self.chamber_types[chamber_id],
            district=str(item["district"]["number"]),
            name=item["fullName"],
            family_name=item["name"]["familyName"],
            given_name=item["name"]["first"],
            suffix=item["name"]["suffix"] or "",
            party=self.party_ids[item["party"]],
        )

        # district office contact details
        contact = item["districtAddress"]
        member.email = contact["email"]
        member.district_office.voice = contact["phone"]
        member.district_office.fax = contact["fax"]
        if contact["address1"]:
            street = contact["address1"]
            if contact["address2"]:
                street += "; " + contact["address2"]
            street += "; {city}, {state} {zip}".format(**contact)
            member.district_office.address = street.strip()

        # at most one photo is expected
        photos = item["photos"]
        if photos:
            if len(photos) != 1:
                raise Exception("unknown photos configuration: " +
                                str(item["photos"]))
            # drop the query string (?size=mpSm) to get the full-size image
            member.image = photos[0]["url"].split("?")[0]

        # extras
        member.extras["residence"] = item["residence"]
        member.extras["city"] = item["city"]
        member.extras["georgia_id"] = item["id"]
        if item["dateVacated"]:
            member.end_date = item["dateVacated"]

        url = (
            f"https://www.legis.ga.gov/members/{self.chamber_names[chamber_id]}/"
            f"{item['id']}?session={item['sessionId']}")
        member.add_source(url)
        member.add_link(url)

        return member
Example #15
0
    def process_item(self, item):
        """Build a New York Assembly member ``Person`` from one listing element.

        Email, Twitter and Facebook are optional on the page; each falls back
        to an empty string when its selector finds nothing. The party is looked
        up in ``self.party_mapping`` by district.
        """

        def social_handle(icon_selector):
            # last path segment of the link wrapping the icon, or "" if absent
            try:
                icon = CSS(icon_selector).match_one(item)
            except SelectorError:
                return ""
            return icon.getparent().get("href").split("/")[-1]

        # int() round-trip strips any leading zero from the id
        district = str(int(item.get("id")))
        image = CSS(".mem-pic a img").match_one(item).get("src")
        name_link = CSS(".mem-name a").match_one(item)

        district_addr, capitol_addr = self.process_addresses(item)

        try:
            email = CSS(".mem-email a").match_one(item).text.strip()
        except SelectorError:
            email = ""
        twitter = social_handle(".fa-twitter")
        facebook = social_handle(".fa-facebook")

        party = self.party_mapping[district][1]

        member = Person(
            state="ny",
            chamber="lower",
            image=image,
            party=party,
            district=district,
            name=name_link.text.strip(),
            email=email,
        )
        member.add_link(url=name_link.get("href"))
        member.add_source(url=name_link.get("href"))
        if twitter:
            member.ids["twitter"] = twitter
        if facebook:
            member.ids["facebook"] = facebook

        member.district_office.address = district_addr["address"]
        member.district_office.voice = district_addr["phone"]
        member.district_office.fax = district_addr["fax"]
        member.capitol_office.address = capitol_addr["address"]
        member.capitol_office.voice = capitol_addr["phone"]
        member.capitol_office.fax = capitol_addr["fax"]
        return member
Example #16
0
    def process_page(self):
        """Build a Nebraska senator ``Person`` from a senator detail page.

        The address block is read line by line: every non-empty line before
        the one starting with "(402)" is an address line, that line is the
        phone number, and a line starting with "Email:" supplies the email.
        """
        heading = self.name_css.match_one(self.root).text
        name = heading.replace("Sen. ", "").strip()
        district = self.district_css.match_one(self.root).text.split()[1]
        image = self.image_css.match_one(self.root).get("src")
        block = self.address_css.match_one(self.root).text_content()

        # example block:
        #   Room 11th Floor
        #   P.O. Box 94604
        #   Lincoln, NE 68509
        #   (402) 471-2733
        #   Email: [email protected]
        collecting_address = True
        address_lines = []
        phone = None
        email = None
        for raw in block.splitlines():
            line = raw.strip()
            if not line:
                continue
            if line.startswith("(402)"):
                phone = line
                # everything from the phone line on is no longer address text
                collecting_address = False
            if line.startswith("Email:"):
                email = line.replace("Email: ", "")
            if collecting_address:
                address_lines.append(line)

        senator = Person(
            chamber="legislature",
            party="Nonpartisan",
            state="ne",
            district=district,
            image=image,
            name=name,
            email=email,
        )
        senator.capitol_office.address = "; ".join(address_lines)
        senator.capitol_office.voice = phone
        senator.add_source(self.source.url)
        senator.add_link(self.source.url)
        return senator
Example #17
0
    def get_data(self):
        """Build an Oklahoma senator ``Person`` from a senator detail page.

        The party is taken from the ``.bSenBio__infoIt`` entry containing
        "Party:"; if several match, the last one wins.

        Raises:
            ValueError: if no "Party:" entry is present on the page.
        """
        party = None
        for bio in CSS(".bSenBio__infoIt").match(self.root):
            text = bio.text_content()
            if "Party:" in text:
                party = text.split(":")[1].strip()
        if party is None:
            # fail with a clear message rather than an unbound-variable error
            raise ValueError("no 'Party:' entry found in senator bio")

        p = Person(
            name=self.name_css.match_one(self.root).text,
            state="ok",
            chamber="upper",
            party=party,
            image=self.image_css.match_one(self.root).get("href"),
            district=self.district_css.match_one(
                self.root).text.strip().split()[1],
        )
        p.capitol_office.address = self.address_css.match_one(self.root).text
        # phone numbers are stored under ``voice``
        p.capitol_office.voice = self.phone_css.match_one(self.root).text
        p.add_link(
            self.contact_link_sel.match_one(self.root).get("href"),
            "Contact Form")

        return p
Example #18
0
    def handle_list_item(self, item):
        """Build a Florida House member ``Person`` from one listing element.

        Name, party and district are read from the ``team-txt`` div inside the
        item's link; the image path comes from the ``data-src`` attribute of
        the first image and is prefixed with ``self.IMAGE_BASE``.
        """
        name = item.xpath("./a/div[@class='team-txt']/h5/text()")[0].strip()
        party_line = item.xpath("./a/div[@class='team-txt']/p[1]/text()")[0]
        # the party is the first word of the paragraph text
        party = party_line.split()[0]
        district_line = item.xpath(
            "./a/div[@class='team-txt']/p[1]/span/text()")[0]
        # the district is the last word of the span text
        district = district_line.split()[-1]
        image = self.IMAGE_BASE + item.xpath(".//img")[0].attrib["data-src"]
        link = str(item.xpath("./a/@href")[0])

        member = Person(
            name=fix_name(name),
            state="fl",
            party=str(party),
            district=str(district),
            chamber="lower",
            image=image,
        )
        member.add_link(link)
        for source in (self.url, link):
            member.add_source(source)
        return member
Example #19
0
    def process_item(self, item):
        """Build a Minnesota House member ``Person`` from one listing element.

        The link text has the form "<name> (<district>, <party>)" where the
        district is two digits followed by A or B; the party code is
        translated through ``PARTIES``. The first two text lines of the item's
        div form the address, the third is the phone number, and the second
        link in the item is the mailto address.
        """
        photo_url = item.xpath("./img/@src")[0]
        url = item.xpath(".//h5/a/@href")[0]
        name_text = item.xpath(".//h5/a/b/text()")[0]

        name_match = re.match(r"^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$", name_text)
        name = name_match.group(1).strip()
        district = name_match.group(2).lstrip("0").upper()
        party = PARTIES[name_match.group(3)]

        info_texts = [
            x.strip() for x in item.xpath("./div/text()[normalize-space()]")
            if x.strip()
        ]
        address = "\n".join((info_texts[0], info_texts[1]))
        phone = info_texts[2]

        email = item.xpath(".//a/@href")[1].replace("mailto:", "").strip()

        rep = Person(
            name=name,
            district=district,
            party=party,
            state="mn",
            chamber="lower",
            image=photo_url,
            email=email,
        )
        rep.add_link(url)
        rep.add_source(self.source.url)
        rep.capitol_office.address = address
        # phone numbers are stored under ``voice``
        rep.capitol_office.voice = phone

        return rep
Example #20
0
    def handle_list_item(self, item):
        """Build a Florida senator ``Person`` from one roster link element.

        Returns ``None`` when the link text contains "Vacant". District and
        party are read from the first and second ``td`` two levels above the
        link.
        """
        raw_name = " ".join(item.xpath(".//text()"))
        # collapse whitespace runs and remove the space left before commas
        collapsed = re.sub(r"\s+", " ", raw_name)
        name = collapsed.replace(" ,", ",").strip()

        if "Vacant" in name:
            return None

        district = item.xpath("string(../../td[1])")
        party = item.xpath("string(../../td[2])")
        leg_url = item.get("href")

        senator = Person(
            name=str(fix_name(name)),
            state="fl",
            party=str(party),
            district=str(district),
            chamber="upper",
        )
        senator.add_link(leg_url)
        for source in (self.url, leg_url):
            senator.add_source(source)

        return senator
Example #21
0
    def process_item(self, item):
        """Build a South Dakota legislator ``Person`` from one member record.

        The display name includes the middle initial when one is present. The
        home address lines and the "City, State Zip" line are joined with
        "; " to form the district office address.
        """
        first = item["FirstName"]
        last = item["LastName"]
        initial = item["Initial"]

        if initial:
            name = f"{first} {initial}. {last}"
        else:
            name = f"{first} {last}"

        p = Person(
            name=name,
            family_name=last,
            given_name=first,
            state="sd",
            district=item["District"].lstrip("0"),
            chamber="upper" if item["MemberType"] == "S" else "lower",
            party=item["Politics"],
            email=item["EmailState"],
            image=(
                "https://lawmakerdocuments.blob.core.usgovcloudapi.net/photos/"
                + item["Picture"].lower()
            ),
        )

        # Join every non-empty part with "; " so the city line is separated
        # from the street address instead of being glued onto its end.
        address_parts = [
            item["HomeAddress1"],
            item["HomeAddress2"],
            f"{item['HomeCity']}, {item['HomeState']} {item['HomeZip']}",
        ]
        address = "; ".join(part for part in address_parts if part)

        p.district_office.address = address
        p.district_office.voice = item["HomePhone"]
        p.capitol_office.voice = item["CapitolPhone"]
        p.extras["occupation"] = item["Occupation"]

        url = f"https://sdlegislature.gov/Legislators/Profile/{item['SessionMemberId']}/Detail"
        p.add_link(url)
        p.add_source(url)
        return p