Beispiel #1
0
        p = Person(
            chamber="legislature",
            party="Nonpartisan",
            state="ne",
            district=district,
            image=image,
            name=name,
            email=email,
        )
        p.capitol_office.address = "; ".join(address)
        p.capitol_office.voice = phone
        p.add_source(self.source.url)
        p.add_link(self.source.url)
        return p


class LegPageGenerator(ListPage):
    """
    Spawn one LegPage scraper per legislative district.

    NE is an interesting test case for Spatula, since there are individual senator pages
    but no real index that's useful at all.  Right now this is using a dummy source page
    to spawn the 49 subpage scrapers.
    """

    # Placeholder source: there is no index page to read, the district URLs
    # are generated directly in process_page.
    source = NullSource()

    def process_page(self):
        """Yield a LegPage for each district URL, dist01 through dist49."""
        # District numbers run 1-49 and are zero-padded to two digits in the URL.
        for n in range(1, 50):
            yield LegPage(source=f"http://news.legislature.ne.gov/dist{n:02d}/")


# Module-level workflow built from the LegPageGenerator page class.
legislators = PeopleWorkflow(LegPageGenerator)
Beispiel #2
0
            state="oh",
            chamber="lower",
            district=self.input.district,
            name=self.input.name,
            party=self.input.party,
            image=self.input.image,
        )
        p.add_source(self.input.url)
        p.add_link(self.input.url)

        divs = CSS(".member-info-bar-module").match(self.root)
        # last div is contact details
        contact_details = CSS(".member-info-bar-value").match(divs[-1])
        for div in contact_details:
            dtc = div.text_content()
            if ", OH" in dtc:
                # join parts of the div together to make whole address
                children = div.getchildren()
                p.capitol_office.address = "; ".join(
                    [children[0].text.strip(), children[0].tail.strip(), children[1].tail.strip()]
                )
            elif "Phone:" in dtc:
                p.capitol_office.voice = dtc.split(": ")[1]
            elif "Fax:" in dtc:
                p.capitol_office.fax = dtc.split(": ")[1]

        return p


# Module-level workflow built from the HouseList page class.
house_members = PeopleWorkflow(HouseList)
Beispiel #3
0
        person = Person(
            name="{FirstName} {LastName}".format(**item_dict),
            given_name=item_dict["FirstName"],
            family_name=item_dict["LastName"],
            state="ak",
            party=item_dict["Party"],
            chamber=("upper" if chamber == "S" else "lower"),
            district=item_dict["District"],
            image=f"http://akleg.gov/images/legislators/{code}.jpg",
            email=item_dict["EMail"],
        )
        person.add_link(
            "http://www.akleg.gov/basis/Member/Detail/{}?code={}".format(self.session_num, code)
        )
        person.add_source("http://w3.akleg.gov/")

        if item_dict["Phone"]:
            phone = "907-" + item_dict["Phone"][0:3] + "-" + item_dict["Phone"][3:]
            person.capitol_office.voice = phone

        if item_dict["Building"] == "CAPITOL":
            person.capitol_office.address = "State Capitol Room {}; Juneau AK, 99801".format(
                item_dict["Room"]
            )

        return person


# Module-level workflow built from the Legislators page class.
legislators = PeopleWorkflow(Legislators)
Beispiel #4
0
            name=name,
            family_name=last,
            given_name=first,
            state="sd",
            district=item["District"].lstrip("0"),
            chamber="upper" if item["MemberType"] == "S" else "lower",
            party=item["Politics"],
            email=item["EmailState"],
            image=
            "https://lawmakerdocuments.blob.core.usgovcloudapi.net/photos/" +
            item["Picture"].lower(),
        )

        address = item["HomeAddress1"]
        if item["HomeAddress2"]:
            address += "; " + item["HomeAddress2"]
        address += f"{item['HomeCity']}, {item['HomeState']} {item['HomeZip']}"

        p.district_office.address = address
        p.district_office.voice = item["HomePhone"]
        p.capitol_office.voice = item["CapitolPhone"]
        p.extras["occupation"] = item["Occupation"]

        url = f"https://sdlegislature.gov/Legislators/Profile/{item['SessionMemberId']}/Detail"
        p.add_link(url)
        p.add_source(url)
        return p


# Module-level workflow built from the DirectoryListing page class.
legislators = PeopleWorkflow(DirectoryListing)
Beispiel #5
0
    def process_item(self, item):
        """
        Build a Person from one row of the member table.

        The row must have exactly seven child cells, in this order: website,
        district, name, party, office, phone, email.
        """
        (
            link_cell,
            district_cell,
            name_cell,
            party_cell,
            office_cell,
            phone_cell,
            email_cell,
        ) = item.getchildren()

        # the header row is made of <th> cells and is skipped
        if link_cell.tag == "th":
            self.skip()

        # swap every abbreviation listed in office_names for its full form
        address = office_cell.text_content()
        for short_form, long_form in self.office_names.items():
            address = address.replace(short_form, long_form)

        member = Person(
            name=name_cell.text_content(),
            state="mi",
            chamber="lower",
            district=district_cell.text_content().lstrip("0"),
            party=party_cell.text_content(),
            email=email_cell.text_content(),
        )
        member.add_link(CSS("a").match_one(link_cell).get("href"))
        member.add_source(self.source.url)
        member.capitol_office.voice = phone_cell.text_content()
        member.capitol_office.address = address
        return member


# One module-level workflow per list page class: SenList and RepList.
senators = PeopleWorkflow(SenList)
reps = PeopleWorkflow(RepList)
Beispiel #6
0
            email=email,
        )
        p.add_link(self.source.url)
        p.add_source(self.source.url)
        return p


class PersonList(HtmlListPage):
    """Member index page; each matched cell is handed on to PersonDetail."""

    selector = XPath("//div[@id='myDIV']//div[@class='p-0 member-index-cell']")

    def process_item(self, item):
        """
        Read district and party from one member cell and return the
        PersonDetail page for the member's linked URL.
        """
        text_nodes = XPath(".//dd/text()").match(item)
        # third <dd> text node: its second whitespace-separated token is the district
        district_value = text_nodes[2].strip().split()[1]
        # fifth <dd> text node is the party
        party_value = text_nodes[4].strip()

        # chamber is derived from the URL of the list being processed
        if "senate" in self.source.url:
            chamber_value = "upper"
        else:
            chamber_value = "lower"

        partial_info = {
            "chamber": chamber_value,
            "district": district_value,
            "party": party_value,
        }
        detail_url = str(XPath(".//dd/a[1]/@href").match_one(item))
        return PersonDetail(partial_info, source=detail_url)


# One workflow per chamber: the same PersonList page class, instantiated with
# the house and the senate member index URLs respectively.
house_members = PeopleWorkflow(
    PersonList(
        source="http://mgaleg.maryland.gov/mgawebsite/Members/Index/house"))
senate_members = PeopleWorkflow(
    PersonList(
        source="http://mgaleg.maryland.gov/mgawebsite/Members/Index/senate"))
Beispiel #7
0
        return SenateDetail(self.input)


class DelegateDetail(MemberDetail):
    """Detail page for a lower-chamber member; sets the image when an LIS id is found."""

    chamber = "lower"
    role = "Delegate"

    def process_page(self):
        """Return the parent's result, with ``image`` set if ``get_lis_id`` yields an id."""
        person = super().process_page()
        raw_id = get_lis_id(self.chamber, self.input.url)
        if not raw_id:
            return person

        # keep the first character as-is and zero-pad the numeric rest to four digits
        prefix = raw_id[0]
        number = int(raw_id[1:])
        lis_id = "{}{:04d}".format(prefix, number)
        person.image = f"http://memdata.virginiageneralassembly.gov/images/display_image/{lis_id}"
        return person


class SenateList(MemberList):
    """MemberList configuration for the upper chamber."""

    # page class named as the follow-up for each matched link
    # (presumably instantiated by MemberList -- confirm against the base class)
    next_page_cls = SenatePhotoDetail
    # links inside the "lColRt" column of the member listing
    selector = XPath('//div[@class="lColRt"]/ul/li/a')
    chamber = "upper"


class DelegateList(MemberList):
    """MemberList configuration for the lower chamber."""

    # page class named as the follow-up for each matched link
    # (presumably instantiated by MemberList -- confirm against the base class)
    next_page_cls = DelegateDetail
    # links inside the "lColLt" column of the member listing
    selector = XPath('//div[@class="lColLt"]/ul/li/a')
    chamber = "lower"


# One module-level workflow per chamber list class.
senators = PeopleWorkflow(SenateList)
delegates = PeopleWorkflow(DelegateList)
Beispiel #8
0
            p.district_office.address = p.district_office.address.strip()

        # photos
        if not item["photos"]:
            pass
        elif len(item["photos"]) == 1:
            p.image = item["photos"][0]["url"].split("?")[
                0]  # strip off ?size=mpSm for full size
        else:
            raise Exception("unknown photos configuration: " +
                            str(item["photos"]))

        # extras
        p.extras["residence"] = item["residence"]
        p.extras["city"] = item["city"]
        p.extras["georgia_id"] = item["id"]
        if item["dateVacated"]:
            p.end_date = item["dateVacated"]

        url = (
            f"https://www.legis.ga.gov/members/{self.chamber_names[chamber_id]}/"
            f"{item['id']}?session={item['sessionId']}")
        p.add_source(url)
        p.add_link(url)

        return p


# Workflow over a DirectoryListing instance that reads the members-list URL.
# NOTE(review): the trailing 1029 in the URL is presumably a session id -- confirm.
legislators = PeopleWorkflow(
    DirectoryListing(source="https://www.legis.ga.gov/api/members/list/1029"))
Beispiel #9
0
    district_css = CSS(".bDistrict h2")
    address_css = CSS(".bSenBio__address p")
    phone_css = CSS(".bSenBio__tel a")
    contact_link_sel = SimilarLink(
        r"https://oksenate.gov/contact-senator\?sid=")

    def process_page(self):
        """
        Build the Person for the senator shown on this page.

        Returns:
            Person with name, party, image, district, capitol office address
            and phone number, plus a "Contact Form" link.

        Raises:
            UnboundLocalError: if no ``.bSenBio__infoIt`` element contains
                "Party:", because ``party`` is then never assigned.
        """
        # if several entries mention "Party:", the last one wins
        for bio in CSS(".bSenBio__infoIt").match(self.root):
            bio_text = bio.text_content()
            if "Party:" in bio_text:
                party = bio_text.split(":")[1].strip()
        p = Person(
            name=self.name_css.match_one(self.root).text,
            state="ok",
            chamber="upper",
            party=party,
            image=self.image_css.match_one(self.root).get("href"),
            # district heading text: the second whitespace-separated token is used
            district=self.district_css.match_one(self.root).text.strip().split()[1],
        )
        p.capitol_office.address = self.address_css.match_one(self.root).text
        # phone numbers are stored under "voice", the attribute the other
        # scrapers in this file set; "phone" is not used by any of them
        p.capitol_office.voice = self.phone_css.match_one(self.root).text
        p.add_link(
            self.contact_link_sel.match_one(self.root).get("href"),
            "Contact Form",
        )

        return p


# One module-level workflow per list page class: HouseList and SenateList.
house_members = PeopleWorkflow(HouseList)
senate_members = PeopleWorkflow(SenateList)
Beispiel #10
0
        ]
        address = "\n".join((info_texts[0], info_texts[1]))

        phone_text = info_texts[2]
        # if validate_phone_number(phone_text):
        phone = phone_text

        email_text = item.xpath(".//a/@href")[1].replace("mailto:", "").strip()
        # if validate_email_address(email_text):
        email = email_text

        rep = Person(
            name=name,
            district=district,
            party=party,
            state="mn",
            chamber="lower",
            image=photo_url,
            email=email,
        )
        rep.add_link(url)
        rep.add_source(self.source.url)
        rep.capitol_office.address = address
        rep.capitol_office.phone = phone

        return rep


# One module-level workflow per list page class: RepList and SenList.
reps = PeopleWorkflow(RepList)
sens = PeopleWorkflow(SenList)
Beispiel #11
0
        except SelectorError:
            facebook = ""

        party = self.party_mapping[district][1]

        p = Person(
            state="ny",
            chamber="lower",
            image=image,
            party=party,
            district=district,
            name=name.text.strip(),
            email=email,
        )
        p.add_link(url=name.get("href"))
        p.add_source(url=name.get("href"))
        if twitter:
            p.ids["twitter"] = twitter
        if facebook:
            p.ids["facebook"] = facebook
        p.district_office.address = district_addr["address"]
        p.district_office.voice = district_addr["phone"]
        p.district_office.fax = district_addr["fax"]
        p.capitol_office.address = capitol_addr["address"]
        p.capitol_office.voice = capitol_addr["phone"]
        p.capitol_office.fax = capitol_addr["fax"]
        return p


# Module-level workflow built from the AssemblyList page class.
assembly_members = PeopleWorkflow(AssemblyList)