Ejemplo n.º 1
0
        except SelectorError:
            facebook = ""

        party = self.party_mapping[district][1]

        p = Person(
            state="ny",
            chamber="lower",
            image=image,
            party=party,
            district=district,
            name=name.text.strip(),
            email=email,
        )
        p.add_link(url=name.get("href"))
        p.add_source(url=name.get("href"))
        if twitter:
            p.ids["twitter"] = twitter
        if facebook:
            p.ids["facebook"] = facebook
        p.district_office.address = district_addr["address"]
        p.district_office.voice = district_addr["phone"]
        p.district_office.fax = district_addr["fax"]
        p.capitol_office.address = capitol_addr["address"]
        p.capitol_office.voice = capitol_addr["phone"]
        p.capitol_office.fax = capitol_addr["fax"]
        return p


# Module-level Workflow built from a fresh AssemblyList instance
# (presumably the entry point the scrape runner looks up by name — confirm).
assembly_members = Workflow(AssemblyList())
Ejemplo n.º 2
0
    def process_item(self, item):
        """Convert one table row into a Person (state "mi", lower chamber).

        The row must have exactly seven children, in the order: website,
        district, name, party, office, phone, email. ``self.skip()`` is
        invoked when the first child is a ``th`` cell (the header row).
        """
        cells = item.getchildren()
        (
            link_cell,
            district_cell,
            name_cell,
            party_cell,
            office_cell,
            phone_cell,
            email_cell,
        ) = cells

        # The header row is made of <th> cells and carries no member data.
        if link_cell.tag == "th":
            self.skip()

        # Substitute every key of self.office_names with its mapped value
        # (abbreviation -> full name, going by the mapping's naming).
        office_text = office_cell.text_content()
        for short, long_form in self.office_names.items():
            office_text = office_text.replace(short, long_form)

        member = Person(
            name=name_cell.text_content(),
            state="mi",
            chamber="lower",
            district=district_cell.text_content().lstrip("0"),
            party=party_cell.text_content(),
            email=email_cell.text_content(),
        )

        website_href = CSS("a").match_one(link_cell).get("href")
        member.add_link(website_href)
        member.add_source(self.source.url)

        member.capitol_office.voice = phone_cell.text_content()
        member.capitol_office.address = office_text
        return member


# One module-level Workflow per list page, each given a fresh page instance
# (SenList / RepList are defined elsewhere in this file).
senators = Workflow(SenList())
reps = Workflow(RepList())
Ejemplo n.º 3
0
        return p


class RepList(HtmlListPage):
    """List page producing one PartialPerson per representative box."""

    source = "https://www.myfloridahouse.gov/Representatives"
    # Deliberately odd XPath: only the first 'team-page' container is used so
    # the partial-term people listed at the bottom of the page are excluded.
    selector = XPath("(//div[@class='team-page'])[1]//div[@class='team-box']")

    IMAGE_BASE = "https://www.myfloridahouse.gov/"

    def process_item(self, item):
        """Extract name, party, district, image and link from one team box.

        Each value is taken from the first node its XPath returns, so an
        IndexError is raised when an expected node is missing.
        """
        heading_text = item.xpath("./a/div[@class='team-txt']/h5/text()")[0]
        name = heading_text.strip()

        # First whitespace-separated token of the first paragraph's text.
        party_line = item.xpath("./a/div[@class='team-txt']/p[1]/text()")[0]
        party = party_line.split()[0]

        # Last whitespace-separated token of the paragraph's <span> text.
        district_line = item.xpath(
            "./a/div[@class='team-txt']/p[1]/span/text()"
        )[0]
        district = district_line.split()[-1]

        # The image path is read from data-src and joined onto IMAGE_BASE.
        photo_el = item.xpath(".//img")[0]
        image = self.IMAGE_BASE + photo_el.attrib["data-src"]

        href = item.xpath("./a/@href")[0]
        link = str(href)

        return PartialPerson(
            name=name,
            party=str(party),
            district=str(district),
            image=image,
            url=link,
        )


# Each Workflow pairs a list-page instance with a second page class that is
# passed uninstantiated (presumably run once per list item — confirm).
senators = Workflow(SenList(), SenDetail)
reps = Workflow(RepList(), RepContact)
Ejemplo n.º 4
0
        data = {
            "last_name": link.text_content(),
            "url": link.get("href"),
        }
        for key, label in self.LABELS.items():
            data[key] = CSS(f"[id$={label}]").match_one(item).text_content().strip()

        party = {"(D)": "Democratic", "(R)": "Republican"}[data["party"]]
        address = "Hawaii State Capitol, Room " + data["room"]
        chamber = "upper" if data["chamber"] == "S" else "lower"

        p = Person(
            name=data["first_name"] + " " + data["last_name"],
            state="hi",
            chamber=chamber,
            district=data["district"],
            given_name=data["first_name"],
            family_name=data["last_name"],
            party=party,
            email=data["email"],
        )
        p.capitol_office.address = address
        p.capitol_office.voice = data["voice"]
        p.capitol_office.fax = data["fax"]
        p.add_source(data["url"])
        p.add_link(data["url"])
        return p


# Single module-level Workflow built from a HawaiiLegislators instance;
# the chamber is decided per record inside the page class, not here.
all_legislators = Workflow(HawaiiLegislators())
Ejemplo n.º 5
0
        p = Person(
            chamber="legislature",
            party="Nonpartisan",
            state="ne",
            district=district,
            image=image,
            name=name,
            email=email,
        )
        p.capitol_office.address = "; ".join(address)
        p.capitol_office.voice = phone
        p.add_source(self.source.url)
        p.add_link(self.source.url)
        return p


class LegPageGenerator(ListPage):
    """
    NE is an interesting test case for Spatula, since there are individual senator pages
    but no real index that's useful at all.  Right now this is using a dummy source page
    to spawn the 49 subpage scrapers.
    """

    # The docstring above must be the first statement in the class body to be
    # picked up as ``__doc__``; it previously followed this assignment and was
    # therefore just a discarded string expression.
    source = NullSource()

    def get_data(self):
        """Yield the district page URL for each of districts 1 through 49.

        District numbers are zero-padded to two digits in the URL.
        """
        for n in range(1, 50):
            yield f"http://news.legislature.ne.gov/dist{n:02d}/"


# Workflow pairing the URL-generating list page (instance) with the LegPage
# class, which is passed uninstantiated.
legislators = Workflow(LegPageGenerator(), LegPage)
Ejemplo n.º 6
0
        return URL(self.input.url)

    def process_page(self):
        """Build a Person (state "mo", lower chamber) from self.input and the page.

        The party code on ``self.input`` must be "D" or "R"; any other value
        raises KeyError. The photo URL is read from the page itself, all
        remaining fields come from ``self.input``.
        """
        party_names = {"D": "Democratic", "R": "Republican"}
        party = party_names[self.input.party]

        photo_el = CSS("img#ContentPlaceHolder1_imgPhoto1").match_one(self.root)
        photo = photo_el.get("src")

        rep = Person(
            state="mo",
            party=party,
            image=photo,
            chamber="lower",
            district=self.input.district,
            name=f"{self.input.first_name} {self.input.last_name}",
            given_name=self.input.first_name,
            family_name=self.input.last_name,
        )
        # TODO: self.input.hometown is not recorded yet (was meant for extras).

        rep.capitol_office.voice = self.input.voice
        # Only the room number varies; the rest of the address is fixed text.
        capitol_address = (
            "MO House of Representatives; 201 West Capitol Avenue; "
            f"Room {self.input.room}; Jefferson City MO 65101 "
        )
        rep.capitol_office.address = capitol_address

        rep.add_link(self.input.url)
        rep.add_source(self.input.url)
        return rep


# Workflow pairing a HouseList instance with the HouseDetail class
# (passed uninstantiated).
house_members = Workflow(HouseList(), HouseDetail)
Ejemplo n.º 7
0
    # Selector for the senator's contact-form link; get_data() adds the
    # matched element's href to the Person under the note "Contact Form".
    contact_link_sel = SimilarLink(
        r"https://oksenate.gov/contact-senator\?sid=")

    def get_source_from_input(self):
        """Return the value stored under the ``"url"`` key of ``self.input``."""
        source_url = self.input["url"]
        return source_url

    def get_data(self):
        """Build a Person for the senator shown on this detail page.

        The party is read from the ``.bSenBio__infoIt`` entries (if several
        contain "Party:", the last one wins). Name, image, district, address,
        phone and the contact-form link come from the class-level selectors
        applied to ``self.root``.

        Returns:
            Person: state "ok", chamber "upper", with capitol office address
            and phone set and a "Contact Form" link attached.

        Raises:
            ValueError: if no bio entry contains "Party:".
        """
        party = None
        for bio in CSS(".bSenBio__infoIt").match(self.root):
            bio_text = bio.text_content()
            if "Party:" in bio_text:
                party = bio_text.split(":")[1].strip()
        if party is None:
            # Fail with a clear message instead of the UnboundLocalError the
            # Person(...) call used to hit when no party entry was present.
            raise ValueError("no 'Party:' entry found in senator bio")

        p = Person(
            name=self.name_css.match_one(self.root).text,
            state="ok",
            chamber="upper",
            party=party,
            image=self.image_css.match_one(self.root).get("href"),
            # District text is split on whitespace and the second token kept.
            district=self.district_css.match_one(
                self.root).text.strip().split()[1],
        )
        p.capitol_office.address = self.address_css.match_one(self.root).text
        # The office phone field is ``voice`` everywhere else in this code
        # base; this block previously assigned ``phone`` instead.
        # NOTE(review): confirm against the Person model.
        p.capitol_office.voice = self.phone_css.match_one(self.root).text
        p.add_link(
            self.contact_link_sel.match_one(self.root).get("href"),
            "Contact Form")

        return p


# One Workflow per chamber: a list-page instance paired with the matching
# detail page class (passed uninstantiated).
house_members = Workflow(HouseList(), HouseDetail)
senate_members = Workflow(SenateList(), SenateDetail)
Ejemplo n.º 8
0
        ]
        address = "\n".join((info_texts[0], info_texts[1]))

        phone_text = info_texts[2]
        # if validate_phone_number(phone_text):
        phone = phone_text

        email_text = item.xpath(".//a/@href")[1].replace("mailto:", "").strip()
        # if validate_email_address(email_text):
        email = email_text

        rep = Person(
            name=name,
            district=district,
            party=party,
            state="mn",
            chamber="lower",
            image=photo_url,
            email=email,
        )
        rep.add_link(url)
        rep.add_source(self.source.url)
        rep.capitol_office.address = address
        rep.capitol_office.phone = phone

        return rep


# One Workflow per chamber list page.
# NOTE(review): the page classes are passed uninstantiated here (RepList, not
# RepList()); confirm Workflow accepts a class as its first argument.
reps = Workflow(RepList)
sens = Workflow(SenList)
Ejemplo n.º 9
0
            name=name,
            family_name=last,
            given_name=first,
            state="sd",
            district=item["District"].lstrip("0"),
            chamber="upper" if item["MemberType"] == "S" else "lower",
            party=item["Politics"],
            email=item["EmailState"],
            image=
            "https://lawmakerdocuments.blob.core.usgovcloudapi.net/photos/" +
            item["Picture"].lower(),
        )

        address = item["HomeAddress1"]
        if item["HomeAddress2"]:
            address += "; " + item["HomeAddress2"]
        address += f"{item['HomeCity']}, {item['HomeState']} {item['HomeZip']}"

        p.district_office.address = address
        p.district_office.voice = item["HomePhone"]
        p.capitol_office.voice = item["CapitolPhone"]
        p.extras["occupation"] = item["Occupation"]

        url = f"https://sdlegislature.gov/Legislators/Profile/{item['SessionMemberId']}/Detail"
        p.add_link(url)
        p.add_source(url)
        return p


# Single Workflow built from a DirectoryListing instance; the chamber is
# chosen per record inside the page class, not here.
legislators = Workflow(DirectoryListing())
Ejemplo n.º 10
0
            email=email,
        )
        p.add_link(self.source.url)
        p.add_source(self.source.url)
        return p


class PersonList(HtmlListPage):
    """List page yielding a dict of chamber, district, party and URL per member cell."""

    selector = XPath("//div[@id='myDIV']//div[@class='p-0 member-index-cell']")

    def process_item(self, item):
        """Summarise one member cell as a plain dict.

        The district is the second whitespace-separated token of the third
        ``dd`` text node and the party is the fifth ``dd`` text node. The
        chamber is "upper" when the source URL contains "senate", otherwise
        "lower".
        """
        dd_values = XPath(".//dd/text()").match(item)
        district_tokens = dd_values[2].strip().split()
        district = district_tokens[1]
        party = dd_values[4].strip()

        if "senate" in self.source.url:
            chamber = "upper"
        else:
            chamber = "lower"

        return {
            "chamber": chamber,
            "district": district,
            "party": party,
            "url": XPath(".//dd/a[1]/@href").match_one(item),
        }


# The same PersonList / PersonDetail pair serves both chambers; only the
# index URL handed to PersonList differs. PersonList.process_item checks that
# URL for "senate" to decide the chamber.
house_members = Workflow(
    PersonList(
        source="http://mgaleg.maryland.gov/mgawebsite/Members/Index/house"),
    PersonDetail)
senate_members = Workflow(
    PersonList(
        source="http://mgaleg.maryland.gov/mgawebsite/Members/Index/senate"),
    PersonDetail)
Ejemplo n.º 11
0
        person = Person(
            name="{FirstName} {LastName}".format(**item_dict),
            given_name=item_dict["FirstName"],
            family_name=item_dict["LastName"],
            state="ak",
            party=item_dict["Party"],
            chamber=("upper" if chamber == "S" else "lower"),
            district=item_dict["District"],
            image=f"http://akleg.gov/images/legislators/{code}.jpg",
            email=item_dict["EMail"],
        )
        person.add_link(
            "http://www.akleg.gov/basis/Member/Detail/{}?code={}".format(self.session_num, code)
        )
        person.add_source("http://w3.akleg.gov/")

        if item_dict["Phone"]:
            phone = "907-" + item_dict["Phone"][0:3] + "-" + item_dict["Phone"][3:]
            person.capitol_office.voice = phone

        if item_dict["Building"] == "CAPITOL":
            person.capitol_office.address = "State Capitol Room {}; Juneau AK, 99801".format(
                item_dict["Room"]
            )

        return person


# Single Workflow built from a Legislators instance (class defined elsewhere
# in this file).
legislators = Workflow(Legislators())
Ejemplo n.º 12
0
                p.district_office.address += "; " + da["address2"]
            p.district_office.address += "; {city}, {state} {zip}".format(**da)
            p.district_office.address = p.district_office.address.strip()

        # photos
        if not item["photos"]:
            pass
        elif len(item["photos"]) == 1:
            p.image = item["photos"][0]["url"].split("?")[0]  # strip off ?size=mpSm for full size
        else:
            raise Exception("unknown photos configuration: " + str(item["photos"]))

        # extras
        p.extras["residence"] = item["residence"]
        p.extras["city"] = item["city"]
        p.extras["georgia_id"] = item["id"]
        if item["dateVacated"]:
            p.end_date = item["dateVacated"]

        url = (
            f"https://www.legis.ga.gov/members/{self.chamber_names[chamber_id]}/"
            f"{item['id']}?session={item['sessionId']}"
        )
        p.add_source(url)
        p.add_link(url)

        return p


# Workflow over a DirectoryListing whose source is the members-list API URL;
# NOTE(review): the trailing 1029 looks like a session id — confirm it is current.
legislators = Workflow(DirectoryListing(source="https://www.legis.ga.gov/api/members/list/1029"))
Ejemplo n.º 13
0
    # Declares PartialMember as the type of self.input for this page
    # (presumably checked by the page framework — confirm).
    input_type = PartialMember

    def get_source_from_input(self):
        """Return the senate member-page URL for the member in ``self.input``.

        The id is obtained from ``get_lis_id("upper", self.input.url)`` and
        placed in the ``id`` query parameter.
        """
        lis_id = get_lis_id("upper", self.input.url)
        member_page = f"http://apps.senate.virginia.gov/Senator/memberpage.php?id={lis_id}"
        return member_page

    def process_page(self):
        """Store the profile picture URL on ``self.input`` and return it.

        The image is the first ``src`` of an ``img`` with class
        ``profile_pic``, or None when the page has none. A protocol-relative
        URL (starting with ``//``) gets an ``https:`` prefix.
        """
        matches = self.root.xpath('.//img[@class="profile_pic"]/@src')
        if matches:
            photo = matches[0]
        else:
            photo = None

        if photo and photo.startswith("//"):
            photo = "https:" + photo

        self.input.image = photo
        return self.input


class DelegateDetail(MemberDetail):
    """MemberDetail variant for delegates (lower chamber) that also sets the photo URL."""

    role = "Delegate"
    chamber = "lower"

    def process_page(self):
        """Run the parent processing, then set ``image`` when a LIS id is found.

        The id returned by ``get_lis_id`` is rewritten as its first character
        followed by the rest parsed as an integer and zero-padded to four
        digits. When ``get_lis_id`` returns a falsy value the parent's result
        is returned unchanged.
        """
        person = super().process_page()
        raw_id = get_lis_id(self.chamber, self.input.url)
        if not raw_id:
            return person

        prefix, number = raw_id[0], int(raw_id[1:])
        lis_id = "{}{:04d}".format(prefix, number)
        person.image = f"http://memdata.virginiageneralassembly.gov/images/display_image/{lis_id}"
        return person


# Senators are given a tuple of two page classes (SenatePhotoDetail, then
# SenateDetail) — presumably run in that order per member, confirm.
# Delegates are given the single DelegateDetail class.
senators = Workflow(SenateList(), (SenatePhotoDetail, SenateDetail))
delegates = Workflow(DelegateList(), DelegateDetail)