def scrape_bill_type(
    self,
    chamber,
    session,
    bill_type,
    type_abbr,
    # NOTE(review): this default is evaluated ONCE at import time, not per
    # call. That is fine if get_committee_name_regex() is pure/cacheable,
    # but worth confirming; the conventional pattern is a None default.
    committee_abbr_regex=get_committee_name_regex(),
):
    """Yield Bill and VoteEvent objects for every CA bill of one measure type.

    Reads bills from the local CAPublic mirror (``self.session`` is a
    SQLAlchemy session over ``CABill``), builds pupa ``Bill`` objects with
    versions, titles, sponsors, actions, and votes, and yields them.

    :param chamber: 'upper' or 'lower' — expected origin chamber; bills whose
        ID prefix contradicts it are skipped.
    :param session: session year string, e.g. "20212022"; the first four
        characters are parsed as the archive-cutoff year.
    :param bill_type: pupa classification string (e.g. "bill").
    :param type_abbr: CA measure_type abbreviation to filter on (e.g. "AB").
    :param committee_abbr_regex: compiled regex used to pull committee
        abbreviations out of action text.
    """
    bills = (self.session.query(CABill).filter_by(
        session_year=session).filter_by(measure_type=type_abbr))

    # Sessions from 2009 onward have full XML data; older ones are "archive"
    # years handled by scraping the public vote pages instead.
    archive_year = int(session[0:4])
    not_archive_year = archive_year >= 2009
    for bill in bills:
        bill_session = session
        if bill.session_num != "0":
            bill_session += " Special Session %s" % bill.session_num

        bill_id = bill.short_bill_id
        # presumably a known-bad record in the 2005-2006 dataset — TODO confirm
        if bill_id.strip() == "SB77" and session == "20052006":
            continue
        fsbill = Bill(bill_id, bill_session, title="", chamber=chamber)
        # Senate bills must come from 'upper', Assembly bills from 'lower';
        # mismatches indicate bad source data and are skipped.
        if (bill_id.startswith("S") and chamber == "lower") or (
                bill_id.startswith("A") and chamber == "upper"):
            print("!!!! BAD ID/CHAMBER PAIR !!!!", bill)
            continue

        # Construct a fake source url
        source_url = ("http://leginfo.legislature.ca.gov/faces/"
                      "billNavClient.xhtml?bill_id=%s") % bill.bill_id
        fsbill.add_source(source_url)
        fsbill.add_version_link(bill_id, source_url, media_type="text/html")

        title = ""
        type_ = ["bill"]
        subject = ""
        all_titles = set()
        summary = ""

        # Get digest test (aka "summary") from latest version.
        if bill.versions and not_archive_year:
            version = bill.versions[-1]
            nsmap = version.xml.nsmap
            xpath = "//caml:DigestText/xhtml:p"
            els = version.xml.xpath(xpath, namespaces=nsmap)
            chunks = []
            for el in els:
                t = etree_text_content(el)
                # collapse whitespace, then re-insert the space the source
                # drops after a closing parenthesis
                t = re.sub(r"\s+", " ", t)
                t = re.sub(r"\)(\S)", lambda m: ") %s" % m.group(1), t)
                chunks.append(t)
            summary = "\n\n".join(chunks)

        # Walk every version: collect version links plus the *last* version's
        # title/tags/subject (loop variables deliberately leak past the loop).
        for version in bill.versions:
            if not version.bill_xml:
                continue

            version_date = self._tz.localize(version.bill_version_action_date)

            # create a version name to match the state's format
            # 02/06/17 - Enrolled
            version_date_human = version_date.strftime("%m/%d/%y")
            version_name = "{} - {}".format(version_date_human,
                                            version.bill_version_action)

            version_base = "https://leginfo.legislature.ca.gov/faces"

            version_url_pdf = "{}/billPdf.xhtml?bill_id={}&version={}".format(
                version_base, version.bill_id, version.bill_version_id)

            fsbill.add_version_link(
                version_name,
                version_url_pdf,
                media_type="application/pdf",
                date=version_date.date(),
            )

            # CA is inconsistent in that some bills have a short title
            # that is longer, more descriptive than title.
            if bill.measure_type in ("AB", "SB"):
                impact_clause = clean_title(version.title)
                title = clean_title(version.short_title)
            else:
                impact_clause = None
                if len(version.title) < len(
                        version.short_title) and not version.title.lower(
                        ).startswith("an act"):
                    title = clean_title(version.short_title)
                else:
                    title = clean_title(version.title)

            if title:
                all_titles.add(title)

            type_ = [bill_type]

            if version.appropriation == "Yes":
                type_.append("appropriation")

            tags = []
            if version.fiscal_committee == "Yes":
                tags.append("fiscal committee")
            if version.local_program == "Yes":
                tags.append("local program")
            if version.urgency == "Yes":
                tags.append("urgency")
            if version.taxlevy == "Yes":
                tags.append("tax levy")

            if version.subject:
                subject = clean_title(version.subject)

        if not title:
            self.warning("Couldn't find title for %s, skipping" % bill_id)
            continue

        fsbill.title = title
        if summary:
            fsbill.add_abstract(summary, note="summary")
        fsbill.classification = type_
        fsbill.subject = [subject] if subject else []
        fsbill.extras["impact_clause"] = impact_clause
        fsbill.extras["tags"] = tags

        # We don't want the current title in alternate_titles
        all_titles.remove(title)

        for title in all_titles:
            fsbill.add_title(title)

        # NOTE(review): `version` here is whatever the last loop iteration
        # left bound — sponsors come from the latest version only.
        for author in version.authors:
            fsbill.add_sponsorship(
                author.name,
                classification=SPONSOR_TYPES[author.contribution],
                primary=author.primary_author_flg == "Y",
                entity_type="person",
            )
            # fsbill.sponsorships[-1]['extras'] = {'official_type': author.contribution}

        seen_actions = set()
        for action in bill.actions:
            if not action.action:
                # NULL action text seems to be an error on CA's part,
                # unless it has some meaning I'm missing
                continue
            actor = action.actor or chamber
            actor = actor.strip()
            # Normalize the acting body to pupa chamber codes.
            match = re.match(r"(Assembly|Senate)($| \(Floor)", actor)
            if match:
                actor = {
                    "Assembly": "lower",
                    "Senate": "upper"
                }[match.group(1)]
            elif actor.startswith("Governor"):
                actor = "executive"
            else:

                def replacer(matchobj):
                    if matchobj:
                        return {
                            "Assembly": "lower",
                            "Senate": "upper"
                        }[matchobj.group()]
                    else:
                        return matchobj.group()

                actor = re.sub(r"^(Assembly|Senate)", replacer, actor)

            type_ = []

            act_str = action.action
            act_str = re.sub(r"\s+", " ", act_str)

            attrs = self.categorizer.categorize(act_str)

            # Add in the committee strings of the related committees, if any.
            kwargs = attrs
            matched_abbrs = committee_abbr_regex.findall(action.action)

            if re.search(r"Com[s]?. on", action.action) and not matched_abbrs:
                msg = "Failed to extract committee abbr from %r."
                self.logger.warning(msg % action.action)

            if matched_abbrs:
                committees = []
                for abbr in matched_abbrs:
                    try:
                        name = self.committee_abbr_to_name(chamber, abbr)
                        committees.append(name)
                    except KeyError:
                        msg = ("Mapping contains no committee name for "
                               "abbreviation %r. Action text was %r.")
                        args = (abbr, action.action)
                        self.warning(msg % args)

                # NOTE(review): filter() returns a ONE-SHOT iterator. When
                # not_archive_year is true, list(committees) in the assert
                # below exhausts it, so the zip() loop that follows never
                # runs (and kwargs["committees"] is left exhausted too).
                # Likely intended: committees = list(filter(None, committees)).
                committees = filter(None, committees)
                kwargs["committees"] = committees

                code = re.search(r"C[SXZ]\d+", actor)
                if code is not None:
                    code = code.group()
                    kwargs["actor_info"] = {"committee_code": code}
                if not_archive_year:
                    assert len(list(committees)) == len(matched_abbrs)
                # Expand committee abbreviations in the action string to
                # their full names.
                for committee, abbr in zip(committees, matched_abbrs):
                    act_str = act_str.replace("Coms. on ", "")
                    act_str = act_str.replace("Com. on " + abbr, committee)
                    act_str = act_str.replace(abbr, committee)
                    if not act_str.endswith("."):
                        act_str = act_str + "."

            # Determine which chamber the action originated from.
            changed = False
            for committee_chamber in ["upper", "lower", "legislature"]:
                if actor.startswith(committee_chamber):
                    actor = committee_chamber
                    changed = True
                    break
            if not changed:
                actor = "legislature"

            # Preserve the raw actor string when we rewrote it above.
            if actor != action.actor:
                actor_info = kwargs.get("actor_info", {})
                actor_info["details"] = action.actor
                kwargs["actor_info"] = actor_info

            # Add strings for related legislators, if any.
            rgx = r"(?:senator|assembly[mwp][^ .,:;]+)\s+[^ .,:;]+"
            legislators = re.findall(rgx, action.action, re.I)
            if legislators:
                kwargs["legislators"] = legislators

            date = action.action_date
            date = self._tz.localize(date)
            date = date.date()
            # Dedupe identical (actor, text, date) actions.
            if (actor, act_str, date) in seen_actions:
                continue

            kwargs.update(self.categorizer.categorize(act_str))

            action = fsbill.add_action(
                act_str,
                date.strftime("%Y-%m-%d"),
                chamber=actor,
                classification=kwargs["classification"],
            )
            for committee in kwargs.get("committees", []):
                action.add_related_entity(committee,
                                          entity_type="organization")
            seen_actions.add((actor, act_str, date))

        source_url = (
            "http://leginfo.legislature.ca.gov/faces/billVotesClient.xhtml?")
        source_url += f"bill_id={session}{bill.session_num}{fsbill.identifier}"

        # Votes for non archived years
        # NOTE(review): this branch tests `archive_year > 2009` while the
        # XML path above used `>= 2009` — 2009 itself falls through to the
        # archive branch below. Confirm that asymmetry is intentional.
        if archive_year > 2009:
            for vote_num, vote in enumerate(bill.votes):
                if vote.vote_result == "(PASS)":
                    result = True
                else:
                    result = False

                if not vote.location:
                    continue

                full_loc = vote.location.description
                first_part = full_loc.split(" ")[0].lower()
                if first_part in ["asm", "assembly"]:
                    vote_chamber = "lower"
                    # vote_location = ' '.join(full_loc.split(' ')[1:])
                elif first_part.startswith("sen"):
                    vote_chamber = "upper"
                    # vote_location = ' '.join(full_loc.split(' ')[1:])
                else:
                    # raise ScrapeError("Bad location: %s" % full_loc) # To uncomment
                    continue

                if vote.motion:
                    motion = vote.motion.motion_text or ""
                else:
                    motion = ""

                if "Third Reading" in motion or "3rd Reading" in motion:
                    vtype = "passage"
                elif "Do Pass" in motion:
                    vtype = "passage"
                else:
                    vtype = "other"

                # Strip session, chamber, and bill-number prefixes/suffixes
                # so the motion text is just the motion itself.
                motion = motion.strip()
                motion = re.compile(r"(\w+)( Extraordinary)? Session$",
                                    re.IGNORECASE).sub("", motion)
                motion = re.compile(r"^(Senate|Assembly) ",
                                    re.IGNORECASE).sub("", motion)
                motion = re.sub(r"^(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? ", "",
                                motion)
                motion = re.sub(r" \(\w+\)$", "", motion)
                motion = re.sub(r"(SCR|SB|AB|AJR|ACR)\s?\d+ \w+\.?$", "",
                                motion)
                motion = re.sub(
                    r"(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? "
                    r"Urgency Clause$",
                    "(Urgency Clause)",
                    motion,
                )
                motion = re.sub(r"\s+", " ", motion)

                if not motion:
                    self.warning("Got blank motion on vote for %s" % bill_id)
                    continue

                # XXX this is responsible for all the CA 'committee' votes, not
                # sure if that's a feature or bug, so I'm leaving it as is...
                # vote_classification = chamber if (vote_location == 'Floor') else 'committee'
                # org = {
                #     'name': vote_location,
                #     'classification': vote_classification
                # }

                fsvote = VoteEvent(
                    motion_text=motion,
                    start_date=self._tz.localize(vote.vote_date_time),
                    result="pass" if result else "fail",
                    classification=vtype,
                    # organization=org,
                    chamber=vote_chamber,
                    bill=fsbill,
                )
                fsvote.extras = {"threshold": vote.threshold}
                fsvote.add_source(source_url)
                fsvote.pupa_id = source_url + "#" + str(vote_num)

                rc = {"yes": [], "no": [], "other": []}
                for record in vote.votes:
                    if record.vote_code == "AYE":
                        rc["yes"].append(record.legislator_name)
                    elif record.vote_code.startswith("NO"):
                        rc["no"].append(record.legislator_name)
                    else:
                        rc["other"].append(record.legislator_name)

                # Handle duplicate votes
                for key in rc.keys():
                    rc[key] = list(set(rc[key]))

                for key, voters in rc.items():
                    for voter in voters:
                        fsvote.vote(key, voter)
                    # Set counts by summed votes for accuracy
                    fsvote.set_count(key, len(voters))

                yield fsvote

        # Archive years (<= 2009): scrape the public vote page HTML instead.
        if len(bill.votes) > 0 and archive_year <= 2009:
            vote_page_url = (
                "http://leginfo.legislature.ca.gov/faces/billVotesClient.xhtml?"
            )
            vote_page_url += (
                f"bill_id={session}{bill.session_num}{fsbill.identifier}")

            # parse the bill data page, finding the latest html text
            data = self.get(vote_page_url).content
            doc = html.fromstring(data)
            doc.make_links_absolute(vote_page_url)

            num_of_votes = len(doc.xpath("//div[@class='status']"))
            for vote_section in range(1, num_of_votes + 1):
                lines = doc.xpath(
                    f"//div[@class='status'][{vote_section}]//div[@class='statusRow']"
                )
                date, result, motion, vtype, location = "", "", "", "", ""
                votes = {}
                # Each statusRow is "Label value value ..."; dispatch on the
                # first token.
                for line in lines:
                    line = line.text_content().split()
                    if line[0] == "Date":
                        date = line[1]
                        date = datetime.datetime.strptime(date, "%m/%d/%y")
                        date = self._tz.localize(date)
                    elif line[0] == "Result":
                        result = "pass" if "PASS" in line[1] else "fail"
                    elif line[0] == "Motion":
                        motion = " ".join(line[1:])
                    elif line[0] == "Location":
                        location = " ".join(line[1:])
                    elif len(line) > 1:
                        if line[0] == "Ayes" and line[1] != "Count":
                            votes["yes"] = line[1:]
                        elif line[0] == "Noes" and line[1] != "Count":
                            votes["no"] = line[1:]
                        elif line[0] == "NVR" and line[1] != "Count":
                            votes["not voting"] = line[1:]

                # Determine chamber based on location
                first_part = location.split(" ")[0].lower()
                vote_chamber = ""
                if first_part in ["asm", "assembly"]:
                    vote_chamber = "lower"
                elif first_part.startswith("sen"):
                    vote_chamber = "upper"

                if "Third Reading" in motion or "3rd Reading" in motion:
                    vtype = "passage"
                elif "Do Pass" in motion:
                    vtype = "passage"
                else:
                    vtype = "other"

                if len(motion) > 0:
                    fsvote = VoteEvent(
                        motion_text=motion,
                        start_date=date,
                        result=result,
                        classification=vtype,
                        chamber=vote_chamber,
                        bill=fsbill,
                    )
                    fsvote.add_source(vote_page_url)
                    fsvote.pupa_id = vote_page_url + "#" + str(vote_section)

                    for how_voted, voters in votes.items():
                        for voter in voters:
                            voter = voter.replace(",", "")
                            fsvote.vote(how_voted, voter)
                    yield fsvote

        yield fsbill
        # Drop cached ORM state so memory stays bounded over long runs.
        self.session.expire_all()
def scrape_bill_list(self, url):
    """Yield Bill objects (and their votes) for every bill on one ALISON
    search-result page.

    For each row: build the ``Bill``, fetch its detail page, attach title,
    versions, Budget Isolation Resolution actions/votes, history actions,
    roll-call votes, and amendment documents.

    :param url: the ALISON bill-list results page URL.
    """
    bill_list = self._get_bill_list(url)

    for bill_info in bill_list:
        # Single-element xpath unpacks raise ValueError if the page shape
        # changes — that is intentional (fail loudly on layout drift).
        (bill_id, ) = bill_info.xpath("td[1]/font/input/@value")
        (sponsor, ) = bill_info.xpath("td[2]/font/input/@value")
        (subject, ) = bill_info.xpath("td[3]//text()")
        subject = subject.strip()
        chamber = self.CHAMBERS[bill_id[0]]

        # Order matters: "JR" contains "R", so test it before plain "R".
        if "B" in bill_id:
            bill_type = "bill"
        elif "JR" in bill_id:
            bill_type = "joint resolution"
        elif "R" in bill_id:
            bill_type = "resolution"
        else:
            raise AssertionError(
                "Unknown bill type for bill '{}'".format(bill_id))

        bill = Bill(
            bill_id,
            legislative_session=self.session,
            chamber=chamber,
            title="",
            classification=bill_type,
        )
        if subject:
            bill.subject = [subject]
        if sponsor:
            bill.add_sponsorship(
                name=sponsor,
                entity_type="person",
                classification="primary",
                primary=True,
            )
        bill.add_source(url)

        bill_url = ("http://alisondb.legislature.state.al.us/Alison/"
                    "SESSBillStatusResult.aspx?BILL={}".format(bill_id))
        bill.add_source(bill_url)

        bill_html = self._get_bill_response(bill_url)
        if bill_html is None:
            self.warning(
                "Bill {} has no webpage, and will be skipped".format(bill_id))
            continue
        bill_doc = lxml.html.fromstring(bill_html)

        # "lblShotTitle" is the state's own element id (sic).
        # NOTE(review): if this xpath matches nothing, `title` is unbound on
        # the first iteration (NameError) and STALE from the previous bill on
        # later iterations — `title` should be reset before this `if`.
        if bill_doc.xpath('//span[@id="ContentPlaceHolder1_lblShotTitle"]'):
            title = (bill_doc.xpath(
                '//span[@id="ContentPlaceHolder1_lblShotTitle"]')
                [0].text_content().strip())
        if not title:
            title = "[No title given by state]"
        bill.title = title
        # The 2022 first special session lives under "2022FS" on ALISON.
        session = "2022FS" if self.session == "2022s1" else self.session
        version_url_base = (
            "http://alisondb.legislature.state.al.us/ALISON/"
            "SearchableInstruments/{0}/PrintFiles/{1}-".format(
                session, bill_id))
        versions = bill_doc.xpath(
            '//table[@class="box_versions"]/tr/td[2]/font/text()')
        for version in versions:
            name = version
            if version == "Introduced":
                version_url = version_url_base + "int.pdf"
            elif version == "Engrossed":
                version_url = version_url_base + "eng.pdf"
            elif version == "Enrolled":
                version_url = version_url_base + "enr.pdf"
            else:
                raise NotImplementedError(
                    "Unknown version type found: '{}'".format(name))

            bill.add_version_link(
                name,
                version_url,
                media_type="application/pdf",
                on_duplicate="ignore",
            )

        # Fiscal notes exist, but I can't figure out how to build their URL
        fiscal_notes = bill_doc.xpath('//table[@class="box_fiscalnote"]')[1:]
        for fiscal_note in fiscal_notes:
            pass

        # Budget Isolation Resolutions are handled as extra actions/votes
        birs = bill_doc.xpath('//div[@class="box_bir"]//table//table/tr')[1:]
        for bir in birs:
            bir_action = bir.xpath("td[1]")[0].text_content().strip()
            # Sometimes ALISON's database puts another bill's
            # actions into the BIR action list; ignore these
            if bill_id not in bir_action:
                self.warning(
                    "BIR action found ({}) ".format(bir_action) +
                    "that doesn't match the bill ID ({})".format(bill_id))
                continue

            bir_date = datetime.datetime.strptime(
                bir.xpath("td[2]/font/text()")[0], self.DATE_FORMAT)
            bir_type = bir.xpath("td[1]/font/text()")[0].split(" ")[0]
            bir_chamber = self.CHAMBERS[bir_type[0]]
            bir_text = "{0}: {1}".format(
                bir_type, bir.xpath("td[3]/font/text()")[0].strip())

            bill.add_action(
                bir_text,
                TIMEZONE.localize(bir_date),
                chamber=bir_chamber,
                classification="other",
            )

            try:
                (bir_vote_id, ) = bir.xpath("td[4]/font/input/@value")
            except ValueError:
                # no roll-call button in this row
                bir_vote_id = ""

            bir_vote_id = bir_vote_id.strip()
            if bir_vote_id.startswith("Roll "):
                bir_vote_id = bir_vote_id.split(" ")[-1]

                yield from self.scrape_vote(
                    bill=bill,
                    vote_chamber=bir_type[0],
                    # URL-encoded "{type} for {bill_id}" lookup key
                    bill_id="{0}%20for%20{1}".format(bir_type, bill_id),
                    vote_id=bir_vote_id,
                    vote_date=TIMEZONE.localize(bir_date),
                    action_text=bir_text,
                )

        actions = bill_doc.xpath(
            '//table[@id="ContentPlaceHolder1_gvHistory"]/tr')[1:]
        action_date = None
        for action in actions:
            # If actions occur on the same day, only one date will exist
            if (action.xpath("td[1]/font/text()")[0].encode(
                    "ascii", "ignore").strip()):
                action_date = datetime.datetime.strptime(
                    action.xpath("td[1]/font/text()")[0], self.DATE_FORMAT)

            (action_chamber, ) = action.xpath("td[2]/font/text()")
            possible_amendment = action.xpath("td[3]/font/u/text()")
            if (len(possible_amendment) > 0
                    and not possible_amendment[0].strip() == ""):
                (amendment, ) = possible_amendment
            else:
                amendment = None
            (action_text, ) = action.xpath("td[4]/font/text()")

            action_type = _categorize_action(action_text)

            # check for occasional extra last row
            if not action_chamber.strip():
                continue

            # The committee cell is just an abbreviation, so get its name
            actor = self.CHAMBERS[action_chamber]
            try:
                action_committee = (re.search(
                    r".*? referred to the .*? committee on (.*?)$",
                    action_text).group(1).strip())
            except AttributeError:
                # re.search returned None: not a referral action
                action_committee = ""

            if action_date is not None and action_text.strip():
                act = bill.add_action(
                    action_text,
                    TIMEZONE.localize(action_date),
                    chamber=actor,
                    classification=action_type,
                )

                if action_committee:
                    act.add_related_entity(action_committee,
                                           entity_type="organization")

                try:
                    vote_button = action.xpath("td[9]//text()")[0].strip()
                except IndexError:
                    vote_button = ""

                if vote_button.startswith("Roll "):
                    vote_id = vote_button.split(" ")[-1]

                    yield from self.scrape_vote(
                        bill=bill,
                        vote_chamber=action_chamber,
                        bill_id=bill_id,
                        vote_id=vote_id,
                        vote_date=TIMEZONE.localize(action_date),
                        action_text=action_text,
                    )

                if amendment:
                    # The 2021 first special session lives under "2021FS".
                    session = "2021FS" if self.session == "2021s1" else self.session
                    amend_url = (
                        "http://alisondb.legislature.state.al.us/ALISON/"
                        "SearchableInstruments/{0}/PrintFiles/{1}.pdf".format(
                            session, amendment))

                    amend_name = "Amd/Sub {}".format(amendment)
                    bill.add_version_link(
                        amend_name,
                        amend_url,
                        media_type="application/pdf",
                        on_duplicate="ignore",
                    )

        yield bill
def scrape(self, session=None, chambers=None):
    """Yield Bill and VoteEvent objects for an Ohio General Assembly session.

    Sessions before the 131st use the legacy HTML scraper
    (``self.old_scrape``); 131+ use the undocumented solarapi JSON API.

    :param session: GA number as a string (e.g. "134"); defaults to
        ``self.latest_session()``.
    :param chambers: accepted for interface compatibility; unused here.
    :raises AssertionError: for sessions before the 128th (no data), or if a
        veto/disapprove record is ever encountered (unhandled — see below).
    """
    # Bills endpoint can sometimes take a very long time to load
    self.timeout = 300

    if not session:
        session = self.latest_session()
        self.info("no session, using %s", session)

    if int(session) < 128:
        raise AssertionError("No data for period {}".format(session))

    elif int(session) < 131:
        # they changed their data format starting in 131st and added
        # an undocumented API
        yield from self.old_scrape(session)

    else:
        # Map the API's chamber labels to pupa chamber codes.
        chamber_dict = {
            "Senate": "upper",
            "House": "lower",
            "House of Representatives": "lower",
            "house": "lower",
            "senate": "upper",
        }
        # so presumably not everything passes, but we haven't
        # seen anything not pass yet, so we'll need to wait
        # till it fails and get the right language in here
        vote_results = {
            "approved": True,
            "passed": True,
            "adopted": True,
            "true": True,
            "false": False,
            "failed": False,
            True: True,
            False: False,
        }
        # API action codes -> pupa action classification (None = unclassified;
        # a list means the action carries multiple classifications).
        action_dict = {
            "ref_ctte_100": "referral-committee",
            "intro_100": "introduction",
            "intro_101": "introduction",
            "pass_300": "passage",
            "intro_110": "reading-1",
            "refer_210": "referral-committee",
            "crpt_301": None,
            "crpt_317": None,
            "concur_606": "passage",
            "pass_301": "passage",
            "refer_220": "referral-committee",
            "intro_102": ["introduction", "passage"],
            "intro_105": ["introduction", "passage"],
            "intro_ref_ctte_100": "referral-committee",
            "refer_209": None,
            "intro_108": ["introduction", "passage"],
            "intro_103": ["introduction", "passage"],
            "msg_reso_503": "passage",
            "intro_107": ["introduction", "passage"],
            "imm_consid_360": "passage",
            "refer_213": None,
            "adopt_reso_100": "passage",
            "adopt_reso_110": "passage",
            "msg_507": "amendment-passage",
            "confer_713": None,
            "concur_603": None,
            "confer_712": None,
            "msg_506": "amendment-failure",
            "receive_message_100": "passage",
            "motion_920": None,
            "concur_611": None,
            "confer_735": None,
            "third_429": None,
            "final_501": None,
            "concur_608": None,
            "infpass_217": "passage",
        }

        base_url = "https://search-prod.lis.state.oh.us"
        first_page = base_url
        first_page += "/solarapi/v1/general_assembly_{session}/".format(
            session=session)
        legislators = self.get_legislator_ids(first_page)
        # Pre-fetch the session-wide document indexes once; attached
        # per-bill further down. ("synopsiss"/"analysiss" are the API's own
        # endpoint spellings.)
        all_amendments = self.get_other_data_source(first_page, base_url,
                                                    "amendments")
        all_fiscals = self.get_other_data_source(first_page, base_url,
                                                 "fiscals")
        all_synopsis = self.get_other_data_source(first_page, base_url,
                                                  "synopsiss")
        all_analysis = self.get_other_data_source(first_page, base_url,
                                                  "analysiss")

        for row in self.get_bill_rows(session):
            (
                spacer,
                number_link,
                _ga,
                title,
                primary_sponsor,
                status,
                spacer,
            ) = row.xpath("td")

            # S.R.No.1 -> SR1
            bill_id = number_link.text_content().replace("No.", "")
            bill_id = bill_id.replace(".", "").replace(" ", "")
            # put one space back in between type and number
            bill_id = re.sub(r"([a-zA-Z]+)(\d+)", r"\1 \2", bill_id)

            title = title.text_content().strip()
            title = re.sub(r"^Title", "", title)

            chamber = "lower" if "H" in bill_id else "upper"
            classification = "bill" if "B" in bill_id else "resolution"

            if not title and session == "134" and bill_id == "HR 35":
                # Exception for HR 35 which is a real bill
                title = "No title provided"
            elif not title:
                self.warning(f"no title for {bill_id}, skipping")
                continue
            bill = Bill(
                bill_id,
                legislative_session=session,
                chamber=chamber,
                title=title,
                classification=classification,
            )
            bill.add_source(number_link.xpath("a/@href")[0])

            if (session, bill_id) in BAD_BILLS:
                self.logger.warning(
                    f"Skipping details for known bad bill {bill_id}")
                yield bill
                continue

            # get bill from API
            bill_api_url = (
                "https://search-prod.lis.state.oh.us/solarapi/v1/"
                "general_assembly_{}/{}/{}/".format(
                    session,
                    "bills" if "B" in bill_id else "resolutions",
                    bill_id.lower().replace(" ", ""),
                ))
            # NOTE(review): verify=False disables TLS certificate checking —
            # presumably the state endpoint has a broken cert chain; confirm
            # this is still necessary.
            data = self.get(bill_api_url, verify=False).json()
            if len(data["items"]) == 0:
                self.logger.warning(
                    "Data for bill {bill_id} has empty 'items' array,"
                    " cannot process related information".format(
                        bill_id=bill_id.lower().replace(" ", "")))
                yield bill
                continue

            # add title if no short title
            if not bill.title:
                bill.title = data["items"][0]["longtitle"]
            bill.add_title(data["items"][0]["longtitle"], "long title")

            # this stuff is version-specific
            for version in data["items"]:
                version_name = version["version"]
                version_link = base_url + version["pdfDownloadLink"]
                bill.add_version_link(version_name,
                                      version_link,
                                      media_type="application/pdf")

            # we'll use latest bill_version for everything else
            bill_version = data["items"][0]
            bill.add_source(bill_api_url)

            # subjects
            for subj in bill_version["subjectindexes"]:
                try:
                    bill.add_subject(subj["primary"])
                except KeyError:
                    pass
                try:
                    secondary_subj = subj["secondary"]
                except KeyError:
                    secondary_subj = ""
                if secondary_subj:
                    bill.add_subject(secondary_subj)

            # sponsors
            sponsors = bill_version["sponsors"]
            for sponsor in sponsors:
                sponsor_name = self.get_sponsor_name(sponsor)
                bill.add_sponsorship(
                    sponsor_name,
                    classification="primary",
                    entity_type="person",
                    primary=True,
                )

            cosponsors = bill_version["cosponsors"]
            for sponsor in cosponsors:
                sponsor_name = self.get_sponsor_name(sponsor)
                bill.add_sponsorship(
                    sponsor_name,
                    classification="cosponsor",
                    entity_type="person",
                    primary=False,
                )

            try:
                action_doc = self.get(base_url +
                                      bill_version["action"][0]["link"])
            except scrapelib.HTTPError:
                # no action list available for this bill; best-effort skip
                pass
            else:
                actions = action_doc.json()
                # API lists actions newest-first; reverse to chronological.
                for action in reversed(actions["items"]):
                    actor = chamber_dict[action["chamber"]]
                    action_desc = action["description"]
                    try:
                        action_type = action_dict[action["actioncode"]]
                    except KeyError:
                        self.warning(
                            "Unknown action {desc} with code {code}."
                            " Add it to the action_dict"
                            ".".format(desc=action_desc,
                                       code=action["actioncode"]))
                        action_type = None

                    date = self._tz.localize(
                        datetime.datetime.strptime(action["datetime"],
                                                   "%Y-%m-%dT%H:%M:%S"))
                    date = "{:%Y-%m-%d}".format(date)

                    bill.add_action(action_desc,
                                    date,
                                    chamber=actor,
                                    classification=action_type)

            # attach documents gathered earlier
            self.add_document(all_amendments, bill_id, "amendment", bill,
                              base_url)
            self.add_document(all_fiscals, bill_id, "fiscal", bill, base_url)
            self.add_document(all_synopsis, bill_id, "synopsis", bill,
                              base_url)
            self.add_document(all_analysis, bill_id, "analysis", bill,
                              base_url)

            # votes
            # NOTE(review): on HTTPError the bill is yielded here AND could
            # have been yielded again at the bottom on a later error path —
            # verify downstream dedupes bills by id.
            vote_url = base_url + bill_version["votes"][0]["link"]
            try:
                vote_doc = self.get(vote_url)
            except scrapelib.HTTPError:
                self.warning(
                    "Vote page not loading; skipping: {}".format(vote_url))
                yield bill
                continue
            votes = vote_doc.json()
            yield from self.process_vote(
                votes,
                vote_url,
                base_url,
                bill,
                legislators,
                chamber_dict,
                vote_results,
            )

            vote_url = base_url
            vote_url += bill_version["cmtevotes"][0]["link"]
            try:
                vote_doc = self.get(vote_url)
            except scrapelib.HTTPError:
                self.warning(
                    "Vote page not loading; skipping: {}".format(vote_url))
                yield bill
                continue
            votes = vote_doc.json()
            yield from self.process_vote(
                votes,
                vote_url,
                base_url,
                bill,
                legislators,
                chamber_dict,
                vote_results,
            )

            if data["items"][0]["effective_date"]:
                effective_date = datetime.datetime.strptime(
                    data["items"][0]["effective_date"], "%Y-%m-%d")
                effective_date = self._tz.localize(effective_date)
                # the OH website adds an action that isn't in the action list JSON.
                # It looks like:
                # Effective 7/6/18
                effective_date_oh = "{:%-m/%-d/%y}".format(effective_date)
                effective_action = "Effective {}".format(effective_date_oh)
                bill.add_action(
                    effective_action,
                    effective_date,
                    chamber="executive",
                    classification=["became-law"],
                )

            # we have never seen a veto or a disapprove, but they seem important.
            # so we'll check and throw an error if we find one
            # life is fragile. so are our scrapers.
            if "veto" in bill_version:
                veto_url = base_url + bill_version["veto"][0]["link"]
                veto_json = self.get(veto_url).json()
                if len(veto_json["items"]) > 0:
                    raise AssertionError("Whoa, a veto! We've never"
                                         " gotten one before."
                                         " Go write some code to deal"
                                         " with it: {}".format(veto_url))

            if "disapprove" in bill_version:
                disapprove_url = base_url + bill_version["disapprove"][0][
                    "link"]
                disapprove_json = self.get(disapprove_url).json()
                if len(disapprove_json["items"]) > 0:
                    raise AssertionError(
                        "Whoa, a disapprove! We've never"
                        " gotten one before."
                        " Go write some code to deal "
                        "with it: {}".format(disapprove_url))

            yield bill