def process_vote(self, data):
        """Build a ``Vote`` from one vote record and save it.

        ``data`` is a dict supplying the keys read below: ``organization``,
        ``bill``, ``counts``, ``start_date``, ``motion_text``, ``result``,
        ``bill_action``, ``legislative_session``, ``votes``, ``sources`` and
        ``extras``.  The finished vote is handed to ``self.save_vote``.
        """
        org = parse_psuedo_id(data['organization'])
        chamber = org['classification']
        bill_chamber, bill_id = self.get_bill_details(data['bill'])

        # A 'legislature' classification is recorded as the upper chamber,
        # both for the voting body and for the bill's chamber.
        chamber = 'upper' if chamber == 'legislature' else chamber
        bill_chamber = ('upper' if bill_chamber == 'legislature'
                        else bill_chamber)

        # yes/no keep the last value listed (None when never listed); every
        # other option is folded into a single running total.
        totals = {'yes': None, 'no': None}
        other_total = 0
        for tally in data['counts']:
            option = tally['option']
            if option in ('yes', 'no'):
                totals[option] = tally['value']
            else:
                other_total += tally['value']

        vote = Vote(
            chamber=chamber,
            date=parse_date(data['start_date']),
            motion=data['motion_text'],
            passed=data['result'] == 'pass',
            yes_count=totals['yes'],
            no_count=totals['no'],
            other_count=other_total,
            action=data['bill_action'],
            # TODO: was data['motion_classification'],
            type='other',
            session=data['legislative_session'],
            bill_chamber=bill_chamber,
            bill_id=bill_id,
        )

        # Record each individual voter under yes, no or other.
        for ballot in data['votes']:
            choice = ballot['option']
            recorder = getattr(
                vote, choice if choice in ('yes', 'no') else 'other')
            recorder(ballot['voter_name'])

        for origin in data['sources']:
            vote.add_source(origin['url'])

        # Extras are merged into the vote as additional fields.
        vote.update(**data['extras'])

        self.save_vote(vote)
Esempio n. 2
0
    def process_vote(self, data):
        """Build a ``Vote`` from one vote record and save it.

        ``data`` is a dict supplying the keys read below: ``organization``,
        ``bill``, ``counts``, ``start_date``, ``motion_text``, ``result``,
        ``bill_action``, ``legislative_session``, ``votes``, ``sources`` and
        ``extras``.  The finished vote is handed to ``self.save_vote``.
        """
        org = parse_psuedo_id(data['organization'])
        chamber = org['classification']
        bill_chamber, bill_id = self.get_bill_details(data['bill'])

        # A 'legislature' classification is recorded as the upper chamber,
        # both for the voting body and for the bill's chamber.
        chamber = 'upper' if chamber == 'legislature' else chamber
        bill_chamber = ('upper' if bill_chamber == 'legislature'
                        else bill_chamber)

        # yes/no keep the last value listed (None when never listed); every
        # other option is folded into a single running total.
        totals = {'yes': None, 'no': None}
        other_total = 0
        for tally in data['counts']:
            option = tally['option']
            if option in ('yes', 'no'):
                totals[option] = tally['value']
            else:
                other_total += tally['value']

        vote = Vote(
            chamber=chamber,
            date=parse_date(data['start_date']),
            motion=data['motion_text'],
            passed=data['result'] == 'pass',
            yes_count=totals['yes'],
            no_count=totals['no'],
            other_count=other_total,
            action=data['bill_action'],
            # TODO: was data['motion_classification'],
            type='other',
            session=data['legislative_session'],
            bill_chamber=bill_chamber,
            bill_id=bill_id,
        )

        # Record each individual voter under yes, no or other.
        for ballot in data['votes']:
            choice = ballot['option']
            recorder = getattr(
                vote, choice if choice in ('yes', 'no') else 'other')
            recorder(ballot['voter_name'])

        for origin in data['sources']:
            vote.add_source(origin['url'])

        # Extras are merged into the vote as additional fields.
        vote.update(**data['extras'])

        self.save_vote(vote)
Esempio n. 3
0
    def scrape_journal(self, url, chamber, session, date):
        """Scrape the floor votes recorded in one journal PDF and save them.

        The PDF at ``url`` is downloaded and converted to text.  Every line
        that starts with "On the question" and contains "shall" opens a
        motion; following lines are appended until the chamber-specific end
        of the motion is seen.  The bill id and motion text are extracted,
        the tallies are read with ``self.parse_votes`` and one ``Vote`` per
        motion is passed to ``self.save_vote``.

        :param url: location of the journal PDF; also recorded as the
            vote's source.
        :param chamber: ``'upper'`` or ``'lower'``; selects how the end of a
            motion is recognised.
        :param session: passed through to ``Vote``.
        :param date: passed through to ``Vote``.
        :raises ValueError: if ``chamber`` is neither 'upper' nor 'lower'.
        :raises AssertionError: if a vote is reported as passed without
            having more yes than no votes.
        """
        # The end of a motion is recognised differently per chamber, so an
        # unknown chamber cannot be parsed; fail before downloading anything.
        if chamber not in ('upper', 'lower'):
            raise ValueError("Unknown chamber: {!r}".format(chamber))

        filename, response = self.urlretrieve(url)
        self.logger.info('Saved journal to %r' % filename)
        all_text = convert_pdf(filename, type="text")

        # Normalise the dash and quote characters found in the converted
        # text so the patterns below only deal with '-' and '"'.
        lines = [
            line.strip()
                .replace("–", "-")
                .replace("―", '"')
                .replace("‖", '"')
                .replace('“', '"')
                .replace('”', '"')
            for line in all_text.split("\n")
        ]

        # Do not process headers or completely empty lines
        header_date_re = r"\d+\w{2} Day\s+\w+DAY, \w+ \d{1,2}, \d{4}\s+\d+"
        header_journal_re = r"\d+\s+JOURNAL OF THE \w+\s+\d+\w{2} Day"
        lines = iter([
            line for line in lines
            if not (line == "" or
                    re.match(header_date_re, line) or
                    re.match(header_journal_re, line))
        ])

        # These patterns depend only on the chamber, so build them once
        # instead of on every motion.
        bill_re = r'\(\s*([A-Z\.]+\s\d+)\s*\)'
        if chamber == "upper":
            # The Senate ends its motion text with a vote announcement
            end_of_motion_re = r'.* the vote was:\s*'
            trailing_re = r',?.*?the\svote\swas:'
        else:
            # The House may or may not end motion text with a bill name
            end_of_motion_re = r'.*Shall.*\?"?(\s{})?\s*'.format(bill_re)
            trailing_re = ''
        motion_re = r'''
                    ^On\sthe\squestion\s  # Precedes any motion
                    "  # Motion is preceded by a quote mark
                    (Shall\s.+?\??)  # The motion text begins with "Shall"
                    \s*"\s+  # Motion is followed by a quote mark
                    (?:{})?  # If the vote regards a bill, its number is listed
                    {}  # Senate has trailing text
                    \s*$
                    '''.format(bill_re, trailing_re)

        for line in lines:
            # Only lines that open a question containing "shall" start a
            # vote; everything else is skipped.
            if not (line.startswith("On the question") and
                    "shall" in line.lower()):
                continue

            # A motion may span several lines: keep appending until the
            # chamber-specific ending is present.  next() works on both
            # Python 2 and 3, unlike the iterator's .next() method.
            try:
                while not re.match(end_of_motion_re, line, re.IGNORECASE):
                    line += " " + next(lines)
            except StopIteration:
                self.warning("Journal {} ended in the middle of a motion; "
                             "the unfinished motion was skipped".format(url))
                return

            # Get the bill_id
            bill_match = re.search(bill_re, line)
            if bill_match is None:
                self.warning("This motion did not pertain to legislation: {}".
                             format(line))
                continue
            bill_id = bill_match.group(1)

            # Get the motion text
            motion_match = re.search(motion_re, line,
                                     re.VERBOSE | re.IGNORECASE)
            if motion_match is None:
                self.warning("Could not extract the motion text: {}".
                             format(line))
                continue
            motion = motion_match.group(1)

            # Abbreviate spelled-out chamber/file words in the bill id.
            for word, letter in (('Senate', 'S'),
                                 ('House', 'H'),
                                 ('File', 'F')):
                bill_id = bill_id.replace(word, letter)

            # The first letter of the bill id names the originating chamber;
            # any other letter raises KeyError.
            bill_chamber = dict(h='lower', s='upper')[bill_id.lower()[0]]
            self.current_id = bill_id
            # parse_votes is handed the same iterator, presumably to read
            # the tally lines that follow the motion.
            votes, passed = self.parse_votes(lines)

            # At the very least there should be a majority for the bill to
            # have passed, so check that.  A bill that did not pass may still
            # have had a majority (e.g. constitutional amendments).  Raised
            # explicitly so the check also runs under ``python -O``.
            has_majority = votes['yes_count'] > votes['no_count']
            if passed and not (passed == has_majority):
                raise AssertionError(
                    "Vote on {} is marked as passed without a majority".
                    format(bill_id))

            # Also warn if the bill failed but got a majority; it could be
            # OK, but is probably something we'd want to check.
            if not passed and has_majority:
                self.logger.warning("The bill got a majority but did not pass. Could be worth confirming.")

            vote = Vote(motion=re.sub('\xad', '-', motion),
                        passed=passed,
                        chamber=chamber, date=date,
                        session=session, bill_id=bill_id,
                        bill_chamber=bill_chamber,
                        **votes)
            vote.update(votes)
            vote.add_source(url)

            self.save_vote(vote)
Esempio n. 4
0
    def scrape_journal(self, url, chamber, session, date):
        """Scrape the floor votes recorded in one journal PDF and save them.

        The PDF at ``url`` is downloaded, converted and parsed as XML.  Each
        journal line containing "Shall" together with "bill pass?",
        "resolution" or "amendment" opens a motion; following lines are
        appended until a parenthesised bill id is found.  The tallies are
        read with ``self.parse_votes`` and one ``Vote`` per motion is passed
        to ``self.save_vote``.

        :param url: location of the journal PDF; also recorded as the
            vote's source.
        :param chamber: passed through to ``Vote``.
        :param session: passed through to ``Vote``.
        :param date: passed through to ``Vote``.
        """
        filename, response = self.urlretrieve(url)
        self.logger.info('Saved journal to %r' % filename)
        xml = convert_pdf(filename)
        try:
            et = lxml.etree.fromstring(xml)
        except lxml.etree.XMLSyntaxError:
            self.logger.warning('Skipping invalid pdf: %r' % filename)
            return

        bill_id_re = re.compile(r'\(\s*([A-Z\.]+\s+\d+)\s*\)')
        motion_keywords = ('bill pass?', 'resolution', 'amendment')

        lines = self._journal_lines(et)
        for line in lines:
            text = gettext(line)

            # Go through with vote parse only for "Shall ..." questions
            # about a bill, a resolution or an amendment.
            if 'Shall' not in text:
                continue
            if not any(keyword in text for keyword in motion_keywords):
                continue

            # Get the bill_id.  The motion can run over several lines, so
            # keep appending the following lines until the id shows up.
            bill_id = None
            for extra_line in lines:
                text += gettext(extra_line)
                m = bill_id_re.search(text)
                if m:
                    bill_id = m.group(1)
                    break

            if bill_id is None:
                # The journal ran out before a bill id appeared; nothing is
                # left to parse.
                self.logger.warning(
                    'Journal ended before a bill id was found: %r' % filename)
                return

            motion = re.sub(r'\s+', ' ', text.strip())
            # Drop everything from the last "(" onwards.  Limiting the split
            # to one keeps motions containing several "(" from raising
            # ValueError.
            motion = motion.rsplit('(', 1)[0]
            motion = motion.replace('"', '')
            motion = motion.replace(u'“', '')
            motion = motion.replace(u'\u201d', '')
            motion = motion.replace(u' ,', ',')
            motion = motion.strip()
            # Pad S/H + any character + digits with spaces.
            motion = re.sub(r'[SH].\d+', lambda m: ' %s ' % m.group(), motion)
            motion = re.sub(r'On the question\s*', '', motion, flags=re.I)

            # Abbreviate spelled-out chamber/file words in the bill id.
            for word, letter in (('Senate', 'S'),
                                 ('House', 'H'),
                                 ('File', 'F')):
                bill_id = bill_id.replace(word, letter)

            # The first letter of the bill id names the originating chamber;
            # any other letter raises KeyError.
            bill_chamber = dict(h='lower', s='upper')[bill_id.lower()[0]]
            self.current_id = bill_id
            # parse_votes is handed the same iterator, presumably to read
            # the tally lines that follow the motion.
            votes = self.parse_votes(lines)

            # Only the integer entries of ``votes`` are counts.
            total = sum(value for value in votes.values()
                        if isinstance(value, int))
            if not total:
                # Without any counted vote the ratio below is undefined.
                self.logger.warning(
                    'No vote totals found for %s; skipping vote' % bill_id)
                continue

            # Passed when yes votes make up at least half of all counts.
            passed = (1.0 * votes['yes_count'] / total) >= 0.5
            vote = Vote(motion=motion,
                        passed=passed,
                        chamber=chamber, date=date,
                        session=session, bill_id=bill_id,
                        bill_chamber=bill_chamber,
                        **votes)
            vote.update(votes)
            vote.add_source(url)
            self.save_vote(vote)
Esempio n. 5
0
    def scrape(self, chamber, session):
        """Scrape every bill listed for ``chamber`` in ``session`` and save it.

        For each bill linked from the bill directory this collects the
        sponsors, title, PDF versions, action history (with any linked
        roll-call votes) and proposed amendments, then calls
        ``self.save_bill``.

        :param chamber: ``'upper'`` selects the ``senateBills`` list; any
            other value selects ``houseBills``.
        :param session: key into ``self.metadata['session_details']``; also
            passed through to ``Bill`` and ``Vote``.
        """
        self.all_bills = {}
        self.slug = self.metadata['session_details'][session]['slug']

        page = self.lxmlize(self.bill_directory_url.format(self.slug.upper()))
        page.make_links_absolute(self.base_url)

        ulid = 'senateBills' if chamber == 'upper' else 'houseBills'  # id of <ul>
        header = page.xpath("//ul[@id='{0}_search']".format(ulid))[0]

        # Every ul with a data-load-action and an id
        bill_list_pages = header.xpath(".//ul[boolean(@data-load-action)"
                                       " and boolean(@id)]/@data-load-action")

        bill_anchors = []
        for bill_list_url in bill_list_pages:
            bill_list_page = self.lxmlize(
                '{}{}'.format(self.base_url, bill_list_url))
            bill_list_page.make_links_absolute(self.base_url)
            bill_anchors.extend(bill_list_page.xpath('//a') or [])

        ws = re.compile(r"\s+")
        # Captures the action text that precedes the vote tally, if any.
        vote_cleaning_re = re.compile(r'(.*?)((Ayes)|(Nays),\s.*)')

        def _clean_ws(txt):
            """Remove extra whitespace from text."""
            return ws.sub(' ', txt).strip()

        for a in bill_anchors:
            bid = ws.sub('', a.text_content())  # bill id
            bill_summary = _clean_ws(a.get('title'))
            # bill title is added below
            bill = Bill(session, chamber, bid, title='', summary=bill_summary)
            bill_page = self.lxmlize(a.get('href'))
            versions = bill_page.xpath(
                '//ul[@class="dropdown-menu"]/li/span/' +
                'a[contains(@title, "Get the Pdf")]/@href')

            # Map each overview-table label (colon removed) to its value
            # element; every row must have exactly two children.
            measure_info = {}
            for row in bill_page.xpath(
                    "//table[@id='measureOverviewTable']/tr"):
                key, value = row.xpath("./*")
                measure_info[key.text.replace(':', '').strip()] = value

            for label, sponsor_type in (('Chief Sponsors', 'primary'),
                                        ('Regular Sponsors', 'cosponsor')):
                for sponsor in measure_info[label].xpath("./a"):
                    sponsor_name = sponsor.text_content()
                    if sponsor_name.strip():
                        bill.add_sponsor(type=sponsor_type, name=sponsor_name)

            title = _clean_ws(measure_info['Bill Title'].text_content())
            # some bill titles need to be added manually
            if self.slug == "2013R1" and bid == "HB2010":
                # The two literals used to be joined without a space,
                # producing "contestedcase".
                title = ("Relating to Water Resources Department contested "
                         "case proceedings.")
            bill['title'] = title

            for version in versions:
                bill.add_version(name=version.split("/")[-1], url=version,
                                 mimetype='application/pdf')

            history_url = self.create_url(
                'Measures/Overview/GetHistory/{bill}', bid)
            history = self.lxmlize(history_url).xpath("//table/tr")
            for entry in history:
                wwhere, action = [_clean_ws(x.text_content())
                                  for x in entry.xpath("*")]
                # Strip a trailing vote tally from the action text.
                tally_match = vote_cleaning_re.match(action)
                if tally_match:
                    action = tally_match.group(1)
                wwhere = re.match(
                    r"(?P<when>.*) \((?P<where>.*)\)", wwhere).groupdict()

                action_chamber = {"S": "upper", "H": "lower"}[wwhere['where']]
                # The history only lists the remainder of the date; the
                # year is the first four characters of the session slug.
                when = "%s-%s" % (self.slug[:4], wwhere['when'])
                when = dt.datetime.strptime(when, "%Y-%m-%d")

                types = []
                for expr, types_ in self.action_classifiers:
                    if re.match(expr, action):
                        types += types_
                if not types:
                    types = ['other']

                # actor, action, date, type, committees, legislators
                bill.add_action(action_chamber, action, when, type=types)

                # Parse and store Vote information
                vote_links = entry.xpath(
                    './td/a[contains(@href, "otes-")]/@href')
                if not vote_links:
                    continue
                vote_id = vote_links[0].split("-")[-1]
                if "#measureVotes-" in vote_links[0]:
                    vote_url = ("https://olis.leg.state.or.us/liz/"
                                "{0}/Measures/MeasureVotes?id={1}".
                                format(self.slug, vote_id))
                else:
                    vote_url = ("https://olis.leg.state.or.us/liz/"
                                "{0}/CommitteeReports/MajorityReport/{1}".
                                format(self.slug, vote_id))

                votes = self._get_votes(vote_url)
                if not any(len(x) for x in votes.values()):
                    self.warning("The votes webpage was empty for "
                                 "action {0} on bill {1}.".format(action, bid))
                    continue

                yes_count = len(votes["yes_votes"])
                no_count = len(votes["no_votes"])
                # Same outcome as yes / (yes + no) > 0.5, but does not divide
                # by zero when only "other" votes were recorded.
                passed = yes_count > no_count

                # NOTE(review): ``chamber`` (the scraped chamber) is used as
                # the vote's chamber and ``action_chamber`` as the bill's
                # chamber; these look swapped -- confirm before changing.
                vote = Vote(
                    chamber=chamber,
                    date=when,
                    motion=action,
                    passed=passed,
                    yes_count=yes_count,
                    no_count=no_count,
                    other_count=len(votes["other_votes"]),
                    session=session,
                    bill_id=bid,
                    bill_chamber=action_chamber
                )

                vote.update(votes)
                bill_url = ("https://olis.leg.state.or.us/liz/"
                            "{0}/Measures/Overview/{1}".format(self.slug, bid))
                vote.add_source(bill_url)

                bill.add_vote(vote)

            amendments_url = self.create_url(
                'Measures/ProposedAmendments/{bill}', bid)
            amendments = self.lxmlize(amendments_url).xpath(
                "//div[@id='amendments']/table//tr")

            for amendment in amendments:
                nodes = amendment.xpath("./td")
                if not nodes:
                    # Rows without <td> cells carry no amendment.
                    continue

                # Five cells are expected; only the first and fourth are
                # used.
                pdf_cell, _date, _committee, adopted_cell, _when = nodes
                pdf_anchor, = pdf_cell.xpath("./a")

                # NOTE(review): "Ammendment" is misspelled; left unchanged
                # in case stored document names depend on it.
                name = "Ammendment %s" % (pdf_anchor.text_content())

                bill.add_document(name=name, url=pdf_anchor.attrib['href'],
                                  adopted=adopted_cell.text,
                                  mimetype='application/pdf')

            bill.add_source(a.get('href'))
            self.save_bill(bill)
Esempio n. 6
0
    def scrape_journal(self, url, chamber, session, date):
        """Scrape the floor votes recorded in one journal PDF and save them.

        The PDF at ``url`` is downloaded and converted to text.  Every line
        that starts with "On the question" and contains "shall" opens a
        motion; following lines are appended until the chamber-specific end
        of the motion is seen.  The bill id and motion text are extracted,
        the tallies are read with ``self.parse_votes`` and one ``Vote`` per
        motion is passed to ``self.save_vote``.

        :param url: location of the journal PDF; also recorded as the
            vote's source.
        :param chamber: ``'upper'`` or ``'lower'``; selects how the end of a
            motion is recognised.
        :param session: passed through to ``Vote``.
        :param date: passed through to ``Vote``.
        :raises ValueError: if ``chamber`` is neither 'upper' nor 'lower'.
        :raises AssertionError: if a vote is reported as passed without
            having more yes than no votes.
        """
        # The end of a motion is recognised differently per chamber, so an
        # unknown chamber cannot be parsed; fail before downloading anything.
        if chamber not in ('upper', 'lower'):
            raise ValueError("Unknown chamber: {!r}".format(chamber))

        filename, response = self.urlretrieve(url)
        self.logger.info('Saved journal to %r' % filename)
        all_text = convert_pdf(filename, type="text")

        # Normalise the dash and quote characters found in the converted
        # text so the patterns below only deal with '-' and '"'.
        lines = [
            line.strip()
                .replace("–", "-")
                .replace("―", '"')
                .replace("‖", '"')
                .replace('“', '"')
                .replace('”', '"')
            for line in all_text.split("\n")
        ]

        # Do not process headers or completely empty lines
        header_date_re = r"\d+\w{2} Day\s+\w+DAY, \w+ \d{1,2}, \d{4}\s+\d+"
        header_journal_re = r"\d+\s+JOURNAL OF THE \w+\s+\d+\w{2} Day"
        lines = iter([
            line for line in lines
            if not (line == "" or
                    re.match(header_date_re, line) or
                    re.match(header_journal_re, line))
        ])

        # These patterns depend only on the chamber, so build them once
        # instead of on every motion.
        bill_re = r'\(\s*([A-Z\.]+\s\d+)\s*\)'
        if chamber == "upper":
            # The Senate ends its motion text with a vote announcement
            end_of_motion_re = r'.* the vote was:\s*'
            trailing_re = r',?.*?the\svote\swas:'
        else:
            # The House may or may not end motion text with a bill name
            end_of_motion_re = r'.*Shall.*\?"?(\s{})?\s*'.format(bill_re)
            trailing_re = ''
        motion_re = r'''
                    ^On\sthe\squestion\s  # Precedes any motion
                    "  # Motion is preceded by a quote mark
                    (Shall\s.+?\??)  # The motion text begins with "Shall"
                    \s*"\s+  # Motion is followed by a quote mark
                    (?:{})?  # If the vote regards a bill, its number is listed
                    {}  # Senate has trailing text
                    \s*$
                    '''.format(bill_re, trailing_re)

        for line in lines:
            # Only lines that open a question containing "shall" start a
            # vote; everything else is skipped.
            if not (line.startswith("On the question") and
                    "shall" in line.lower()):
                continue

            # A motion may span several lines: keep appending until the
            # chamber-specific ending is present.  next() works on both
            # Python 2 and 3, unlike the iterator's .next() method.
            try:
                while not re.match(end_of_motion_re, line, re.IGNORECASE):
                    line += " " + next(lines)
            except StopIteration:
                self.warning("Journal {} ended in the middle of a motion; "
                             "the unfinished motion was skipped".format(url))
                return

            # Get the bill_id
            bill_match = re.search(bill_re, line)
            if bill_match is None:
                self.warning(
                    "This motion did not pertain to legislation: {}".format(
                        line))
                continue
            bill_id = bill_match.group(1)

            # Get the motion text
            motion_match = re.search(motion_re, line,
                                     re.VERBOSE | re.IGNORECASE)
            if motion_match is None:
                self.warning(
                    "Could not extract the motion text: {}".format(line))
                continue
            motion = motion_match.group(1)

            # Abbreviate spelled-out chamber/file words in the bill id.
            for word, letter in (('Senate', 'S'),
                                 ('House', 'H'),
                                 ('File', 'F')):
                bill_id = bill_id.replace(word, letter)

            # The first letter of the bill id names the originating chamber;
            # any other letter raises KeyError.
            bill_chamber = dict(h='lower', s='upper')[bill_id.lower()[0]]
            self.current_id = bill_id
            # parse_votes is handed the same iterator, presumably to read
            # the tally lines that follow the motion.
            votes, passed = self.parse_votes(lines)

            # At the very least there should be a majority for the bill to
            # have passed, so check that.  A bill that did not pass may still
            # have had a majority (e.g. constitutional amendments).  Raised
            # explicitly so the check also runs under ``python -O``.
            has_majority = votes['yes_count'] > votes['no_count']
            if passed and not (passed == has_majority):
                raise AssertionError(
                    "Vote on {} is marked as passed without a majority".
                    format(bill_id))

            # Also warn if the bill failed but got a majority; it could be
            # OK, but is probably something we'd want to check.
            if not passed and has_majority:
                self.logger.warning(
                    "The bill got a majority but did not pass. Could be worth confirming."
                )

            vote = Vote(motion=motion,
                        passed=passed,
                        chamber=chamber,
                        date=date,
                        session=session,
                        bill_id=bill_id,
                        bill_chamber=bill_chamber,
                        **votes)
            vote.update(votes)
            vote.add_source(url)

            self.save_vote(vote)
Esempio n. 7
0
    def scrape(self, chamber, session):
        """Scrape every bill listed for ``chamber`` in ``session`` and save it.

        For each bill linked from the bill directory this collects the
        sponsors, title, PDF versions, action history (with any linked
        roll-call votes) and proposed amendments, then calls
        ``self.save_bill``.

        :param chamber: ``'upper'`` selects the ``senateBills`` list; any
            other value selects ``houseBills``.
        :param session: key into ``self.metadata['session_details']``; also
            passed through to ``Bill`` and ``Vote``.
        """
        self.all_bills = {}
        self.slug = self.metadata['session_details'][session]['slug']

        page = self.lxmlize(self.bill_directory_url.format(self.slug.upper()))
        page.make_links_absolute(self.base_url)

        ulid = 'senateBills' if chamber == 'upper' else 'houseBills'  # id of <ul>
        header = page.xpath("//ul[@id='{0}_search']".format(ulid))[0]

        # Every ul with a data-load-action and an id
        bill_list_pages = header.xpath(".//ul[boolean(@data-load-action)"
                                       " and boolean(@id)]/@data-load-action")

        bill_anchors = []
        for bill_list_url in bill_list_pages:
            bill_list_page = self.lxmlize(
                '{}{}'.format(self.base_url, bill_list_url))
            bill_list_page.make_links_absolute(self.base_url)
            bill_anchors.extend(bill_list_page.xpath('//a') or [])

        ws = re.compile(r"\s+")
        # Captures the action text that precedes the vote tally, if any.
        vote_cleaning_re = re.compile(r'(.*?)((Ayes)|(Nays),\s.*)')

        def _clean_ws(txt):
            """Remove extra whitespace from text."""
            return ws.sub(' ', txt).strip()

        for a in bill_anchors:
            bid = ws.sub('', a.text_content())  # bill id
            bill_summary = _clean_ws(a.get('title'))
            # bill title is added below
            bill = Bill(session, chamber, bid, title='', summary=bill_summary)
            bill_page = self.lxmlize(a.get('href'))
            versions = bill_page.xpath(
                '//ul[@class="dropdown-menu"]/li/span/' +
                'a[contains(@title, "Get the Pdf")]/@href')

            # Map each overview-table label (colon removed) to its value
            # element; every row must have exactly two children.
            measure_info = {}
            for row in bill_page.xpath(
                    "//table[@id='measureOverviewTable']/tr"):
                key, value = row.xpath("./*")
                measure_info[key.text.replace(':', '').strip()] = value

            for label, sponsor_type in (('Chief Sponsors', 'primary'),
                                        ('Regular Sponsors', 'cosponsor')):
                for sponsor in measure_info[label].xpath("./a"):
                    sponsor_name = sponsor.text_content()
                    if sponsor_name.strip():
                        bill.add_sponsor(type=sponsor_type, name=sponsor_name)

            title = _clean_ws(measure_info['Bill Title'].text_content())
            # some bill titles need to be added manually
            if self.slug == "2013R1" and bid == "HB2010":
                # The two literals used to be joined without a space,
                # producing "contestedcase".
                title = ("Relating to Water Resources Department contested "
                         "case proceedings.")
            bill['title'] = title

            for version in versions:
                bill.add_version(name=version.split("/")[-1],
                                 url=version,
                                 mimetype='application/pdf')

            history_url = self.create_url(
                'Measures/Overview/GetHistory/{bill}', bid)
            history = self.lxmlize(history_url).xpath("//table/tr")
            for entry in history:
                wwhere, action = [
                    _clean_ws(x.text_content()) for x in entry.xpath("*")
                ]
                # Strip a trailing vote tally from the action text.
                tally_match = vote_cleaning_re.match(action)
                if tally_match:
                    action = tally_match.group(1)
                wwhere = re.match(r"(?P<when>.*) \((?P<where>.*)\)",
                                  wwhere).groupdict()

                action_chamber = {"S": "upper", "H": "lower"}[wwhere['where']]
                # The history only lists the remainder of the date; the
                # year is the first four characters of the session slug.
                when = "%s-%s" % (self.slug[:4], wwhere['when'])
                when = dt.datetime.strptime(when, "%Y-%m-%d")

                types = []
                for expr, types_ in self.action_classifiers:
                    if re.match(expr, action):
                        types += types_
                if not types:
                    types = ['other']

                # actor, action, date, type, committees, legislators
                bill.add_action(action_chamber, action, when, type=types)

                # Parse and store Vote information
                vote_links = entry.xpath(
                    './td/a[contains(@href, "otes-")]/@href')
                if not vote_links:
                    continue
                vote_id = vote_links[0].split("-")[-1]
                if "#measureVotes-" in vote_links[0]:
                    vote_url = ("https://olis.leg.state.or.us/liz/"
                                "{0}/Measures/MeasureVotes?id={1}".
                                format(self.slug, vote_id))
                else:
                    vote_url = ("https://olis.leg.state.or.us/liz/"
                                "{0}/CommitteeReports/MajorityReport/{1}".
                                format(self.slug, vote_id))

                votes = self._get_votes(vote_url)
                if not any(len(x) for x in votes.values()):
                    self.warning("The votes webpage was empty for "
                                 "action {0} on bill {1}.".format(action, bid))
                    continue

                yes_count = len(votes["yes_votes"])
                no_count = len(votes["no_votes"])
                # Same outcome as yes / (yes + no) > 0.5, but does not divide
                # by zero when only "other" votes were recorded.
                passed = yes_count > no_count

                # NOTE(review): ``chamber`` (the scraped chamber) is used as
                # the vote's chamber and ``action_chamber`` as the bill's
                # chamber; these look swapped -- confirm before changing.
                vote = Vote(chamber=chamber,
                            date=when,
                            motion=action,
                            passed=passed,
                            yes_count=yes_count,
                            no_count=no_count,
                            other_count=len(votes["other_votes"]),
                            session=session,
                            bill_id=bid,
                            bill_chamber=action_chamber)

                vote.update(votes)
                bill_url = ("https://olis.leg.state.or.us/liz/"
                            "{0}/Measures/Overview/{1}".format(self.slug, bid))
                vote.add_source(bill_url)

                bill.add_vote(vote)

            amendments_url = self.create_url(
                'Measures/ProposedAmendments/{bill}', bid)
            amendments = self.lxmlize(amendments_url).xpath(
                "//div[@id='amendments']/table//tr")

            for amendment in amendments:
                nodes = amendment.xpath("./td")
                if not nodes:
                    # Rows without <td> cells carry no amendment.
                    continue

                # Five cells are expected; only the first and fourth are
                # used.
                pdf_cell, _date, _committee, adopted_cell, _when = nodes
                pdf_anchor, = pdf_cell.xpath("./a")

                # NOTE(review): "Ammendment" is misspelled; left unchanged
                # in case stored document names depend on it.
                name = "Ammendment %s" % (pdf_anchor.text_content())

                bill.add_document(name=name,
                                  url=pdf_anchor.attrib['href'],
                                  adopted=adopted_cell.text,
                                  mimetype='application/pdf')

            bill.add_source(a.get('href'))
            self.save_bill(bill)
Esempio n. 8
0
    def scrape_journal(self, url, chamber, session, date):
        """Scrape the floor votes recorded in one journal PDF and save them.

        The PDF at ``url`` is downloaded, converted and parsed as XML.  Each
        journal line containing "Shall" together with "bill pass?",
        "resolution" or "amendment" opens a motion; following lines are
        appended until a parenthesised bill id is found.  The tallies are
        read with ``self.parse_votes`` and one ``Vote`` per motion is passed
        to ``self.save_vote``.

        :param url: location of the journal PDF; also recorded as the
            vote's source.
        :param chamber: passed through to ``Vote``.
        :param session: passed through to ``Vote``.
        :param date: passed through to ``Vote``.
        """
        filename, response = self.urlretrieve(url)
        self.logger.info('Saved journal to %r' % filename)
        xml = convert_pdf(filename)
        try:
            et = lxml.etree.fromstring(xml)
        except lxml.etree.XMLSyntaxError:
            self.logger.warning('Skipping invalid pdf: %r' % filename)
            return

        bill_id_re = re.compile(r'\(\s*([A-Z\.]+\s+\d+)\s*\)')
        motion_keywords = ('bill pass?', 'resolution', 'amendment')

        lines = self._journal_lines(et)
        for line in lines:
            text = gettext(line)

            # Go through with vote parse only for "Shall ..." questions
            # about a bill, a resolution or an amendment.
            if 'Shall' not in text:
                continue
            if not any(keyword in text for keyword in motion_keywords):
                continue

            # Get the bill_id.  The motion can run over several lines, so
            # keep appending the following lines until the id shows up.
            bill_id = None
            for extra_line in lines:
                text += gettext(extra_line)
                m = bill_id_re.search(text)
                if m:
                    bill_id = m.group(1)
                    break

            if bill_id is None:
                # The journal ran out before a bill id appeared; nothing is
                # left to parse.  Checked here rather than inside the
                # replacement loop so no clean-up work is wasted.
                self.logger.warning(
                    'Journal ended before a bill id was found: %r' % filename)
                return

            motion = re.sub(r'\s+', ' ', text.strip())
            # Drop everything from the last "(" onwards.
            motion = motion.rsplit('(', 1)[0]
            motion = motion.replace('"', '')
            motion = motion.replace(u'“', '')
            motion = motion.replace(u'\u201d', '')
            motion = motion.replace(u' ,', ',')
            motion = motion.strip()
            # Pad S/H + any character + digits with spaces.
            motion = re.sub(r'[SH].\d+', lambda m: ' %s ' % m.group(), motion)
            motion = re.sub(r'On the question\s*', '', motion, flags=re.I)

            # Abbreviate spelled-out chamber/file words in the bill id.
            for word, letter in (('Senate', 'S'),
                                 ('House', 'H'),
                                 ('File', 'F')):
                bill_id = bill_id.replace(word, letter)

            # The first letter of the bill id names the originating chamber;
            # any other letter raises KeyError.
            bill_chamber = dict(h='lower', s='upper')[bill_id.lower()[0]]
            self.current_id = bill_id
            # parse_votes is handed the same iterator, presumably to read
            # the tally lines that follow the motion.
            votes = self.parse_votes(lines)

            # Only the integer entries of ``votes`` are counts.
            total = sum(value for value in votes.values()
                        if isinstance(value, int))
            if not total:
                # Without any counted vote the ratio below is undefined.
                self.logger.warning(
                    'No vote totals found for %s; skipping vote' % bill_id)
                continue

            # Passed when yes votes make up at least half of all counts.
            passed = (1.0 * votes['yes_count'] / total) >= 0.5
            vote = Vote(motion=motion,
                        passed=passed,
                        chamber=chamber, date=date,
                        session=session, bill_id=bill_id,
                        bill_chamber=bill_chamber,
                        **votes)
            vote.update(votes)
            vote.add_source(url)
            self.save_vote(vote)