Example #1
0
def import_bill_by_id(legisinfo_id):
    """Import a single bill based on its LEGISinfo id.

    Downloads the bill's XML document from LEGISinfo, resolves the Session
    named by its ParliamentSession tag and hands the <Bill> element to
    _import_bill.

    Args:
        legisinfo_id: value substituted into LEGISINFO_SINGLE_BILL_URL.

    Returns:
        Whatever _import_bill returns for the parsed bill.

    Raises:
        Bill.DoesNotExist: the HTTP request failed, or the document root is
            not a single <Bill> element.
        Other errors from the Session lookup or from _import_bill propagate
        unchanged.
    """
    # Function-scope import so this block does not depend on the file header.
    from contextlib import closing

    url = LEGISINFO_SINGLE_BILL_URL % {'legisinfo_id': legisinfo_id}
    try:
        # closing() releases the HTTP response as soon as parsing is done
        # instead of relying on garbage collection.
        with closing(urllib2.urlopen(url)) as response:
            tree = etree.parse(response)
    except urllib2.HTTPError:
        raise Bill.DoesNotExist("HTTP error retrieving bill")

    # Explicit check rather than `assert`, which is stripped under `python -O`.
    bills = tree.xpath('/Bill')
    if len(bills) != 1:
        raise Bill.DoesNotExist(
            "Unexpected document retrieved for bill %s" % legisinfo_id)
    bill = bills[0]

    sessiontag = bill.xpath('ParliamentSession')[0]
    session = Session.objects.get(
        parliamentnum=int(sessiontag.get('parliamentNumber')),
        sessnum=int(sessiontag.get('sessionNumber')))
    return _import_bill(bill, session)
Example #2
0
def import_bills(session):
    """Scrape the LEGISinfo bill list for `session` and import unseen bills.

    For every "C-" bill linked from the session's list page, the bill is
    skipped if it already exists for this session with a legisinfo_url.
    Otherwise the detail page is downloaded, the bill name extracted, and the
    bill is either merged with an identically numbered and named bill from
    the previous session or created from scratch. The sponsor politician and
    member are filled in when they can be identified, then the bill is saved
    and save_sponsor_activity() is called.

    Download or parsing failures for a single bill are printed and that bill
    is skipped; the run continues with the next one.

    Args:
        session: the Session whose bills should be imported.

    Returns:
        None.

    Raises:
        IndexError: if no session older than `session` is on record.
    """
    previous_session = Session.objects.filter(start__lt=session.start)\
        .order_by('-start')[0] # yes, this will raise an exception if there's no older session on record

    legis_sess = InternalXref.objects.get(text_value=session.id, schema='session_legisin').int_value
    listurl = LEGISINFO_LIST_URL % legis_sess
    listpage = urllib2.urlopen(listurl).read()

    r_listlink = re.compile(r'<a href="index\.asp\?Language=E&Session=\d+&query=(\d+)&List=toc">\s*(C-\d+[A-Z]?)\s*</a>', re.UNICODE)
    for listmatch in r_listlink.finditer(listpage):
        legisinfoid = int(listmatch.group(1))
        billnumber_full = listmatch.group(2)
        try:
            bill = Bill.objects.get(sessions=session, number=billnumber_full)
        except Bill.DoesNotExist:
            bill = None

        if getattr(bill, 'legisinfo_url', None):
            # Already imported with a detail URL; nothing to do.
            continue

        # Not yet in the database (or missing its URL). Go parse.
        detailurl = LEGISINFO_DETAIL_URL % (legis_sess, legisinfoid)
        try:
            detailpage = urllib2.urlopen(detailurl).read().decode('windows-1252')
        except urllib2.URLError as e:
            print("ERROR: URLError on %s" % detailurl)
            print(e)
            continue

        namematch = re.search(r'<td>\s*(An [aA]ct.+?)<br', detailpage)
        if namematch:
            billname = namematch.group(1)[:500]
        else:
            # Fallback: walk the parsed page from the bill number's text node.
            soup = BeautifulSoup(detailpage)
            try:
                # Same 500-character cap as the regex branch above.
                billname = unicode(soup.find(text=billnumber_full).next.next.next)[:500]
                print("WARNING: soupmatching bill name as %s" % billname)
            except Exception as e:
                print("Couldn't parse bill name at %s" % detailurl)
                print(e)
                continue

        # Is this a reintroduced bill?
        try:
            mergebill = Bill.objects.get(sessions=previous_session, number=billnumber_full, name__iexact=billname)
            if not bill:
                bill = mergebill
                print("MERGING BILL")
            else:
                # A bill already exists for this session; leave the merge
                # decision to a human.
                mail_admins('Bills may need to be merged', "%s: ids %s %s" % (billnumber_full, mergebill.id, bill.id))
        except Bill.DoesNotExist:
            # Nope. New bill.
            if not bill:
                bill = Bill(number=billnumber_full, name=billname)
                bill.session = session

        if bill.session != session:
            bill.sessions.add(session)

        bill.legisinfo_url = detailurl

        membermatch = re.search(r'<font color="#005500"><b><a href=.http://www2\.parl\.gc\.ca/parlinfo/Files/Parliamentarian\.aspx\?Item=([A-Z0-9-]+?)&.+?>(.+?)<', detailpage)
        if membermatch:
            try:
                bill.sponsor_politician = Politician.objects.get_by_parlinfo_id(membermatch.group(1))
            except models.ObjectDoesNotExist:
                # Unknown ParlInfo id: fall back to matching on the
                # displayed name, then remember the id for next time.
                membername = membermatch.group(2)
                membername = re.sub(r'\(.+?\)', '', membername) # parens
                membername = re.sub(r'.+ Hon\.', '', membername) # honorific
                try:
                    bill.sponsor_politician = Politician.objects.get_by_name(membername.strip(), session=session)
                    bill.sponsor_politician.save_parlinfo_id(membermatch.group(1))
                except (Politician.DoesNotExist, Politician.MultipleObjectsReturned):
                    print("WARNING: Could not identify politician for bill %s" % billnumber_full)
            if bill.sponsor_politician:
                try:
                    bill.sponsor_member = ElectedMember.objects.get_by_pol(politician=bill.sponsor_politician,
                        session=session)
                except Exception:
                    # Best effort, but no longer a bare `except:` so
                    # KeyboardInterrupt/SystemExit still propagate.
                    print("WARNING: Couldn't find member for politician %s" % bill.sponsor_politician)
        bill.save()
        bill.save_sponsor_activity()
Example #3
0
def _import_bill(lbill, session, previous_session=None):
    """Create or update the Bill / BillInSession described by a <Bill> element.

    Args:
        lbill: ElementTree Element for the Bill tag.
        session: the Session the bill is being imported into.
        previous_session: session searched for an identically numbered and
            named bill to merge with. When None it is obtained from
            _get_previous_session(session).

    Returns:
        The Bill that was updated. For a not-yet-saved bill that matches one
        in the previous session, this is the older Bill object.
    """
    if previous_session is None:
        previous_session = _get_previous_session(session)

    lbillnumber = lbill.xpath('BillNumber')[0]
    billnumber = (lbillnumber.get('prefix') + '-' + lbillnumber.get('number')
        + lbillnumber.get('suffix', ''))
    try:
        bill = Bill.objects.get(number=billnumber, sessions=session)
        bis = bill.billinsession_set.get(session=session)
    except Bill.DoesNotExist:
        bill = Bill(number=billnumber)
        bis = BillInSession(bill=bill, session=session)
        bill._changed = True
        bis._changed = True
        bill.set_temporary_session(session)

    # NOTE(review): _update is defined elsewhere; presumably it sets the
    # attribute and flags obj._changed when the value differs -- confirm.
    _update(bill, 'name', lbill.xpath('BillTitle/Title[@language="en"]')[0].text)

    if not bill.status:
        # This is presumably our first import of the bill; check if this
        # looks like a reintroduced bill and we want to merge with an
        # older Bill object.
        bill._newbill = True
        try:
            if previous_session:
                mergebill = Bill.objects.get(sessions=previous_session,
                                             number=bill.number,
                                             name__iexact=bill.name)

                if bill.id:
                    # If the new bill has already been saved, let's not try
                    # to merge automatically
                    logger.error("Bill %s may need to be merged. IDs: %s %s" %
                                 (bill.number, bill.id, mergebill.id))
                else:
                    logger.warning("Merging bill %s" % bill.number)
                    bill = mergebill
                    bis.bill = bill
        except Bill.DoesNotExist:
            # Nothing to merge
            pass

    _update(bill, 'name_fr', lbill.xpath('BillTitle/Title[@language="fr"]')[0].text)
    _update(bill, 'short_title_en', lbill.xpath('ShortTitle/Title[@language="en"]')[0].text)
    _update(bill, 'short_title_fr', lbill.xpath('ShortTitle/Title[@language="fr"]')[0].text)

    sponsor_ids = lbill.xpath('SponsorAffiliation/@id')
    if not bis.sponsor_politician and bill.number[0] == 'C' and sponsor_ids:
        # We don't deal with Senate sponsors yet
        pol_id = int(sponsor_ids[0])
        try:
            bis.sponsor_politician = Politician.objects.get_by_parl_id(pol_id)
        except Politician.DoesNotExist:
            logger.error("Couldn't find sponsor politician for bill %s, pol ID %s" % (bill.number, pol_id))
        bis._changed = True
        if bis.sponsor_politician:
            # Only look for the member when a politician was actually found;
            # otherwise the lookup can only produce a second, misleading error.
            try:
                bis.sponsor_member = ElectedMember.objects.get_by_pol(politician=bis.sponsor_politician,
                                                                       session=session)
            except Exception:
                logger.error("Couldn't find ElectedMember for bill %s, pol %r" %
                             (bill.number, bis.sponsor_politician))
        if not bill.sponsor_politician:
            bill.sponsor_politician = bis.sponsor_politician
            bill.sponsor_member = bis.sponsor_member
            bill._changed = True

    _update(bis, 'introduced', _parse_date(lbill.xpath('BillIntroducedDate')[0].text))
    if not bill.introduced and bis.introduced:
        # Direct assignment, so flag the change ourselves or the backfilled
        # date would not be saved below.
        bill.introduced = bis.introduced
        bill._changed = True

    try:
        _update(bill, 'status',
            lbill.xpath('Events/LastMajorStageEvent/Event/Status/Title[@language="en"]')[0].text)
        _update(bill, 'status_fr',
            lbill.xpath('Events/LastMajorStageEvent/Event/Status/Title[@language="fr"]')[0].text)
        _update(bill, 'status_date', _parse_date(
            lbill.xpath('Events/LastMajorStageEvent/Event/@date')[0]))
    except IndexError:
        # Some older bills don't have status information
        pass

    try:
        # The last Publication listed is taken as the current text.
        _update(bill, 'text_docid', int(
            lbill.xpath('Publications/Publication/@id')[-1]))
    except IndexError:
        pass

    _update(bis, 'legisinfo_id', int(lbill.get('id')))

    if getattr(bill, '_changed', False):
        bill.save()
    if getattr(bis, '_changed', False):
        bis.bill = bis.bill # bizarrely, the django orm makes you do this
        bis.save()

    if getattr(bill, '_newbill', False) and not session.end:
        bill.save_sponsor_activity()

    if bill.text_docid and not BillText.objects.filter(docid=bill.text_docid).exists():
        try:
            BillText.objects.create(
                bill=bill,
                docid=bill.text_docid,
                text_en=get_plain_bill_text(bill)
            )
            bill.save()  # to trigger search indexing
        except CannotScrapeException:
            logger.warning(u"Could not get bill text for %s" % bill)

    return bill
            
Example #4
0
def _import_bill(lbill, session, previous_session=None):
    """Create or update the Bill, BillInSession and BillEvents for a <Bill>.

    Args:
        lbill: ElementTree Element for the Bill tag.
        session: the Session the bill is being imported into.
        previous_session: session searched for an identically numbered and
            named bill to merge with. When None it is obtained from
            _get_previous_session(session).

    Returns:
        The Bill that was updated. For a not-yet-saved bill that matches one
        in the previous session, this is the older Bill object.
    """
    if previous_session is None:
        previous_session = _get_previous_session(session)

    lbillnumber = lbill.xpath('BillNumber')[0]
    billnumber = (lbillnumber.get('prefix') + '-' + lbillnumber.get('number') +
                  lbillnumber.get('suffix', ''))
    try:
        bill = Bill.objects.get(number=billnumber, sessions=session)
        bis = bill.billinsession_set.get(session=session)
    except Bill.DoesNotExist:
        bill = Bill(number=billnumber)
        bis = BillInSession(bill=bill, session=session)
        bill._changed = True
        bis._changed = True
        bill.set_temporary_session(session)

    # NOTE(review): _update is defined elsewhere; presumably it sets the
    # attribute and flags obj._changed when the value differs -- confirm.
    _update(bill, 'name_en',
            lbill.xpath('BillTitle/Title[@language="en"]')[0].text)

    if not bill.status_code:
        # This is presumably our first import of the bill; check if this
        # looks like a reintroduced bill and we want to merge with an
        # older Bill object.
        bill._newbill = True
        try:
            if previous_session:
                mergebill = Bill.objects.get(sessions=previous_session,
                                             number=bill.number,
                                             name_en__iexact=bill.name_en)

                if bill.id:
                    # If the new bill has already been saved, let's not try
                    # to merge automatically
                    logger.error("Bill %s may need to be merged. IDs: %s %s" %
                                 (bill.number, bill.id, mergebill.id))
                else:
                    logger.warning("Merging bill %s" % bill.number)
                    bill = mergebill
                    bis.bill = bill
        except Bill.DoesNotExist:
            # Nothing to merge
            pass

    _update(bill, 'name_fr',
            lbill.xpath('BillTitle/Title[@language="fr"]')[0].text)
    _update(bill, 'short_title_en',
            lbill.xpath('ShortTitle/Title[@language="en"]')[0].text)
    _update(bill, 'short_title_fr',
            lbill.xpath('ShortTitle/Title[@language="fr"]')[0].text)

    sponsor_ids = lbill.xpath('SponsorAffiliation/@id')
    if not bis.sponsor_politician and bill.number[0] == 'C' and sponsor_ids:
        # We don't deal with Senate sponsors yet
        pol_id = int(sponsor_ids[0])
        try:
            bis.sponsor_politician = Politician.objects.get_by_parl_id(pol_id)
        except Politician.DoesNotExist:
            logger.error(
                "Couldn't find sponsor politician for bill %s, pol ID %s" %
                (bill.number, pol_id))
        bis._changed = True
        if bis.sponsor_politician:
            # Only look for the member when a politician was actually found;
            # otherwise the lookup can only produce a second, misleading error.
            try:
                bis.sponsor_member = ElectedMember.objects.get_by_pol(
                    politician=bis.sponsor_politician, session=session)
            except Exception:
                logger.error(
                    "Couldn't find ElectedMember for bill %s, pol %r" %
                    (bill.number, bis.sponsor_politician))
        if not bill.sponsor_politician:
            bill.sponsor_politician = bis.sponsor_politician
            bill.sponsor_member = bis.sponsor_member
            bill._changed = True

    _update(bis, 'introduced',
            _parse_date(lbill.xpath('BillIntroducedDate')[0].text))
    if not bill.introduced and bis.introduced:
        # Direct assignment, so flag the change ourselves or the backfilled
        # date would not be saved below.
        bill.introduced = bis.introduced
        bill._changed = True

    try:
        status_code = lbill.xpath('Events')[0].get('laagCurrentStage')
        if status_code == '':
            status_code = 'Introduced'
        _update(bill, 'status_code', status_code)
        if status_code not in Bill.STATUS_CODES:
            logger.error("Unknown bill status code %s" % status_code)
        # status_date is the most recent legislative event date.
        status_dates = [
            _parse_date(d)
            for d in lbill.xpath('Events/LegislativeEvents/Event/@date')
        ]
        if status_dates:
            # max() of an empty list raises ValueError, which the
            # IndexError handler below would not catch.
            _update(bill, 'status_date', max(status_dates))
    except IndexError:
        # Some older bills don't have status information
        pass

    try:
        # The last Publication listed is taken as the current text.
        _update(bill, 'text_docid',
                int(lbill.xpath('Publications/Publication/@id')[-1]))
    except IndexError:
        pass

    _update(bis, 'legisinfo_id', int(lbill.get('id')))

    if getattr(bill, '_changed', False):
        bill.save()
    if getattr(bis, '_changed', False):
        bis.bill = bis.bill  # bizarrely, the django orm makes you do this
        bis.save()

    for levent in lbill.xpath('Events/LegislativeEvents/Event'):
        source_id = int(levent.get('id'))
        if BillEvent.objects.filter(source_id=source_id).exists():
            # Already imported on an earlier run.
            continue

        try:
            status_en = levent.xpath('Status/Title[@language="en"]/text()')[0]
            status_fr = levent.xpath('Status/Title[@language="fr"]/text()')[0]
        except IndexError:
            # An event without a status title is skipped rather than
            # aborting the whole import.
            logger.debug("No status present in billevent: %s",
                         etree.tostring(levent))
            continue

        event = BillEvent(
            source_id=source_id,
            bis=bis,
            date=_parse_date(levent.get('date')),
            institution='S' if levent.get('chamber') == 'SEN' else 'C',
            status_en=status_en,
            status_fr=status_fr)

        if event.institution == 'C':
            hansard_num = levent.get('meetingNumber')
            try:
                event.debate = Document.debates.get(session=bis.session,
                                                    number=hansard_num)
            except Document.DoesNotExist:
                # The event is not saved in this case, so a later run will
                # reach it again (the source_id check above will not match).
                logger.info(
                    u"Could not associate BillEvent for %s with Hansard#%s" %
                    (bill, hansard_num))
                continue

            for lcommittee in levent.xpath('Committee'):
                # 'accronym' (sic) is the attribute name read from the XML.
                acronym = lcommittee.get('accronym')
                if acronym and acronym != 'WHOL':
                    # Saved before committee_meetings.add() is called.
                    event.save()
                    try:
                        committee = Committee.objects.get_by_acronym(
                            acronym, bis.session)
                        for number in lcommittee.xpath(
                                'CommitteeMeetings/CommitteeMeeting/@number'):
                            event.committee_meetings.add(
                                CommitteeMeeting.objects.get(
                                    committee=committee,
                                    number=int(number),
                                    session=bis.session))
                    except ObjectDoesNotExist:
                        # Log and move on; the event itself is still saved.
                        logger.exception(
                            "Could not import committee meetings: %s" %
                            etree.tostring(lcommittee))
        event.save()

    if getattr(bill, '_newbill', False) and not session.end:
        bill.save_sponsor_activity()

    if bill.text_docid and not BillText.objects.filter(
            docid=bill.text_docid).exists():
        try:
            BillText.objects.create(bill=bill,
                                    docid=bill.text_docid,
                                    text_en=get_plain_bill_text(bill))
            bill.save()  # to trigger search indexing
        except CannotScrapeException:
            logger.warning(u"Could not get bill text for %s" % bill)

    return bill
def _import_bill(lbill, session, previous_session=None):
    """Create or update the Bill, BillInSession and BillEvents for a <Bill>.

    Args:
        lbill: ElementTree Element for the Bill tag.
        session: the Session the bill is being imported into.
        previous_session: session searched for an identically numbered and
            named bill to merge with. When None it is obtained from
            _get_previous_session(session).

    Returns:
        The Bill that was updated. For a not-yet-saved bill that matches one
        in the previous session, this is the older Bill object.
    """
    if previous_session is None:
        previous_session = _get_previous_session(session)

    lbillnumber = lbill.xpath('BillNumber')[0]
    billnumber = (lbillnumber.get('prefix') + '-' + lbillnumber.get('number')
        + lbillnumber.get('suffix', ''))
    try:
        bill = Bill.objects.get(number=billnumber, sessions=session)
        bis = bill.billinsession_set.get(session=session)
    except Bill.DoesNotExist:
        bill = Bill(number=billnumber)
        bis = BillInSession(bill=bill, session=session)
        bill._changed = True
        bis._changed = True
        bill.set_temporary_session(session)

    # NOTE(review): _update is defined elsewhere; presumably it sets the
    # attribute and flags obj._changed when the value differs -- confirm.
    _update(bill, 'name_en', lbill.xpath('BillTitle/Title[@language="en"]')[0].text)

    if not bill.status_code:
        # This is presumably our first import of the bill; check if this
        # looks like a reintroduced bill and we want to merge with an
        # older Bill object.
        bill._newbill = True
        try:
            if previous_session:
                mergebill = Bill.objects.get(sessions=previous_session,
                                             number=bill.number,
                                             name_en__iexact=bill.name_en)

                if bill.id:
                    # If the new bill has already been saved, let's not try
                    # to merge automatically
                    logger.error("Bill %s may need to be merged. IDs: %s %s" %
                                 (bill.number, bill.id, mergebill.id))
                else:
                    logger.warning("Merging bill %s" % bill.number)
                    bill = mergebill
                    bis.bill = bill
        except Bill.DoesNotExist:
            # Nothing to merge
            pass

    _update(bill, 'name_fr', lbill.xpath('BillTitle/Title[@language="fr"]')[0].text)
    _update(bill, 'short_title_en', lbill.xpath('ShortTitle/Title[@language="en"]')[0].text)
    _update(bill, 'short_title_fr', lbill.xpath('ShortTitle/Title[@language="fr"]')[0].text)

    sponsor_ids = lbill.xpath('SponsorAffiliation/@id')
    if not bis.sponsor_politician and bill.number[0] == 'C' and sponsor_ids:
        # We don't deal with Senate sponsors yet
        pol_id = int(sponsor_ids[0])
        try:
            bis.sponsor_politician = Politician.objects.get_by_parl_id(pol_id)
        except Politician.DoesNotExist:
            logger.error("Couldn't find sponsor politician for bill %s, pol ID %s" % (bill.number, pol_id))
        bis._changed = True
        if bis.sponsor_politician:
            # Only look for the member when a politician was actually found;
            # otherwise the lookup can only produce a second, misleading error.
            try:
                bis.sponsor_member = ElectedMember.objects.get_by_pol(politician=bis.sponsor_politician,
                                                                       session=session)
            except Exception:
                logger.error("Couldn't find ElectedMember for bill %s, pol %r" %
                             (bill.number, bis.sponsor_politician))
        if not bill.sponsor_politician:
            bill.sponsor_politician = bis.sponsor_politician
            bill.sponsor_member = bis.sponsor_member
            bill._changed = True

    _update(bis, 'introduced', _parse_date(lbill.xpath('BillIntroducedDate')[0].text))
    if not bill.introduced and bis.introduced:
        # Direct assignment, so flag the change ourselves or the backfilled
        # date would not be saved below.
        bill.introduced = bis.introduced
        bill._changed = True

    try:
        status_code = lbill.xpath('Events')[0].get('laagCurrentStage')
        if status_code == '':
            status_code = 'Introduced'
        _update(bill, 'status_code', status_code)
        if status_code not in Bill.STATUS_CODES:
            logger.error("Unknown bill status code %s" % status_code)
        # status_date is the most recent legislative event date.
        status_dates = [_parse_date(d) for d in lbill.xpath('Events/LegislativeEvents/Event/@date')]
        if status_dates:
            # max() of an empty list raises ValueError, which the
            # IndexError handler below would not catch.
            _update(bill, 'status_date', max(status_dates))
    except IndexError:
        # Some older bills don't have status information
        pass

    try:
        # The last Publication listed is taken as the current text.
        _update(bill, 'text_docid', int(
            lbill.xpath('Publications/Publication/@id')[-1]))
    except IndexError:
        pass

    _update(bis, 'legisinfo_id', int(lbill.get('id')))

    if getattr(bill, '_changed', False):
        bill.save()
    if getattr(bis, '_changed', False):
        bis.bill = bis.bill # bizarrely, the django orm makes you do this
        bis.save()

    for levent in lbill.xpath('Events/LegislativeEvents/Event'):
        source_id = int(levent.get('id'))
        if BillEvent.objects.filter(source_id=source_id).exists():
            # Already imported on an earlier run.
            continue

        try:
            status_en = levent.xpath('Status/Title[@language="en"]/text()')[0]
            status_fr = levent.xpath('Status/Title[@language="fr"]/text()')[0]
        except IndexError:
            logger.debug("No status present in billevent: %s", etree.tostring(levent))
            continue

        event = BillEvent(
            source_id=source_id,
            bis=bis,
            date=_parse_date(levent.get('date')),
            institution='S' if levent.get('chamber') == 'SEN' else 'C',
            status_en=status_en,
            status_fr=status_fr
        )

        if event.institution == 'C':
            hansard_num = levent.get('meetingNumber')
            try:
                event.debate = Document.debates.get(session=bis.session, number=hansard_num)
            except Document.DoesNotExist:
                # The event is not saved in this case, so a later run will
                # reach it again (the source_id check above will not match).
                logger.info(u"Could not associate BillEvent for %s with Hansard#%s" % (bill, hansard_num))
                continue

            for lcommittee in levent.xpath('Committee'):
                # 'accronym' (sic) is the attribute name read from the XML.
                acronym = lcommittee.get('accronym')
                if acronym and acronym != 'WHOL':
                    # Saved before committee_meetings.add() is called.
                    event.save()
                    try:
                        committee = Committee.objects.get_by_acronym(acronym, bis.session)
                        for number in lcommittee.xpath('CommitteeMeetings/CommitteeMeeting/@number'):
                            event.committee_meetings.add(
                                CommitteeMeeting.objects.get(committee=committee, number=int(number), session=bis.session)
                            )
                    except ObjectDoesNotExist:
                        # Log and move on; the event itself is still saved.
                        logger.exception("Could not import committee meetings: %s" % etree.tostring(lcommittee))
        event.save()

    if getattr(bill, '_newbill', False) and not session.end:
        bill.save_sponsor_activity()

    if bill.text_docid and not BillText.objects.filter(docid=bill.text_docid).exists():
        try:
            BillText.objects.create(
                bill=bill,
                docid=bill.text_docid,
                text_en=get_plain_bill_text(bill)
            )
            bill.save()  # to trigger search indexing
        except CannotScrapeException:
            logger.warning(u"Could not get bill text for %s" % bill)

    return bill