Example #1
0
def person_cosponsors(request, pk):
    """Summarize who cosponsored this person's bills, broken down by issue area."""
    # Load the (non-withdrawn) cosponsorship records for bills this person sponsored.
    from bill.models import Cosponsor
    person = get_object_or_404(Person, pk=pk)
    cosp_qs = Cosponsor.objects.filter(bill__sponsor=person, withdrawn=None)\
       .prefetch_related("bill", "bill__terms", "person", "person__roles")

    # Pre-fetch all of the top-level issue term ids.
    from bill.models import BillTerm
    top_terms = set(BillTerm.get_top_term_ids())

    # Aggregate per cosponsoring person: total count, per-issue counts,
    # and the first/last dates they joined one of this person's bills.
    from collections import defaultdict
    total = 0
    summary = defaultdict(lambda: {
        "total": 0,
        "by_issue": defaultdict(int),
    })
    for c in cosp_qs:
        total += 1
        entry = summary[c.person]
        entry["total"] += 1
        for term in c.bill.terms.all():
            if term.id in top_terms:
                entry["by_issue"][term] += 1
        if "first_date" not in entry or c.joined < entry["first_date"]:
            entry["first_date"] = c.joined
        if "last_date" not in entry or c.joined > entry["last_date"]:
            entry["last_date"] = c.joined

    # Sort issues within each cosponsor by count, and cosponsors by
    # (count, most recent activity, name), most active first.
    for entry in summary.values():
        entry['by_issue'] = sorted(entry['by_issue'].items(),
                                   key=lambda item: item[1],
                                   reverse=True)
    ranked = sorted(summary.items(),
                    key=lambda item:
                    (item[1]['total'], item[1]['last_date'], item[0].sortname),
                    reverse=True)

    # Total sponsored bills and overall date range across all cosponsors.
    from bill.models import Bill
    total_bills = Bill.objects.filter(sponsor=person).count()
    date_range = (None, None)
    if len(ranked) > 0:
        date_range = (min(info["first_date"] for _, info in ranked),
                      max(info["last_date"] for _, info in ranked))

    return {
        "person": person,
        "cosponsors": ranked,
        "total": total,
        "total_bills": total_bills,
        "date_range": date_range,
    }
Example #2
0
def person_cosponsors(request, pk):
    """Summarize who cosponsored this person's bills, broken down by issue area."""
    # Load the cosponsors.
    from bill.models import Cosponsor
    person = get_object_or_404(Person, pk=pk)
    cosponsors = Cosponsor.objects.filter(bill__sponsor=person, withdrawn=None)\
       .prefetch_related("bill", "bill__terms", "person", "person__roles")

    # Pre-fetch all of the top-terms.
    from bill.models import BillTerm
    top_terms = set(BillTerm.get_top_term_ids())

    # Aggregate per cosponsor: total count, per-issue counts, and the
    # first/last dates they joined one of this person's bills.
    total = 0
    from collections import defaultdict
    ret = defaultdict(lambda: {
        "total": 0,
        "by_issue": defaultdict(lambda: 0),
    })
    for cosp in cosponsors:
        total += 1
        ret[cosp.person]["total"] += 1
        for t in cosp.bill.terms.all():
            if t.id in top_terms:
                ret[cosp.person]["by_issue"][t] += 1
        if "first_date" not in ret[
                cosp.person] or cosp.joined < ret[cosp.person]["first_date"]:
            ret[cosp.person]["first_date"] = cosp.joined
        if "last_date" not in ret[
                cosp.person] or cosp.joined > ret[cosp.person]["last_date"]:
            ret[cosp.person]["last_date"] = cosp.joined

    # Sort: issues within each cosponsor by count; cosponsors by
    # (count, most recent activity, name), most active first.
    for info in ret.values():
        info['by_issue'] = sorted(info['by_issue'].items(),
                                  key=lambda kv: kv[1],
                                  reverse=True)
    ret = sorted(ret.items(),
                 key=lambda kv:
                 (kv[1]['total'], kv[1]['last_date'], kv[0].sortname),
                 reverse=True)

    # Total bills, date range.
    from bill.models import Bill
    total_bills = Bill.objects.filter(sponsor=person).count()
    date_range = (None, None)
    if len(ret) > 0:
        date_range = (min(r["first_date"] for p, r in ret),
                      max(r["last_date"] for p, r in ret))

    return {
        "person": person,
        "cosponsors": ret,
        "total": total,
        "total_bills": total_bills,
        "date_range": date_range,
    }
def main(options):
    """
    Process bill terms and bills.

    First synchronizes the BillTerm table against the legislative subject
    term vocabulary XML files (creating new terms, deleting terms no longer
    present), then parses bill data.xml files into Bill records, updating
    the search index and feed events unless disabled via options.
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {}
    for term in BillTerm.objects.all():
        existing_terms[(int(term.term_type), term.name)] = term

    log.info('Processing old bill terms')
    TERMS_FILE = 'bill/liv.xml'
    tree = etree.parse(TERMS_FILE)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = TermType.old
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
            terms_parsed.add(term.id)
        except KeyError:  # narrowed from bare except: only the dict miss is expected
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = TermType.old
            try:
                # No need to update an existing term because there are no other attributes.
                subterm = existing_terms[(int(subterm.term_type),
                                          subterm.name)]
                term.subterms.add(subterm)
                terms_parsed.add(subterm.id)
            except KeyError:
                try:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                    term.subterms.add(subterm)

                    existing_terms[(int(subterm.term_type),
                                    subterm.name)] = subterm
                    terms_parsed.add(subterm.id)
                except IntegrityError:
                    log.error('Duplicated term %s' %
                              term_processor.display_node(subnode))

    log.info('Processing new bill terms')
    for FILE in ('bill/liv111.xml', 'bill/crsnet.xml'):
        tree = etree.parse(FILE)
        for node in tree.xpath('/liv/top-term'):
            term = term_processor.process(BillTerm(), node)
            term.term_type = TermType.new
            try:
                # No need to update an existing term because there are no other attributes.
                term = existing_terms[(int(term.term_type), term.name)]
                terms_parsed.add(term.id)
            except KeyError:
                log.debug("Created %s" % term)
                term.save()
                term.subterms.clear()

            for subnode in node.xpath('./term'):
                subterm = term_processor.process(BillTerm(), subnode)
                subterm.term_type = TermType.new
                try:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[(int(subterm.term_type),
                                              subterm.name)]
                    terms_parsed.add(subterm.id)
                    term.subterms.add(subterm)
                except KeyError:
                    try:
                        # Log the subterm (this previously logged the parent term).
                        log.debug("Created %s" % subterm)
                        subterm.save()
                        term.subterms.add(subterm)

                        existing_terms[(int(subterm.term_type),
                                        subterm.name)] = subterm
                        terms_parsed.add(subterm.id)
                    except IntegrityError:
                        log.error('Duplicated term %s' %
                                  term_processor.display_node(subnode))

    # Remove terms no longer present in any vocabulary file.
    for term in existing_terms.values():
        if term.id not in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    if options.congress:
        files = glob.glob(settings.CONGRESS_DATA_PATH +
                          '/%s/bills/*/*/data.xml' % options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    else:
        files = glob.glob(settings.CONGRESS_DATA_PATH +
                          '/*/bills/*/*/data.xml')

    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]

    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        if (not options.congress or int(options.congress) > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.match(
                re.escape(settings.CONGRESS_DATA_PATH) +
                r'/(?P<congress>\d+)/bills/(?P<bill_type>[a-z]+)/(?P<bill_type_2>[a-z]+)(?P<number>\d+)/data.xml',
                fname)

            try:
                b = Bill.objects.get(congress=int(m.group("congress")),
                                     bill_type=BillType.by_slug(
                                         m.group("bill_type")),
                                     number=m.group("number"))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    # Text is expected within ~2 weeks of introduction for
                    # modern congresses; flag missing text after that.
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print("No bill text?", fname, b.introduced_date)
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    b.update_index(bill_index)  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)

                continue
            except Bill.DoesNotExist:
                print("Unchanged metadata file but bill doesn't exist:", fname)
                pass  # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                print(fname)
                raise  # re-raised; the print just notes which file failed

            seen_bill_ids.append(bill.id)  # don't delete me later

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = str(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                if axn.xpath("string(@state)") == "REFERRED":
                    continue  # we don't track this state
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn, encoding=str),
                ))

            # Slip-law (public/private law) number, if enacted.
            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                print(bill)
                raise  # re-raised; the print just notes which bill failed

            if bill_index:
                bill.update_index(bill_index)

            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            print("Bill is no longer on disk: ", b.id, b)

    # The rest is for current only...

    if options.congress and int(options.congress) != settings.CURRENT_CONGRESS:
        return

    # Find what might be coming up this week.
    load_docs_house_gov(options, bill_index)
    load_senate_floor_schedule(options, bill_index)
Example #4
0
    def build_info():
        """Build the template context dict for a person's profile page."""
        global pronunciation_guide

        if re.match(r"\d", pk):
            person = get_object_or_404(Person, pk=pk)
        else:
            # support bioguide IDs for me
            person = get_object_or_404(Person, bioguideid=pk)

        # Current role, falling back to the most recently ended role.
        role = person.get_current_role()
        if role:
            active_role = True
        else:
            active_role = False
            try:
                role = person.roles.order_by('-enddate')[0]
            except IndexError:
                role = None  # person has no roles at all

        # photo
        photo_url, photo_credit = person.get_photo()

        # analysis
        analysis_data = analysis.load_data(person)
        try:
            # Get session stats for the previous year.
            has_session_stats = person.get_session_stats(
                str(datetime.now().year - 1))
        except Exception:
            # Not everyone has current stats, obviously. They may have stats
            # corresponding to their most recent role. Since stats are a
            # session behind, even-year stats might not correspond to
            # a legislator's most recent role, which is why I hard-coded
            # the current session stats above.
            has_session_stats = False
            if role:
                try:
                    has_session_stats = role.get_most_recent_session_stats()
                except Exception:
                    pass

        links = []
        # role can be None (person with no roles) -- guard before dereferencing.
        if role and role.current:
            if role.website:
                links.append(("%s's Official Website" % person.lastname,
                              role.website, "fa fa-external-link"))
            if person.twitterid:
                links.append(("@" + person.twitterid,
                              "http://twitter.com/" + person.twitterid,
                              "fa fa-twitter"))
        if person.osid:
            links.append(
                ("OpenSecrets",
                 "http://www.opensecrets.org/politicians/summary.php?cid=" +
                 person.osid, "fa fa-money"))
        if person.pvsid:
            links.append(
                ("VoteSmart", "http://votesmart.org/candidate/" + person.pvsid,
                 "fa fa-th-list"))
        if person.bioguideid:
            links.append(
                ("Bioguide",
                 "http://bioguide.congress.gov/scripts/biodisplay.pl?index=" +
                 person.bioguideid, "fa fa-user"))
        if person.cspanid:
            links.append(("C-SPAN", "http://www.c-spanvideo.org/person/" +
                          str(person.cspanid), "fa fa-youtube-play"))

        # Get a break down of the top terms this person's sponsored bills fall into,
        # looking only at the most recent five years of bills.
        from bill.models import BillTerm
        most_recent_bill = person.sponsored_bills.order_by(
            "-introduced_date").first()
        bills_by_subject_counts = list(person.sponsored_bills.filter(
            terms__id__in=BillTerm.get_top_term_ids(),
            introduced_date__gt=(most_recent_bill.introduced_date if most_recent_bill else datetime.now())-timedelta(days=5*365.25))\
            .values("terms")\
            .annotate(count=Count('id')).order_by('-count')\
            .filter(count__gt=1)\
            [0:8])
        terms = BillTerm.objects.in_bulk(item["terms"]
                                         for item in bills_by_subject_counts)
        total_count = sum(item["count"] for item in bills_by_subject_counts)
        # Drop trailing subjects that are tiny relative to the top subject,
        # but always keep at least two.
        while len(bills_by_subject_counts) > 2 and bills_by_subject_counts[-1][
                "count"] < bills_by_subject_counts[0]["count"] / 8:
            bills_by_subject_counts.pop(-1)
        for item in bills_by_subject_counts:
            item["term"] = terms[item["terms"]]
            item["pct"] = int(round(float(item["count"]) / total_count * 100))
            del item["terms"]

        # Missed vote explanations from ProPublica
        try:
            vote_explanations = http_rest_json(
                "https://projects.propublica.org/explanations/api/members/%s.json"
                % person.bioguideid)
        except Exception:
            # squash all errors
            vote_explanations = {}

        # Misconduct - load and filter this person's entries, keeping original order.
        # Choose 'Alleged misconduct', 'Misconduct', 'Misconduct/alleged misconduct' as appropriate.
        from website.views import load_misconduct_data
        misconduct = [
            m for m in load_misconduct_data() if m["person"] == person
        ]
        misconduct_any_alleged = (len([m for m in misconduct if m["alleged"]])
                                  > 0)
        misconduct_any_not_alleged = (len(
            [m for m in misconduct if not m["alleged"]]) > 0)

        # Load pronunciation from guide. Turn into a mapping from GovTrack IDs to data.
        if pronunciation_guide is None:
            import rtyaml
            if not hasattr(settings, 'PRONUNCIATION_DATABASE_PATH'):
                # debugging
                pronunciation_guide = {}
            else:
                # Use a context manager so the file handle is closed.
                with open(settings.PRONUNCIATION_DATABASE_PATH) as f:
                    pronunciation_guide = {
                        p["id"]["govtrack"]: p
                        for p in rtyaml.load(f)
                    }

        # Get this person's entry.
        pronunciation = pronunciation_guide.get(person.id)
        # TODO: Validate that the 'name' in the guide matches the name we're actually displaying.
        if pronunciation and not pronunciation.get("key"):
            # Show a key to the letters used in the pronunciation guide. Break apart the name
            # into words which we'll show in columns.
            pronunciation["key"] = []
            # Respelling symbols longest-first so we always chop off the
            # longest valid combination (hoisted out of the loops below).
            symbols = sorted(pronunciation_guide_key, key=lambda s: -len(s))
            for namepart in pronunciation["respell"].split(" // "):
                for nameword in namepart.split(" "):
                    # Parse out the symbols actually used in the guide. Sweep from left to right chopping
                    # off valid respelling letter combinations, chopping off the longest one where possible.
                    pronunciation["key"].append([])
                    i = 0
                    while i < len(nameword):
                        for s in symbols:
                            if nameword[i:i + len(s)] in (s, s.upper()):
                                pronunciation["key"][-1].append(
                                    (nameword[i:i + len(s)],
                                     pronunciation_guide_key[s]))
                                i += len(s)
                                break
                        else:
                            # respelling did not match any valid symbol, should be an error but we don't
                            # want to issue an Oops! for this
                            break

        # Get their enacted bills.
        enacted_bills_src_qs = person.sponsored_bills.exclude(
            original_intent_replaced=True).order_by('-current_status_date')

        return {
            'person': person,
            'role': role,
            'active_role': active_role,
            'active_congressional_role': active_role
            and role.role_type in (RoleType.senator, RoleType.representative),
            'pronunciation': pronunciation,
            'photo': photo_url,
            'photo_credit': photo_credit,
            'links': links,
            'analysis_data': analysis_data,
            'enacted_bills': [
                b for b in enacted_bills_src_qs if b.was_enacted_ex(
                    cache_related_bills_qs=enacted_bills_src_qs)
            ],
            'recent_bills':
            person.sponsored_bills.all().order_by('-introduced_date')[0:7],
            'committeeassignments': get_committee_assignments(person),
            'feed': person.get_feed(),
            'has_session_stats': has_session_stats,
            'bill_subject_areas': bills_by_subject_counts,
            'vote_explanations': vote_explanations,
            'key_votes': load_key_votes(person),
            'misconduct': misconduct,
            'misconduct_any_alleged': misconduct_any_alleged,
            'misconduct_any_not_alleged': misconduct_any_not_alleged,
        }
Example #5
0
def main(options):
    """
    Process bill terms and bills
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {}
    for term in BillTerm.objects.all():
        existing_terms[(int(term.term_type), term.name)] = term

    log.info('Processing old bill terms')
    TERMS_FILE = 'data/us/liv.xml'
    tree = etree.parse(TERMS_FILE)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = TermType.old
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
            terms_parsed.add(term.id)
        except:
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = TermType.old
            try:
                # No need to update an existing term because there are no other attributes.
                subterm = existing_terms[(int(subterm.term_type),
                                          subterm.name)]
                term.subterms.add(subterm)
                terms_parsed.add(subterm.id)
            except:
                try:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                    term.subterms.add(subterm)

                    existing_terms[(int(subterm.term_type),
                                    subterm.name)] = subterm
                    terms_parsed.add(subterm.id)
                except IntegrityError:
                    log.error('Duplicated term %s' %
                              term_processor.display_node(subnode))

    log.info('Processing new bill terms')
    for FILE in ('data/us/liv111.xml', 'data/us/crsnet.xml'):
        tree = etree.parse(FILE)
        for node in tree.xpath('/liv/top-term'):
            term = term_processor.process(BillTerm(), node)
            term.term_type = TermType.new
            try:
                # No need to update an existing term because there are no other attributes.
                term = existing_terms[(int(term.term_type), term.name)]
                terms_parsed.add(term.id)
            except:
                log.debug("Created %s" % term)
                term.save()
                term.subterms.clear()

            for subnode in node.xpath('./term'):
                subterm = term_processor.process(BillTerm(), subnode)
                subterm.term_type = TermType.new
                try:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[(int(subterm.term_type),
                                              subterm.name)]
                    terms_parsed.add(subterm.id)
                    term.subterms.add(subterm)
                except:
                    try:
                        log.debug("Created %s" % term)
                        subterm.save()
                        term.subterms.add(subterm)

                        existing_terms[(int(subterm.term_type),
                                        subterm.name)] = subterm
                        terms_parsed.add(subterm.id)
                    except IntegrityError:
                        log.error('Duplicated term %s' %
                                  term_processor.display_node(subnode))

    for term in existing_terms.values():
        if not term.id in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    if options.congress and int(options.congress) <= 42:
        files = glob.glob('data/congress/%s/bills/*/*/*.xml' %
                          options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    elif options.congress:
        files = glob.glob('data/us/%s/bills/*.xml' % options.congress)
        log.info('Parsing bills of only congress#%s' % options.congress)
    else:
        files = glob.glob('data/us/*/bills/*.xml')

    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]

    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        if (not options.congress or options.congress > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.search(r"/(\d+)/bills/([a-z]+)(\d+)\.xml$", fname)

            try:
                b = Bill.objects.get(congress=m.group(1),
                                     bill_type=BillType.by_xml_code(
                                         m.group(2)),
                                     number=m.group(3))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print "No bill text?", fname, b.introduced_date
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    bill_index.update_object(
                        b, using="bill")  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)

                continue
            except Bill.DoesNotExist:
                print "Unchanged metadata file but bill doesn't exist:", fname
                pass  # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                print fname
                raise

            seen_bill_ids.append(bill.id)  # don't delete me later

            if bill.congress >= 93:
                bill.source = "thomas-congproj"
            elif bill.congress >= 82:
                bill.source = "statutesatlarge"
                if bill.current_status == BillStatus.enacted_signed:
                    bill.current_status = BillStatus.enacted_unknown
            elif bill.congress <= 42:
                bill.source = "americanmemory"
            else:
                raise ValueError()

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = unicode(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn),
                ))

            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                print bill
                raise
            if bill_index: bill_index.update_object(bill, using="bill")

            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            print "Bill is no longer on disk: ", b.id, b

    # The rest is for current only...

    if options.congress and int(options.congress) != CURRENT_CONGRESS:
        return

    # Parse docs.house.gov for what might be coming up this week.
    import iso8601
    dhg_html = urllib.urlopen("http://docs.house.gov/floor/").read()
    m = re.search(r"class=\"downloadXML\" href=\"(Download.aspx\?file=.*?)\"",
                  dhg_html)
    if not m:
        log.error(
            'No docs.house.gov download link found at http://docs.house.gov.')
    else:

        def bt_re(bt):
            return re.escape(bt[1]).replace(r"\.", r"\.?\s*")

        try:
            dhg = etree.parse(
                urllib.urlopen("http://docs.house.gov/floor/" +
                               m.group(1))).getroot()
        except:
            print "http://docs.house.gov/floor/" + m.group(1)
            raise
        # iso8601.parse_date(dhg.get("week-date")+"T00:00:00").date()
        for item in dhg.xpath("category/floor-items/floor-item"):
            billname = item.xpath("legis-num")[0].text
            if billname is None: continue  # weird but OK
            m = re.match(
                r"\s*(?:Concur in the Senate Amendment to |Senate Amendment to )?("
                + "|".join(bt_re(bt) for bt in BillType) +
                r")(\d+)\s*(\[Conference Report\]\s*)?$", billname, re.I)
            if not m:
                if not billname.strip().endswith(" __"):
                    log.error(
                        'Could not parse legis-num "%s" in docs.house.gov.' %
                        billname)
            else:
                for bt in BillType:
                    if re.match(bt_re(bt) + "$", m.group(1), re.I):
                        try:
                            bill = Bill.objects.get(congress=CURRENT_CONGRESS,
                                                    bill_type=bt[0],
                                                    number=m.group(2))
                            bill.docs_house_gov_postdate = iso8601.parse_date(
                                item.get("add-date")).replace(tzinfo=None)
                            bill.save()
                            if bill_index:
                                bill_index.update_object(bill, using="bill")

                            if not options.disable_events:
                                bill.create_events()
                        except Bill.DoesNotExist:
                            log.error(
                                'Could not find bill "%s" in docs.house.gov.' %
                                billname)
                        break
                else:
                    log.error(
                        'Could not parse legis-num bill type "%s" in docs.house.gov.'
                        % m.group(1))

    # Parse Senate.gov's "Floor Schedule" blurb for coming up tomorrow.
    now = datetime.now()
    sfs = urllib.urlopen(
        "http://www.senate.gov/pagelayout/legislative/d_three_sections_with_teasers/calendars.htm"
    ).read()
    try:
        sfs = re.search(r"Floor Schedule([\w\W]*)Previous Meeting",
                        sfs).group(1)
        for congress, bill_type, number in re.findall(
                r"http://hdl.loc.gov/loc.uscongress/legislation.(\d+)([a-z]+)(\d+)",
                sfs):
            bill_type = BillType.by_slug(bill_type)
            bill = Bill.objects.get(congress=congress,
                                    bill_type=bill_type,
                                    number=number)
            if bill.senate_floor_schedule_postdate == None or now - bill.senate_floor_schedule_postdate > timedelta(
                    days=7):
                bill.senate_floor_schedule_postdate = now
                bill.save()
                if bill_index: bill_index.update_object(bill, using="bill")
                if not options.disable_events:
                    bill.create_events()
    except Exception as e:
        log.error('Could not parse Senate Floor Schedule: ' + repr(e))
Example #6
0
    def build_info():
        """Build the template context dict for a legislator's profile page.

        ``pk`` comes from the enclosing view's scope and may be either a
        numeric GovTrack id or a Bioguide id.  Returns a dict of context
        variables consumed by the profile template.
        """
        # Accept a numeric primary key or fall through to a Bioguide id.
        if re.match(r"\d", pk):
            person = get_object_or_404(Person, pk=pk)
        else:
            # support bioguide IDs for me
            person = get_object_or_404(Person, bioguideid=pk)

        # Current role, falling back to the most recently ended role.
        role = person.get_current_role()
        if role:
            active_role = True
        else:
            active_role = False
            try:
                role = person.roles.order_by('-enddate')[0]
            except IndexError:
                role = None  # person has no roles at all

        # photo
        photo_url, photo_credit = person.get_photo()

        # analysis
        analysis_data = analysis.load_data(person)
        try:
            has_session_stats = person.get_session_stats('2016')
        except Exception:
            # Not everyone has current stats, obviously. They may have stats
            # corresponding to their most recent role. Since stats are a
            # session behind, even-year stats might not correspond to
            # a legislator's most recent role, which is why I hard-coded
            # the current session stats above.
            has_session_stats = False
            if role:
                try:
                    has_session_stats = role.get_most_recent_session_stats()
                except Exception:
                    pass

        # External links shown on the profile page.
        links = []
        # BUGFIX: role can be None here (see the IndexError fallback above);
        # guard before dereferencing role.current.
        if role and role.current:
            if role.website: links.append(("%s's Official Website" % person.lastname, role.website, "fa  fa-external-link"))
            if person.twitterid: links.append(("@" + person.twitterid, "http://twitter.com/" + person.twitterid, "fa fa-twitter"))
        if person.osid: links.append(("OpenSecrets", "http://www.opensecrets.org/politicians/summary.php?cid=" + person.osid, "fa fa-money"))
        if person.pvsid: links.append(("VoteSmart", "http://votesmart.org/candidate/" + person.pvsid, "fa fa-th-list"))
        if person.bioguideid: links.append(("Bioguide", "http://bioguide.congress.gov/scripts/biodisplay.pl?index=" + person.bioguideid, "fa fa-user"))
        if person.cspanid: links.append(("C-SPAN", "http://www.c-spanvideo.org/person/" + str(person.cspanid), "fa fa-youtube-play"))

        # Get a break down of the top terms this person's sponsored bills fall into,
        # looking only at the most recent five years of bills.
        from bill.models import BillTerm
        from datetime import datetime, timedelta
        most_recent_bill = person.sponsored_bills.order_by("-introduced_date").first()
        bills_by_subject_counts = list(person.sponsored_bills.filter(
            terms__id__in=BillTerm.get_top_term_ids(),
            introduced_date__gt=(most_recent_bill.introduced_date if most_recent_bill else datetime.now())-timedelta(days=5*365.25))\
            .values("terms")\
            .annotate(count=Count('id')).order_by('-count')\
            .filter(count__gt=1)\
            [0:8])
        terms = BillTerm.objects.in_bulk(item["terms"] for item in bills_by_subject_counts)
        total_count = sum(item["count"] for item in bills_by_subject_counts)
        # Drop long-tail subjects that are tiny relative to the top subject.
        while len(bills_by_subject_counts) > 2 and bills_by_subject_counts[-1]["count"] < bills_by_subject_counts[0]["count"]/8: bills_by_subject_counts.pop(-1)
        for item in bills_by_subject_counts:
            item["term"] = terms[item["terms"]]
            item["pct"] = int(round(float(item["count"]) / total_count * 100))
            del item["terms"]

        # Missed vote explanations from ProPublica
        try:
            vote_explanations = http_rest_json("https://projects.propublica.org/explanations/api/members/%s.json" % person.bioguideid)
        except Exception:
            # squash all errors
            vote_explanations = { }

        return {'person': person,
                'role': role,
                'active_role': active_role,
                'active_congressional_role': active_role and role.role_type in (RoleType.senator, RoleType.representative),
                'photo': photo_url,
                'photo_credit': photo_credit,
                'links': links,
                'analysis_data': analysis_data,
                'enacted_bills': [b for b in person.sponsored_bills.order_by('-current_status_date') if b.was_enacted_ex()],
                'recent_bills': person.sponsored_bills.all().order_by('-introduced_date')[0:7],
                'committeeassignments': get_committee_assignments(person),
                'feed': person.get_feed(),
                'has_session_stats': has_session_stats,
                'bill_subject_areas': bills_by_subject_counts,
                'vote_explanations': vote_explanations,
                'key_votes': load_key_votes(person),
                }
Example #7
0
    def build_info():
        """Build the template context dict for a legislator's profile page.

        ``pk`` comes from the enclosing view's scope and may be either a
        numeric GovTrack id or a Bioguide id.  Returns a dict of context
        variables consumed by the profile template.
        """
        # Accept a numeric primary key or fall through to a Bioguide id.
        if re.match(r"\d", pk):
            person = get_object_or_404(Person, pk=pk)
        else:
            # support bioguide IDs for me
            person = get_object_or_404(Person, bioguideid=pk)

        # Current role, falling back to the most recently ended role.
        role = person.get_current_role()
        if role:
            active_role = True
        else:
            active_role = False
            try:
                role = person.roles.order_by('-enddate')[0]
            except IndexError:
                role = None  # person has no roles at all

        # photo
        photo_url, photo_credit = person.get_photo()

        # analysis
        analysis_data = analysis.load_data(person)
        try:
            has_session_stats = person.get_session_stats('2015')
        except Exception:
            # Not everyone has 2015 stats, obviously. They may have stats
            # corresponding to their most recent role. Since stats are a
            # session behind, even-year stats might not correspond to
            # a legislator's most recent role, which is why I hard-coded
            # the current session stats above.
            has_session_stats = False
            if role:
                try:
                    has_session_stats = role.get_most_recent_session_stats()
                except Exception:
                    pass

        # External links shown on the profile page.
        links = []
        # BUGFIX: role can be None here (see the IndexError fallback above);
        # guard before dereferencing role.current.
        if role and role.current:
            if role.website: links.append(("%s's Official Website" % person.lastname, role.website, "fa  fa-external-link"))
            if person.twitterid: links.append(("@" + person.twitterid, "http://twitter.com/" + person.twitterid, "fa fa-twitter"))
        if person.osid: links.append(("OpenSecrets", "http://www.opensecrets.org/politicians/summary.php?cid=" + person.osid, "fa fa-money"))
        if person.pvsid: links.append(("VoteSmart", "http://votesmart.org/candidate/" + person.pvsid, "fa fa-th-list"))
        if person.bioguideid: links.append(("Bioguide", "http://bioguide.congress.gov/scripts/biodisplay.pl?index=" + person.bioguideid, "fa fa-user"))
        if person.cspanid: links.append(("C-SPAN", "http://www.c-spanvideo.org/person/" + str(person.cspanid), "fa fa-youtube-play"))

        # Get a break down of the top terms this person's sponsored bills fall into,
        # looking only at the most recent five years of bills.
        from bill.models import BillTerm
        from datetime import datetime, timedelta
        most_recent_bill = person.sponsored_bills.order_by("-introduced_date").first()
        bills_by_subject_counts = list(person.sponsored_bills.filter(
            terms__id__in=BillTerm.get_top_term_ids(),
            introduced_date__gt=(most_recent_bill.introduced_date if most_recent_bill else datetime.now())-timedelta(days=5*365.25))\
            .values("terms")\
            .annotate(count=Count('id')).order_by('-count')\
            .filter(count__gt=1)\
            [0:8])
        terms = BillTerm.objects.in_bulk(item["terms"] for item in bills_by_subject_counts)
        total_count = sum(item["count"] for item in bills_by_subject_counts)
        # Drop long-tail subjects that are tiny relative to the top subject.
        while len(bills_by_subject_counts) > 2 and bills_by_subject_counts[-1]["count"] < bills_by_subject_counts[0]["count"]/8: bills_by_subject_counts.pop(-1)
        for item in bills_by_subject_counts:
            item["term"] = terms[item["terms"]]
            item["pct"] = int(round(float(item["count"]) / total_count * 100))
            del item["terms"]

        # Missed vote explanations from ProPublica
        try:
            vote_explanations = http_rest_json("https://projects.propublica.org/explanations/api/members/%s.json" % person.bioguideid)
        except Exception:
            # squash all errors
            vote_explanations = { }

        return {'person': person,
                'role': role,
                'active_role': active_role,
                'active_congressional_role': active_role and role.role_type in (RoleType.senator, RoleType.representative),
                'photo': photo_url,
                'photo_credit': photo_credit,
                'links': links,
                'analysis_data': analysis_data,
                'recent_bills': person.sponsored_bills.all().order_by('-introduced_date')[0:7],
                'committeeassignments': get_committee_assignments(person),
                'feed': person.get_feed(),
                'cities': get_district_cities("%s-%02d" % (role.state.lower(), role.district)) if role and role.district else None,
                'has_session_stats': has_session_stats,
                'bill_subject_areas': bills_by_subject_counts,
                'vote_explanations': vote_explanations,
                }
Example #8
0
    def build_info():
        """Build the template context dict for a legislator's profile page.

        ``pk`` comes from the enclosing view's scope and may be either a
        numeric GovTrack id or a Bioguide id.  Lazily loads the module-level
        pronunciation guide on first use.  Returns a dict of context
        variables consumed by the profile template.
        """
        global pronunciation_guide

        # Accept a numeric primary key or fall through to a Bioguide id.
        if re.match(r"\d", pk):
            person = get_object_or_404(Person, pk=pk)
        else:
            # support bioguide IDs for me
            person = get_object_or_404(Person, bioguideid=pk)

        # Current role, falling back to the most recently ended role.
        role = person.get_current_role()
        if role:
            active_role = True
        else:
            active_role = False
            try:
                role = person.roles.order_by('-enddate')[0]
            except IndexError:
                role = None  # person has no roles at all

        # photo
        photo_url, photo_credit = person.get_photo()

        # analysis
        analysis_data = analysis.load_data(person)
        try:
            # Get session stats for the previous year.
            has_session_stats = person.get_session_stats(str(datetime.now().year-1))
        except Exception:
            # Not everyone has current stats, obviously. They may have stats
            # corresponding to their most recent role. Since stats are a
            # session behind, even-year stats might not correspond to
            # a legislator's most recent role, which is why I hard-coded
            # the current session stats above.
            has_session_stats = False
            if role:
                try:
                    has_session_stats = role.get_most_recent_session_stats()
                except Exception:
                    pass

        # External links shown on the profile page.
        links = []
        # BUGFIX: role can be None here (see the IndexError fallback above);
        # guard before dereferencing role.current.
        if role and role.current:
            if role.website: links.append(("%s's Official Website" % person.lastname, role.website, "fa fa-external-link"))
            if person.twitterid: links.append(("@" + person.twitterid, "http://twitter.com/" + person.twitterid, "fa fa-twitter"))
        if person.osid: links.append(("OpenSecrets", "http://www.opensecrets.org/politicians/summary.php?cid=" + person.osid, "fa fa-money"))
        if person.pvsid: links.append(("VoteSmart", "http://votesmart.org/candidate/" + person.pvsid, "fa fa-th-list"))
        if person.bioguideid: links.append(("Bioguide", "http://bioguide.congress.gov/scripts/biodisplay.pl?index=" + person.bioguideid, "fa fa-user"))
        if person.cspanid: links.append(("C-SPAN", "http://www.c-spanvideo.org/person/" + str(person.cspanid), "fa fa-youtube-play"))

        # Get a break down of the top terms this person's sponsored bills fall into,
        # looking only at the most recent five years of bills.
        from bill.models import BillTerm
        most_recent_bill = person.sponsored_bills.order_by("-introduced_date").first()
        bills_by_subject_counts = list(person.sponsored_bills.filter(
            terms__id__in=BillTerm.get_top_term_ids(),
            introduced_date__gt=(most_recent_bill.introduced_date if most_recent_bill else datetime.now())-timedelta(days=5*365.25))\
            .values("terms")\
            .annotate(count=Count('id')).order_by('-count')\
            .filter(count__gt=1)\
            [0:8])
        terms = BillTerm.objects.in_bulk(item["terms"] for item in bills_by_subject_counts)
        total_count = sum(item["count"] for item in bills_by_subject_counts)
        # Drop long-tail subjects that are tiny relative to the top subject.
        while len(bills_by_subject_counts) > 2 and bills_by_subject_counts[-1]["count"] < bills_by_subject_counts[0]["count"]/8: bills_by_subject_counts.pop(-1)
        for item in bills_by_subject_counts:
            item["term"] = terms[item["terms"]]
            item["pct"] = int(round(float(item["count"]) / total_count * 100))
            del item["terms"]

        # Missed vote explanations from ProPublica
        try:
            vote_explanations = http_rest_json("https://projects.propublica.org/explanations/api/members/%s.json" % person.bioguideid)
        except Exception:
            # squash all errors
            vote_explanations = { }

        # Misconduct - load and filter this person's entries, keeping original order.
        # Choose 'Alleged misconduct', 'Misconduct', 'Misconduct/alleged misconduct' as appropriate.
        from website.views import load_misconduct_data
        misconduct = [m for m in load_misconduct_data() if m["person"] == person ]
        misconduct_any_alleged = (len([ m for m in misconduct if m["alleged"]  ]) > 0)
        misconduct_any_not_alleged = (len([ m for m in misconduct if not m["alleged"]  ]) > 0)

        # Load pronunciation from guide. Turn into a mapping from GovTrack IDs to data.
        if pronunciation_guide is None:
            import rtyaml
            if not hasattr(settings, 'PRONUNCIATION_DATABASE_PATH'):
                # debugging
                pronunciation_guide = { }
            else:
                # BUGFIX: close the file handle after loading (was a leaked
                # open() passed straight to rtyaml.load).
                with open(settings.PRONUNCIATION_DATABASE_PATH) as f:
                    pronunciation_guide = { p["id"]["govtrack"]: p for p in rtyaml.load(f) }

        # Get this person's entry.
        pronunciation = pronunciation_guide.get(person.id)
        # TODO: Validate that the 'name' in the guide matches the name we're actually displaying.
        if pronunciation and not pronunciation.get("key"):
            # Show a key to the letters used in the pronunciation guide. Break apart the name
            # into words which we'll show in columns.
            pronunciation["key"] = []
            for namepart in pronunciation["respell"].split(" // "):
                for nameword in namepart.split(" "):
                    # Parse out the symbols actually used in the guide. Sweep from left to right chopping
                    # off valid respelling letter combinations, chopping off the longest one where possible.
                    pronunciation["key"].append([])
                    i = 0
                    while i < len(nameword):
                        # Try the longest symbols first so e.g. a two-letter
                        # combination wins over its one-letter prefix.
                        for s in sorted(pronunciation_guide_key, key = lambda s : -len(s)):
                            if nameword[i:i+len(s)] in (s, s.upper()):
                                pronunciation["key"][-1].append( (nameword[i:i+len(s)], pronunciation_guide_key[s]) )
                                i += len(s)
                                break
                        else:
                            # respelling did not match any valid symbol, should be an error but we don't
                            # want to issue an Oops! for this
                            break

        # Get their enacted bills.
        enacted_bills_src_qs = person.sponsored_bills.exclude(original_intent_replaced=True).order_by('-current_status_date')

        return {'person': person,
                'role': role,
                'active_role': active_role,
                'active_congressional_role': active_role and role.role_type in (RoleType.senator, RoleType.representative),
                'pronunciation': pronunciation,
                'photo': photo_url,
                'photo_credit': photo_credit,
                'links': links,
                'analysis_data': analysis_data,
                'enacted_bills': [b for b in enacted_bills_src_qs if b.was_enacted_ex(cache_related_bills_qs=enacted_bills_src_qs)],
                'recent_bills': person.sponsored_bills.all().order_by('-introduced_date')[0:7],
                'committeeassignments': get_committee_assignments(person),
                'feed': person.get_feed(),
                'has_session_stats': has_session_stats,
                'bill_subject_areas': bills_by_subject_counts,
                'vote_explanations': vote_explanations,
                'key_votes': load_key_votes(person),
                'misconduct': misconduct,
                'misconduct_any_alleged': misconduct_any_alleged,
                'misconduct_any_not_alleged': misconduct_any_not_alleged,
                }