Example #1
0
def bill_list(request, abbr):
    """Render the bill list page for the jurisdiction ``abbr``.

    The bills shown are selected from the query string of ``request``:

    * ``version_url`` present: bills having a version with exactly that URL.
    * otherwise: bills matching ``_bill_spec(meta, limit)`` (``limit`` being
      the optional ``limit`` parameter), minus the ids recorded as quality
      exceptions under the key ``'bills:' + limit``.

    Raises ``Http404`` when no metadata exists for ``abbr``.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    params = request.GET
    if 'version_url' not in params:
        limit = params.get('limit', '')
        exceptions = get_quality_exceptions(abbr)['bills:' + limit]
        spec = _bill_spec(meta, limit)
    else:
        spec = {'versions.url': params.get('version_url')}
        exceptions = []

    # The description is captured before the exclusion clause is added, so
    # it shows only the base query; exclusions are summarised as a count.
    query_text = repr(spec)
    if exceptions:
        excluded_ids = list(exceptions)
        spec['_id'] = {'$nin': excluded_ids}
        query_text = query_text + ' (excluding {0} exceptions)'.format(
            len(excluded_ids))

    bills = list(mdb.bills.find(spec))
    bill_ids = [bill['_id'] for bill in bills
                if bill['_id'] not in exceptions]

    return render(request, 'billy/bill_list.html', {
        'metadata': meta,
        'query_text': query_text,
        'bills': bills,
        'bill_ids': bill_ids,
    })
Example #2
0
def bill_list(request, abbr):
    """Show the bills of jurisdiction ``abbr`` selected by the query string.

    With a ``version_url`` parameter the listing contains the bills that have
    a version at that URL.  Without it, the listing is built from
    ``_bill_spec(meta, limit)`` and the ids stored as quality exceptions
    under ``'bills:' + limit`` are left out (``limit`` defaults to ``''``).

    Raises ``Http404`` if ``metadata(abbr)`` returns nothing.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    query = request.GET
    by_version = 'version_url' in query
    if by_version:
        spec = {'versions.url': query.get('version_url')}
        exceptions = []
    else:
        limit = query.get('limit', '')
        exceptions = get_quality_exceptions(abbr)['bills:' + limit]
        spec = _bill_spec(meta, limit)

    # repr() is taken first so the text describes the spec without the
    # (potentially long) list of excluded ids.
    query_text = repr(spec)
    if exceptions:
        spec['_id'] = {'$nin': list(exceptions)}
        suffix = ' (excluding {0} exceptions)'.format(len(exceptions))
        query_text = query_text + suffix

    bills = list(db.bills.find(spec))
    bill_ids = [found['_id'] for found in bills
                if found['_id'] not in exceptions]

    context = dict(metadata=meta, query_text=query_text, bills=bills,
                   bill_ids=bill_ids)
    return render(request, 'billy/bill_list.html', context)
Example #3
0
def scan_votes(abbr):
    """Collect per-session vote statistics for the jurisdiction ``abbr``.

    Every vote document whose ``settings.LEVEL_FIELD`` equals ``abbr`` is
    tallied into a per-session report dict (created by
    ``_vote_report_dict``): total and passed votes, counts per chamber, type
    and month, roll call counts, voters without a ``leg_id`` and votes whose
    roll call lengths disagree with their recorded counts.

    A count mismatch is not reported when the vote's ``vote_id`` is listed
    under the matching ``votes:bad_<kind>_count`` quality exception.  Vote
    exceptions that never suppressed a report are logged as unnecessary.

    Returns ``{'sessions': sessions}`` where ``sessions`` maps each session
    to its report dict.
    """
    sessions = defaultdict(_vote_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)
    # ids of the exceptions that actually suppressed a report, by type
    applied_exceptions = defaultdict(set)

    for vote in db.votes.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[vote['session']]

        session_d['vote_count'] += 1
        if vote['passed']:
            session_d['_passed_vote_count'] += 1
        session_d['votes_per_chamber'][vote['chamber']] += 1
        # votes lacking a type or a date are counted above but are skipped
        # for all remaining statistics
        if not vote.get('type'):
            logger.warning('vote %s missing type' % vote['_id'])
            continue
        session_d['votes_per_type'][vote.get('type')] += 1
        if not vote.get('date'):
            logger.warning('vote %s missing date' % vote['_id'])
            continue
        session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

        # roll calls
        rollcalls = (vote['yes_votes'] + vote['no_votes'] +
                     vote['other_votes'])
        for rc in rollcalls:
            session_d['_rollcall_count'] += 1
            if rc.get('leg_id'):
                session_d['_rollcalls_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_voters'].add(
                    (term_for_session(abbr, vote['session']),
                     vote['chamber'],
                     rc['name'])
                )

        # check counts if any rollcalls are present
        if rollcalls:
            for kind in ('yes', 'no', 'other'):
                if len(vote[kind + '_votes']) == vote[kind + '_count']:
                    continue
                qe_type = 'votes:bad_{0}_count'.format(kind)
                if vote['vote_id'] in quality_exceptions[qe_type]:
                    # the exception did its job; remember that it is needed
                    applied_exceptions[qe_type].add(vote['vote_id'])
                else:
                    session_d['bad_vote_counts'].add(vote['vote_id'])

    # do logging of unnecessary exceptions: only vote exceptions are
    # evaluated here, and only those that never matched a bad count
    for qe_type, qes in quality_exceptions.items():
        if not qe_type.startswith('votes:'):
            continue
        applied = applied_exceptions.get(qe_type, ())
        unused = [qe for qe in qes if qe not in applied]
        if unused:
            logger.warning('unnecessary {0} exceptions for {1} votes: \n  {2}'
                           .format(qe_type, len(unused), '\n  '.join(unused)))

    return {'sessions': sessions}
Example #4
0
def scan_votes(abbr):
    """Collect per-session vote statistics for the jurisdiction ``abbr``.

    Every vote document whose ``settings.LEVEL_FIELD`` equals ``abbr`` is
    tallied into a per-session report dict (created by
    ``_vote_report_dict``): total and passed votes, counts per chamber, type
    and month, roll call counts, voters without a ``leg_id`` and votes whose
    roll call lengths disagree with their recorded counts.

    A count mismatch is not reported when the vote's ``vote_id`` is listed
    under the matching ``votes:bad_<kind>_count`` quality exception.  Vote
    exceptions that never suppressed a report are logged as unnecessary.

    Returns ``{'sessions': sessions}`` where ``sessions`` maps each session
    to its report dict.
    """
    sessions = defaultdict(_vote_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)
    # ids of the exceptions that actually suppressed a report, by type
    applied_exceptions = defaultdict(set)

    for vote in db.votes.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[vote['session']]

        session_d['vote_count'] += 1
        if vote['passed']:
            session_d['_passed_vote_count'] += 1
        session_d['votes_per_chamber'][vote['chamber']] += 1
        # votes lacking a type or a date are counted above but are skipped
        # for all remaining statistics
        if not vote.get('type'):
            logger.warning('vote %s missing type' % vote['_id'])
            continue
        session_d['votes_per_type'][vote.get('type')] += 1
        if not vote.get('date'):
            logger.warning('vote %s missing date' % vote['_id'])
            continue
        session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

        # roll calls
        rollcalls = (vote['yes_votes'] + vote['no_votes'] +
                     vote['other_votes'])
        for rc in rollcalls:
            session_d['_rollcall_count'] += 1
            if rc.get('leg_id'):
                session_d['_rollcalls_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_voters'].add(
                    (term_for_session(abbr, vote['session']), vote['chamber'],
                     rc['name']))

        # check counts if any rollcalls are present
        if rollcalls:
            for kind in ('yes', 'no', 'other'):
                if len(vote[kind + '_votes']) == vote[kind + '_count']:
                    continue
                qe_type = 'votes:bad_{0}_count'.format(kind)
                if vote['vote_id'] in quality_exceptions[qe_type]:
                    # the exception did its job; remember that it is needed
                    applied_exceptions[qe_type].add(vote['vote_id'])
                else:
                    session_d['bad_vote_counts'].add(vote['vote_id'])

    # do logging of unnecessary exceptions: only vote exceptions are
    # evaluated here, and only those that never matched a bad count
    for qe_type, qes in quality_exceptions.items():
        if not qe_type.startswith('votes:'):
            continue
        applied = applied_exceptions.get(qe_type, ())
        unused = [qe for qe in qes if qe not in applied]
        if unused:
            logger.warning(
                'unnecessary {0} exceptions for {1} votes: \n  {2}'.format(
                    qe_type, len(unused), '\n  '.join(unused)))

    return {'sessions': sessions}
Example #5
0
def bill_list(request, abbr):
    """Render "billy/bill_list.html" for the bills of jurisdiction ``abbr``.

    If the query string carries ``version_url``, the bills having a version
    at that URL are listed.  Otherwise the bills matching
    ``_bill_spec(meta, limit)`` are listed, excluding the ids stored as
    quality exceptions under ``"bills:" + limit`` (``limit`` is the optional
    ``limit`` query parameter and defaults to the empty string).

    Raises ``Http404`` when ``abbr`` has no metadata.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404("No metadata found for abbreviation %r" % abbr)

    get_params = request.GET
    if "version_url" not in get_params:
        limit = get_params.get("limit", "")
        exceptions = get_quality_exceptions(abbr)["bills:" + limit]
        spec = _bill_spec(meta, limit)
    else:
        spec = {"versions.url": get_params.get("version_url")}
        exceptions = []

    # Describe the base query first; the exclusion clause added below is
    # only reflected in the text as a count.
    query_text = repr(spec)
    if exceptions:
        spec["_id"] = {"$nin": list(exceptions)}
        query_text = query_text + " (excluding {0} exceptions)".format(len(exceptions))

    bills = list(mdb.bills.find(spec))
    bill_ids = [doc["_id"] for doc in bills if doc["_id"] not in exceptions]

    return render(
        request,
        "billy/bill_list.html",
        {"metadata": meta, "query_text": query_text, "bills": bills, "bill_ids": bill_ids},
    )
Example #6
0
def scan_bills(abbr):
    """Collect per-session bill statistics for the jurisdiction ``abbr``.

    Every bill document whose ``settings.LEVEL_FIELD`` equals ``abbr`` is
    tallied into a per-session report dict (created by
    ``_bill_report_dict``).  Bills without actions, sponsors or versions are
    not counted as such when their id is listed under the matching
    ``bills:no_*`` quality exception; each exception used this way is removed
    from its set, and whatever ``bills:`` exceptions remain afterwards are
    logged as unnecessary.

    Returns a dict with the keys ``duplicate_versions`` and
    ``duplicate_sources`` (URLs seen more than once), ``other_actions`` and
    ``uncategorized_subjects`` (``(text, count)`` items), ``sessions`` (the
    per-session reports) and ``progress_meter_gaps`` (always an empty list;
    the gaps found are stored on each session report instead).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the reference date; without this every action would be
            # compared against 1900-01-01 and nothing could ever be unsorted
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']), bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # Check for bills that were signed but didn't pass both chambers.
        # bill['type'] is a list of type names (it is iterated above), so
        # membership has to be tested rather than equality with a string.
        if 'bill' in bill['type']:
            if action_dates['signed']:
                if not (action_dates['passed_upper'] and
                        action_dates['passed_lower']):
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # Passed the other chamber without passing the chamber of origin.
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [url for url, n in duplicate_versions.items()
                        if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items()
                       if n > 1]

    # do logging of unnecessary exceptions; only bill exceptions are
    # evaluated by this scan, so other exception types are not reported
    for qe_type, qes in quality_exceptions.items():
        if qes and qe_type.startswith('bills:'):
            logger.warning('unnecessary {0} exceptions for {1} bills: \n  {2}'
                           .format(qe_type, len(qes), '\n  '.join(qes)))

    return {'duplicate_versions': dup_version_urls,
            'duplicate_sources': dup_source_urls,
            'other_actions': other_actions.items(),
            'uncategorized_subjects': uncategorized_subjects.items(),
            'sessions': sessions,
            'progress_meter_gaps': []
           }
Example #7
0
def scan_bills(abbr):
    """Collect per-session bill statistics for the jurisdiction ``abbr``.

    Every bill document whose ``settings.LEVEL_FIELD`` equals ``abbr`` is
    tallied into a per-session report dict (created by
    ``_bill_report_dict``).  Actions without a date are ignored entirely.
    Bills without actions, sponsors or versions are not counted as such when
    their id is listed under the matching ``bills:no_*`` quality exception;
    each exception used this way is removed from its set, and whatever
    ``bills:`` exceptions remain afterwards are logged as unnecessary.

    Returns a dict with the keys ``duplicate_versions`` and
    ``duplicate_sources`` (URLs seen more than once), ``other_actions`` and
    ``uncategorized_subjects`` (``(text, count)`` items), ``sessions`` (the
    per-session reports) and ``progress_meter_gaps`` (always an empty list;
    the gaps found are stored on each session report instead).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']

            # dateless actions can be neither ordered nor bucketed by month
            if not date:
                continue

            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the reference date; without this every action would be
            # compared against 1900-01-01 and nothing could ever be unsorted
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']), bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # Check for bills that were signed but didn't pass both chambers.
        # bill['type'] is a list of type names (it is iterated above), so
        # membership has to be tested rather than equality with a string.
        if 'bill' in bill['type']:
            if action_dates['signed']:
                if not (action_dates['passed_upper'] and
                        action_dates['passed_lower']):
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # Passed the other chamber without passing the chamber of origin.
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [url for url, n in duplicate_versions.items()
                        if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items()
                       if n > 1]

    # do logging of unnecessary exceptions; only bill exceptions are
    # evaluated by this scan, so other exception types are not reported
    for qe_type, qes in quality_exceptions.items():
        if qes and qe_type.startswith('bills:'):
            logger.warning('unnecessary {0} exceptions for {1} bills: \n  {2}'
                           .format(qe_type, len(qes), '\n  '.join(qes)))

    return {'duplicate_versions': dup_version_urls,
            'duplicate_sources': dup_source_urls,
            'other_actions': other_actions.items(),
            'uncategorized_subjects': uncategorized_subjects.items(),
            'sessions': sessions,
            'progress_meter_gaps': []
           }
Example #8
0
File: bills.py Project: JT5D/billy
def scan_bills(abbr):
    """Collect per-session bill statistics for the jurisdiction ``abbr``.

    Every bill document whose ``settings.LEVEL_FIELD`` equals ``abbr`` is
    tallied into a per-session report dict (created by
    ``_bill_report_dict``).  Bills without actions, sponsors or versions are
    not counted as such when their id is listed under the matching
    ``bills:no_*`` quality exception; each exception used this way is removed
    from its set, and whatever ``bills:`` exceptions remain afterwards are
    logged as unnecessary.

    Returns a dict with the keys ``duplicate_versions`` and
    ``duplicate_sources`` (URLs seen more than once), ``other_actions`` and
    ``uncategorized_subjects`` (``(text, count)`` items) and ``sessions``
    (the per-session reports).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill["session"]]

        # chamber count & bill_types
        if bill["chamber"] == "lower":
            session_d["lower_count"] += 1
        elif bill["chamber"] == "upper":
            session_d["upper_count"] += 1
        for bill_type in bill["type"]:
            session_d["bill_types"][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill["actions"]:
            date = action["date"]
            if date < last_date:
                session_d["actions_unsorted"].add(bill["_id"])
            # advance the reference date; without this every action would be
            # compared against 1900-01-01 and nothing could ever be unsorted
            last_date = date
            session_d["action_count"] += 1
            for action_type in action["type"]:
                session_d["actions_per_type"][action_type] += 1
            if "other" in action["type"]:
                other_actions[action["action"]] += 1
            session_d["actions_per_actor"][action["actor"]] += 1
            session_d["actions_per_month"][date.strftime("%Y-%m")] += 1

        # handle no_actions bills
        if not bill["actions"]:
            if bill["_id"] not in quality_exceptions["bills:no_actions"]:
                session_d["actionless_count"] += 1
            else:
                quality_exceptions["bills:no_actions"].remove(bill["_id"])

        # sponsors
        for sponsor in bill["sponsors"]:
            session_d["_sponsor_count"] += 1
            if sponsor.get("leg_id") or sponsor.get("committee_id"):
                session_d["_sponsors_with_id_count"] += 1
            else:
                # keep list of unmatched sponsors
                session_d["unmatched_sponsors"].add(
                    (term_for_session(abbr, bill["session"]), bill["chamber"], sponsor["name"])
                )
            session_d["sponsors_per_type"][sponsor["type"]] += 1

        # handle no sponsors bills
        if not bill["sponsors"]:
            if bill["_id"] not in quality_exceptions["bills:no_sponsors"]:
                session_d["sponsorless_count"] += 1
            else:
                quality_exceptions["bills:no_sponsors"].remove(bill["_id"])

        # subjects
        for subj in bill.get("scraped_subjects", []):
            uncategorized_subjects[subj] += 1
        if bill.get("subjects"):
            session_d["_subjects_count"] += 1
            for subject in bill["subjects"]:
                session_d["bills_per_subject"][subject] += 1

        # sources
        for source in bill["sources"]:
            duplicate_sources[source["url"]] += 1

        # versions
        if not bill["versions"]:
            # total num of bills w/o versions
            if bill["_id"] not in quality_exceptions["bills:no_versions"]:
                session_d["versionless_count"] += 1
            else:
                quality_exceptions["bills:no_versions"].remove(bill["_id"])
        else:
            # total num of versions
            session_d["version_count"] += len(bill["versions"])
        for doc in bill["versions"]:
            duplicate_versions[doc["url"]] += 1
        # TODO: add duplicate document detection back in?

    # dict.items() works on both Python 2 and 3, unlike iteritems()
    dup_version_urls = [url for url, n in duplicate_versions.items() if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items() if n > 1]

    # do logging of unnecessary exceptions; only bill exceptions are
    # evaluated by this scan, so other exception types are not reported
    for qe_type, qes in quality_exceptions.items():
        if qes and qe_type.startswith("bills:"):
            logger.warning(
                "unnecessary {0} exceptions for {1} bills: \n  {2}".format(qe_type, len(qes), "\n  ".join(qes))
            )

    return {
        "duplicate_versions": dup_version_urls,
        "duplicate_sources": dup_source_urls,
        "other_actions": other_actions.items(),
        "uncategorized_subjects": uncategorized_subjects.items(),
        "sessions": sessions,
    }