def bill_list(request, abbr):
    """Render ``billy/bill_list.html`` for the bills selected by the request.

    When the query string carries ``version_url``, bills are matched on their
    ``versions.url`` field and no quality exceptions apply.  Otherwise the
    ``limit`` parameter (default ``''``) is handed to ``_bill_spec`` to build
    the query, and the ids stored under ``'bills:' + limit`` in the quality
    exceptions are excluded from it.

    Raises:
        Http404: if ``metadata(abbr)`` returns a falsy value.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    params = request.GET
    if 'version_url' in params:
        excluded = []
        spec = {'versions.url': params.get('version_url')}
    else:
        limit = params.get('limit', '')
        excluded = get_quality_exceptions(abbr)['bills:' + limit]
        spec = _bill_spec(meta, limit)

    # The description is taken before the exclusion clause is merged into the
    # spec, so the displayed text shows only the base query.
    query_text = repr(spec)
    if excluded:
        spec['_id'] = {'$nin': list(excluded)}
        query_text = query_text + ' (excluding {0} exceptions)'.format(
            len(excluded))

    bills = list(mdb.bills.find(spec))

    bill_ids = []
    for bill in bills:
        if bill['_id'] in excluded:
            continue
        bill_ids.append(bill['_id'])

    return render(request, 'billy/bill_list.html', {
        'metadata': meta,
        'query_text': query_text,
        'bills': bills,
        'bill_ids': bill_ids,
    })
def bill_list(request, abbr):
    """Show the bills that match a quality-report query for ``abbr``.

    A ``version_url`` GET parameter selects bills by ``versions.url``; in that
    mode the exception list is empty.  Without it, ``limit`` (default ``''``)
    is passed to ``_bill_spec`` and the ``'bills:' + limit`` quality
    exceptions are excluded via a ``$nin`` clause on ``_id``.

    Raises:
        Http404: if ``metadata(abbr)`` returns a falsy value.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    query = request.GET
    by_version = 'version_url' in query

    if by_version:
        spec = {'versions.url': query.get('version_url')}
        skipped = []
    else:
        limit = query.get('limit', '')
        skipped = get_quality_exceptions(abbr)['bills:' + limit]
        spec = _bill_spec(meta, limit)

    # Capture the human-readable query first; the exclusion list is only
    # summarised by its length, not spelled out.
    query_text = repr(spec)
    if skipped:
        suffix = ' (excluding {0} exceptions)'.format(len(skipped))
        query_text = query_text + suffix
        spec['_id'] = {'$nin': list(skipped)}

    bills = list(db.bills.find(spec))
    bill_ids = [doc['_id'] for doc in bills if not doc['_id'] in skipped]

    context = dict(
        metadata=meta,
        query_text=query_text,
        bills=bills,
        bill_ids=bill_ids,
    )
    return render(request, 'billy/bill_list.html', context)
def scan_votes(abbr):
    """Accumulate per-session quality statistics for the votes of ``abbr``.

    Every document in ``db.votes`` whose ``settings.LEVEL_FIELD`` equals
    ``abbr`` is folded into a ``_vote_report_dict`` keyed by the vote's
    session.  Votes missing a ``type`` or a ``date`` are logged and skipped
    after the basic counters (total, passed, per chamber) are updated.

    For votes with at least one roll call, the yes/no/other roll-call lists
    are compared against the stored counts.  A mismatch adds the ``vote_id``
    to ``bad_vote_counts`` unless it is listed in the matching
    ``votes:bad_*_count`` quality exception.  An exception that is matched is
    removed from its set, so the closing warning lists only exceptions that
    were never needed (the same bookkeeping ``scan_bills`` does).

    Returns:
        dict: ``{'sessions': sessions}`` where ``sessions`` maps each session
        to its report dict.
    """
    sessions = defaultdict(_vote_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    # (roll-call list key, stored count key, exception type) per vote option
    count_checks = (
        ('yes_votes', 'yes_count', 'votes:bad_yes_count'),
        ('no_votes', 'no_count', 'votes:bad_no_count'),
        ('other_votes', 'other_count', 'votes:bad_other_count'),
    )

    for vote in db.votes.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[vote['session']]

        session_d['vote_count'] += 1
        if vote['passed']:
            session_d['_passed_vote_count'] += 1
        session_d['votes_per_chamber'][vote['chamber']] += 1

        vote_type = vote.get('type')
        if not vote_type:
            logger.warning('vote %s missing type' % vote['_id'])
            continue
        session_d['votes_per_type'][vote_type] += 1

        if not vote.get('date'):
            logger.warning('vote %s missing date' % vote['_id'])
            continue
        session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

        # roll calls
        rollcalls = vote['yes_votes'] + vote['no_votes'] + vote['other_votes']
        for rc in rollcalls:
            session_d['_rollcall_count'] += 1
            if rc.get('leg_id'):
                session_d['_rollcalls_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_voters'].add(
                    (term_for_session(abbr, vote['session']),
                     vote['chamber'], rc['name'])
                )

        # check counts if any rollcalls are present
        if rollcalls:
            vote_id = vote['vote_id']
            for votes_key, count_key, qe_type in count_checks:
                if len(vote[votes_key]) == vote[count_key]:
                    continue
                allowed = quality_exceptions[qe_type]
                if vote_id in allowed:
                    # The exception was needed; drop it so it is not
                    # reported as unnecessary below.
                    allowed.remove(vote_id)
                else:
                    session_d['bad_vote_counts'].add(vote_id)

    # do logging of unnecessary exceptions
    # NOTE(review): this walks every exception type returned by
    # get_quality_exceptions, not only the 'votes:' ones - confirm intended.
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning('unnecessary {0} exceptions for {1} votes: \n {2}'
                           .format(qe_type, len(qes), '\n '.join(qes)))

    return {'sessions': sessions}
def scan_votes(abbr):
    """Accumulate per-session quality statistics for the votes of ``abbr``.

    Every document in ``db.votes`` whose ``settings.LEVEL_FIELD`` equals
    ``abbr`` is folded into a ``_vote_report_dict`` keyed by the vote's
    session.  Votes missing a ``type`` or a ``date`` are logged and skipped
    after the basic counters (total, passed, per chamber) are updated.

    For votes with at least one roll call, the yes/no/other roll-call lists
    are compared against the stored counts.  A mismatch adds the ``vote_id``
    to ``bad_vote_counts`` unless it is listed in the matching
    ``votes:bad_*_count`` quality exception.  An exception that is matched is
    removed from its set, so the closing warning lists only exceptions that
    were never needed (the same bookkeeping ``scan_bills`` does).

    Returns:
        dict: ``{'sessions': sessions}`` where ``sessions`` maps each session
        to its report dict.
    """
    sessions = defaultdict(_vote_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    # (roll-call list key, stored count key, exception type) per vote option
    count_checks = (
        ('yes_votes', 'yes_count', 'votes:bad_yes_count'),
        ('no_votes', 'no_count', 'votes:bad_no_count'),
        ('other_votes', 'other_count', 'votes:bad_other_count'),
    )

    for vote in db.votes.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[vote['session']]

        session_d['vote_count'] += 1
        if vote['passed']:
            session_d['_passed_vote_count'] += 1
        session_d['votes_per_chamber'][vote['chamber']] += 1

        vote_type = vote.get('type')
        if not vote_type:
            logger.warning('vote %s missing type' % vote['_id'])
            continue
        session_d['votes_per_type'][vote_type] += 1

        if not vote.get('date'):
            logger.warning('vote %s missing date' % vote['_id'])
            continue
        session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

        # roll calls
        rollcalls = vote['yes_votes'] + vote['no_votes'] + vote['other_votes']
        for rc in rollcalls:
            session_d['_rollcall_count'] += 1
            if rc.get('leg_id'):
                session_d['_rollcalls_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_voters'].add(
                    (term_for_session(abbr, vote['session']),
                     vote['chamber'], rc['name']))

        # check counts if any rollcalls are present
        if rollcalls:
            vote_id = vote['vote_id']
            for votes_key, count_key, qe_type in count_checks:
                if len(vote[votes_key]) == vote[count_key]:
                    continue
                allowed = quality_exceptions[qe_type]
                if vote_id in allowed:
                    # The exception was needed; drop it so it is not
                    # reported as unnecessary below.
                    allowed.remove(vote_id)
                else:
                    session_d['bad_vote_counts'].add(vote_id)

    # do logging of unnecessary exceptions
    # NOTE(review): this walks every exception type returned by
    # get_quality_exceptions, not only the 'votes:' ones - confirm intended.
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning(
                'unnecessary {0} exceptions for {1} votes: \n {2}'.format(
                    qe_type, len(qes), '\n '.join(qes)))

    return {'sessions': sessions}
def bill_list(request, abbr):
    """Render ``billy/bill_list.html`` for the bills selected by the request.

    When the query string carries ``version_url``, bills are matched on their
    ``versions.url`` field and no quality exceptions apply.  Otherwise the
    ``limit`` parameter (default ``""``) is handed to ``_bill_spec`` to build
    the query, and the ids stored under ``"bills:" + limit`` in the quality
    exceptions are excluded from it.

    Raises:
        Http404: if ``metadata(abbr)`` returns a falsy value.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404("No metadata found for abbreviation %r" % abbr)

    get_params = request.GET
    if "version_url" in get_params:
        ignored_ids = []
        spec = {"versions.url": get_params.get("version_url")}
    else:
        limit = get_params.get("limit", "")
        ignored_ids = get_quality_exceptions(abbr)["bills:" + limit]
        spec = _bill_spec(meta, limit)

    # Snapshot the spec's repr before the "$nin" clause is added to it.
    query_text = repr(spec)
    if ignored_ids:
        spec["_id"] = {"$nin": list(ignored_ids)}
        query_text = query_text + " (excluding {0} exceptions)".format(
            len(ignored_ids))

    bills = list(mdb.bills.find(spec))

    bill_ids = []
    for found in bills:
        found_id = found["_id"]
        if found_id not in ignored_ids:
            bill_ids.append(found_id)

    context = {
        "metadata": meta,
        "query_text": query_text,
        "bills": bills,
        "bill_ids": bill_ids,
    }
    return render(request, "billy/bill_list.html", context)
def scan_bills(abbr):
    """Accumulate per-session quality statistics for the bills of ``abbr``.

    Every document in ``db.bills`` whose ``settings.LEVEL_FIELD`` equals
    ``abbr`` is folded into a ``_bill_report_dict`` keyed by the bill's
    session: chamber and type counts, action/sponsor/subject/version
    statistics, and progress-meter gaps.  Bills lacking actions, sponsors or
    versions are counted unless their id is listed in the matching
    ``bills:no_*`` quality exception; a matched exception is removed from its
    set so that only unused exceptions are reported by the final warning.

    Returns:
        dict with keys ``duplicate_versions`` and ``duplicate_sources`` (URLs
        seen more than once across all bills), ``other_actions`` and
        ``uncategorized_subjects`` (``(text, count)`` items), ``sessions``
        (session -> report dict) and ``progress_meter_gaps`` (always an empty
        list here; per-session gaps are stored in each session's report
        dict).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # Advance the reference date; without this every action was
            # compared against 1900-01-01 and unsorted actions went unseen.
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']), bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
            for doc in bill['versions']:
                duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # bill['type'] is a collection of type names (it is iterated above),
        # so test membership; comparing it to the string 'bill' never matched.
        if 'bill' in bill['type']:
            # Check for bills that were signed but didn't pass both chambers.
            if action_dates['signed']:
                if not action_dates['passed_upper']:
                    progress_meter_gaps.add(bill['_id'])
                elif not action_dates['passed_lower']:
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # Anything that passed the other chamber without passing its house
        # of origin is a gap, whatever its type.
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [url for url, n in duplicate_versions.items() if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items() if n > 1]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning('unnecessary {0} exceptions for {1} bills: \n {2}'
                           .format(qe_type, len(qes), '\n '.join(qes)))

    return {'duplicate_versions': dup_version_urls,
            'duplicate_sources': dup_source_urls,
            'other_actions': other_actions.items(),
            'uncategorized_subjects': uncategorized_subjects.items(),
            'sessions': sessions,
            'progress_meter_gaps': []
            }
def scan_bills(abbr):
    """Accumulate per-session quality statistics for the bills of ``abbr``.

    Every document in ``db.bills`` whose ``settings.LEVEL_FIELD`` equals
    ``abbr`` is folded into a ``_bill_report_dict`` keyed by the bill's
    session: chamber and type counts, action/sponsor/subject/version
    statistics, and progress-meter gaps.  Actions without a date are skipped
    entirely.  Bills lacking actions, sponsors or versions are counted unless
    their id is listed in the matching ``bills:no_*`` quality exception; a
    matched exception is removed from its set so that only unused exceptions
    are reported by the final warning.

    Returns:
        dict with keys ``duplicate_versions`` and ``duplicate_sources`` (URLs
        seen more than once across all bills), ``other_actions`` and
        ``uncategorized_subjects`` (``(text, count)`` items), ``sessions``
        (session -> report dict) and ``progress_meter_gaps`` (always an empty
        list here; per-session gaps are stored in each session's report
        dict).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if not date:
                # Undated actions are neither counted nor order-checked.
                continue
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # Advance the reference date; without this every action was
            # compared against 1900-01-01 and unsorted actions went unseen.
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']), bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
            for doc in bill['versions']:
                duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # bill['type'] is a collection of type names (it is iterated above),
        # so test membership; comparing it to the string 'bill' never matched.
        if 'bill' in bill['type']:
            # Check for bills that were signed but didn't pass both chambers.
            if action_dates['signed']:
                if not action_dates['passed_upper']:
                    progress_meter_gaps.add(bill['_id'])
                elif not action_dates['passed_lower']:
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # Anything that passed the other chamber without passing its house
        # of origin is a gap, whatever its type.
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [url for url, n in duplicate_versions.items() if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items() if n > 1]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning('unnecessary {0} exceptions for {1} bills: \n {2}'
                           .format(qe_type, len(qes), '\n '.join(qes)))

    return {'duplicate_versions': dup_version_urls,
            'duplicate_sources': dup_source_urls,
            'other_actions': other_actions.items(),
            'uncategorized_subjects': uncategorized_subjects.items(),
            'sessions': sessions,
            'progress_meter_gaps': []
            }
def scan_bills(abbr):
    """Accumulate per-session quality statistics for the bills of ``abbr``.

    Every document in ``db.bills`` whose ``settings.LEVEL_FIELD`` equals
    ``abbr`` is folded into a ``_bill_report_dict`` keyed by the bill's
    session: chamber and type counts plus action/sponsor/subject/version
    statistics.  Bills lacking actions, sponsors or versions are counted
    unless their id is listed in the matching ``bills:no_*`` quality
    exception; a matched exception is removed from its set so that only
    unused exceptions are reported by the final warning.

    Returns:
        dict with keys ``duplicate_versions`` and ``duplicate_sources`` (URLs
        seen more than once across all bills), ``other_actions`` and
        ``uncategorized_subjects`` (``(text, count)`` items) and ``sessions``
        (session -> report dict).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill["session"]]

        # chamber count & bill_types
        if bill["chamber"] == "lower":
            session_d["lower_count"] += 1
        elif bill["chamber"] == "upper":
            session_d["upper_count"] += 1
        for bill_type in bill["type"]:
            session_d["bill_types"][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill["actions"]:
            date = action["date"]
            if date < last_date:
                session_d["actions_unsorted"].add(bill["_id"])
            # Advance the reference date; without this every action was
            # compared against 1900-01-01 and unsorted actions went unseen.
            last_date = date
            session_d["action_count"] += 1
            for action_type in action["type"]:
                session_d["actions_per_type"][action_type] += 1
            if "other" in action["type"]:
                other_actions[action["action"]] += 1
            session_d["actions_per_actor"][action["actor"]] += 1
            session_d["actions_per_month"][date.strftime("%Y-%m")] += 1

        # handle no_actions bills
        if not bill["actions"]:
            if bill["_id"] not in quality_exceptions["bills:no_actions"]:
                session_d["actionless_count"] += 1
            else:
                quality_exceptions["bills:no_actions"].remove(bill["_id"])

        # sponsors
        for sponsor in bill["sponsors"]:
            session_d["_sponsor_count"] += 1
            if sponsor.get("leg_id") or sponsor.get("committee_id"):
                session_d["_sponsors_with_id_count"] += 1
            else:
                # keep list of unmatched sponsors
                session_d["unmatched_sponsors"].add(
                    (term_for_session(abbr, bill["session"]), bill["chamber"],
                     sponsor["name"])
                )
            session_d["sponsors_per_type"][sponsor["type"]] += 1

        # handle no sponsors bills
        if not bill["sponsors"]:
            if bill["_id"] not in quality_exceptions["bills:no_sponsors"]:
                session_d["sponsorless_count"] += 1
            else:
                quality_exceptions["bills:no_sponsors"].remove(bill["_id"])

        # subjects
        for subj in bill.get("scraped_subjects", []):
            uncategorized_subjects[subj] += 1
        if bill.get("subjects"):
            session_d["_subjects_count"] += 1
            for subject in bill["subjects"]:
                session_d["bills_per_subject"][subject] += 1

        # sources
        for source in bill["sources"]:
            duplicate_sources[source["url"]] += 1

        # versions
        if not bill["versions"]:
            # total num of bills w/o versions
            if bill["_id"] not in quality_exceptions["bills:no_versions"]:
                session_d["versionless_count"] += 1
            else:
                quality_exceptions["bills:no_versions"].remove(bill["_id"])
        else:
            # total num of versions
            session_d["version_count"] += len(bill["versions"])
            for doc in bill["versions"]:
                duplicate_versions[doc["url"]] += 1
        # TODO: add duplicate document detection back in?

    # items() rather than iteritems() so this runs on Python 2 and 3.
    dup_version_urls = [url for url, n in duplicate_versions.items() if n > 1]
    dup_source_urls = [url for url, n in duplicate_sources.items() if n > 1]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning(
                "unnecessary {0} exceptions for {1} bills: \n {2}".format(
                    qe_type, len(qes), "\n ".join(qes))
            )

    return {
        "duplicate_versions": dup_version_urls,
        "duplicate_sources": dup_source_urls,
        "other_actions": other_actions.items(),
        "uncategorized_subjects": uncategorized_subjects.items(),
        "sessions": sessions,
    }