def scan_committees(abbr):
    """Build a data-quality report for the committees of one jurisdiction.

    The jurisdiction's metadata document is fetched to learn its ``level``;
    every committee stored for ``abbr`` at that level is then inspected.

    Args:
        abbr: jurisdiction abbreviation. Used as the ``_id`` of the metadata
            document and as the value matched against the level field.

    Returns:
        dict holding:
          * ``upper_count`` / ``lower_count`` / ``joint_count`` - committees
            per chamber,
          * ``empty_count`` - committees with no members,
          * ``_member_count`` / ``_members_with_leg_id_count`` - membership
            totals and how many members carry a ``leg_id``,
          * ``unmatched_leg_ids`` - set of ``(term, chamber, name)`` tuples
            for members lacking a ``leg_id``,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``_updated_*_count`` / ``sourceless_count`` - initialised to 0
            here and handed to ``update_common`` (defined elsewhere;
            presumably it maintains them - confirm against that helper).

    Raises:
        TypeError: if no metadata document exists for ``abbr``
            (``find_one`` returned ``None``).
    """
    metadata = db.metadata.find_one({'_id': abbr})
    level = metadata['level']

    duplicate_sources = defaultdict(int)
    report = {
        'upper_count': 0,
        'lower_count': 0,
        'joint_count': 0,
        'empty_count': 0,
        '_updated_today_count': 0,
        '_updated_this_month_count': 0,
        '_updated_this_year_count': 0,
        '_member_count': 0,
        '_members_with_leg_id_count': 0,
        'sourceless_count': 0,
        'unmatched_leg_ids': set(),
    }

    for com in db.committees.find({'level': level, level: abbr}):
        update_common(com, report)

        if com['chamber'] == 'upper':
            report['upper_count'] += 1
        elif com['chamber'] == 'lower':
            report['lower_count'] += 1
        elif com['chamber'] == 'joint':
            report['joint_count'] += 1

        # members
        if not com['members']:
            report['empty_count'] += 1

        for member in com['members']:
            report['_member_count'] += 1
            if member.get('leg_id'):
                report['_members_with_leg_id_count'] += 1
            else:
                report['unmatched_leg_ids'].add(
                    (com.get('term', ''), com['chamber'], member['name'])
                )

        # sources
        for source in com['sources']:
            duplicate_sources[source['url']] += 1

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    report['duplicate_sources'] = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    return report
def scan_legislators(abbr):
    """Build a data-quality report for the legislators of one jurisdiction.

    Args:
        abbr: jurisdiction abbreviation. Used as the ``_id`` of the metadata
            document and as the value matched against the level field.

    Returns:
        dict holding:
          * ``upper_active_count`` / ``lower_active_count`` - active
            legislators per chamber,
          * ``inactive_count`` - legislators without a truthy ``active``,
          * one counter per entry of ``checked_keys`` - number of active
            upper/lower legislators for which that key is truthy,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``_updated_*_count`` / ``sourceless_count`` - initialised to 0
            here and handed to ``update_common`` (defined elsewhere;
            presumably it maintains them - confirm against that helper).

    Raises:
        TypeError: if no metadata document exists for ``abbr``
            (``find_one`` returned ``None``).
    """
    metadata = db.metadata.find_one({'_id': abbr})
    level = metadata['level']

    duplicate_sources = defaultdict(int)
    report = {
        'upper_active_count': 0,
        'lower_active_count': 0,
        'inactive_count': 0,
        '_updated_today_count': 0,
        '_updated_this_month_count': 0,
        '_updated_this_year_count': 0,
        'sourceless_count': 0,
    }

    # tallied per chamber/district but not yet reported (see TODO below)
    seats_filled = {'upper': defaultdict(int), 'lower': defaultdict(int)}
    for key in checked_keys:
        report[key] = 0

    for leg in db.legislators.find({'level': level, level: abbr}):

        # do common details
        update_common(leg, report)

        # most checks only apply to active set
        if leg.get('active'):
            chamber = leg.get('chamber')
            if chamber == 'upper':
                report['upper_active_count'] += 1
            elif chamber == 'lower':
                report['lower_active_count'] += 1
            else:
                # TODO: track these? (executives)
                # NOTE: this also skips the source tally below for them
                continue

            seats_filled[chamber][leg['district']] += 1
            # TODO: check seats_filled against districts

            for key in checked_keys:
                if leg.get(key):
                    report[key] += 1
        else:
            report['inactive_count'] += 1

        for source in leg['sources']:
            duplicate_sources[source['url']] += 1

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    report['duplicate_sources'] = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    return report
def scan_legislators(abbr):
    """Build a data-quality report for the legislators of one jurisdiction.

    Besides the per-chamber and per-key counters, the seat count declared
    for every district is compared with the number of active legislators
    holding that district.

    Args:
        abbr: jurisdiction abbreviation. Matched against the ``abbr`` field
            of districts and against ``settings.LEVEL_FIELD`` of
            legislators.

    Returns:
        dict holding:
          * ``upper_active_count`` / ``lower_active_count`` - active
            legislators per chamber,
          * ``inactive_count`` - legislators without a truthy ``active``,
          * one counter per entry of ``checked_keys`` - number of active
            upper/lower legislators for which that key is truthy,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``overfilled_seats`` / ``vacant_seats`` - lists of
            ``(chamber, district, count)`` for districts with too many /
            too few active legislators,
          * ``_updated_*_count`` - initialised to 0 here and handed to
            ``update_common`` (defined elsewhere; presumably it maintains
            them - confirm against that helper).
    """
    duplicate_sources = defaultdict(int)
    report = {
        'upper_active_count': 0,
        'lower_active_count': 0,
        'inactive_count': 0,
        '_updated_today_count': 0,
        '_updated_this_month_count': 0,
        '_updated_this_year_count': 0,
    }
    for key in checked_keys:
        report[key] = 0

    # initialize seat counts
    district_seats = {'upper': defaultdict(int), 'lower': defaultdict(int)}
    for district in db.districts.find({'abbr': abbr}):
        district_seats[district['chamber']][district['name']] = \
            district['num_seats']

    for leg in db.legislators.find({settings.LEVEL_FIELD: abbr}):

        # do common details
        update_common(leg, report)

        # most checks only apply to active set
        if leg.get('active'):
            chamber = leg.get('chamber')
            if chamber == 'upper':
                report['upper_active_count'] += 1
            elif chamber == 'lower':
                report['lower_active_count'] += 1
            else:
                # TODO: track these? (executives)
                # NOTE: this also skips the source tally below for them
                continue

            # decrement empty seats (if it goes negative, we have too many)
            district_seats[chamber][leg['district']] -= 1

            for key in checked_keys:
                if leg.get(key):
                    report[key] += 1
        else:
            report['inactive_count'] += 1

        for source in leg['sources']:
            duplicate_sources[source['url']] += 1

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    report['duplicate_sources'] = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    # copy over seat issues into report
    report['overfilled_seats'] = []
    report['vacant_seats'] = []
    for chamber, chamber_seats in district_seats.items():
        for seat, count in chamber_seats.items():
            if count < 0:
                report['overfilled_seats'].append((chamber, seat, -count))
            elif count > 0:
                report['vacant_seats'].append((chamber, seat, count))

    return report
def scan_bills(abbr):
    """Build a data-quality report for every bill of one jurisdiction.

    Bills are grouped by session; each session gets its own report dict
    created by ``_bill_report_dict`` (defined elsewhere) and filled with
    counters about chambers, bill types, actions, sponsors, votes, roll
    calls, subjects and versions.

    Args:
        abbr: jurisdiction abbreviation. Used as the ``_id`` of the metadata
            document and as the value matched against the level field.

    Returns:
        dict holding:
          * ``duplicate_versions`` - version URLs seen more than once,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``other_actions`` - list of ``(action text, count)`` for actions
            whose type list contains ``'other'``,
          * ``uncategorized_subjects`` - list of ``(subject, count)`` taken
            from ``scraped_subjects``,
          * ``sessions`` - mapping of session name to its report dict.

    Raises:
        TypeError: if no metadata document exists for ``abbr``
            (``find_one`` returned ``None``).
    """
    metadata = db.metadata.find_one({'_id': abbr})
    level = metadata['level']

    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    for bill in db.bills.find({'level': level, level: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the marker so each action is compared with the one
            # before it; without this the check could never fire
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1
        if not bill['actions']:
            session_d['actionless_count'] += 1

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id'):
                session_d['_sponsors_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_leg_ids'].add(
                    (term_for_session(abbr, bill['session']),
                     bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1
        if not bill['sponsors']:
            session_d['sponsorless_count'] += 1

        # votes
        for vote in bill['votes']:
            session_d['vote_count'] += 1
            if vote['passed']:
                session_d['_passed_vote_count'] += 1
            session_d['votes_per_chamber'][vote['chamber']] += 1
            # a vote lacking type or date is counted above but skipped for
            # the per-type / per-month / roll-call statistics below
            if not vote.get('type'):
                logger.warning('vote is missing type on %s', bill['_id'])
                continue
            session_d['votes_per_type'][vote.get('type')] += 1
            if not vote.get('date'):
                logger.warning('vote is missing date on %s', bill['_id'])
                continue
            session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

            # roll calls
            has_rollcalls = False
            for rc in (vote['yes_votes'] + vote['no_votes'] +
                       vote['other_votes']):
                has_rollcalls = True
                session_d['_rollcall_count'] += 1
                if rc.get('leg_id'):
                    session_d['_rollcalls_with_leg_id_count'] += 1
                else:
                    # keep missing leg_ids
                    session_d['unmatched_leg_ids'].add(
                        (term_for_session(abbr, bill['session']),
                         vote['chamber'],
                         rc['name'])
                    )

            # check counts if any rollcalls are present
            if (has_rollcalls and
                    (len(vote['yes_votes']) != vote['yes_count'] or
                     len(vote['no_votes']) != vote['no_count'] or
                     len(vote['other_votes']) != vote['other_count'])):
                session_d['bad_vote_counts'].add(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            session_d['versionless_count'] += 1
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add a duplicate documents back in?

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    dup_version_urls = [
        url for url, n in duplicate_versions.items() if n > 1
    ]
    dup_source_urls = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    return {
        'duplicate_versions': dup_version_urls,
        'duplicate_sources': dup_source_urls,
        # list() keeps the list return shape on Python 3 as well
        'other_actions': list(other_actions.items()),
        'uncategorized_subjects': list(uncategorized_subjects.items()),
        'sessions': sessions,
    }
def scan_legislators(abbr):
    """Summarise legislator data quality for the jurisdiction ``abbr``.

    Counts active legislators per chamber, inactive legislators, how many
    active upper/lower legislators have each of ``checked_keys`` set, which
    source URLs are repeated, and which districts hold more or fewer active
    legislators than their declared ``num_seats``.

    Returns the report dict; seat problems are reported as
    ``(chamber, district, count)`` tuples under ``overfilled_seats`` and
    ``vacant_seats``.
    """
    url_hits = defaultdict(int)
    report = {
        'upper_active_count': 0,
        'lower_active_count': 0,
        'inactive_count': 0,
        '_updated_today_count': 0,
        '_updated_this_month_count': 0,
        '_updated_this_year_count': 0,
    }
    report.update((key, 0) for key in checked_keys)

    # start every district at its declared number of seats
    seats_left = {'upper': defaultdict(int), 'lower': defaultdict(int)}
    for district in db.districts.find({'abbr': abbr}):
        chamber_seats = seats_left[district['chamber']]
        chamber_seats[district['name']] = district['num_seats']

    active_counter = {
        'upper': 'upper_active_count',
        'lower': 'lower_active_count',
    }

    for legislator in db.legislators.find({settings.LEVEL_FIELD: abbr}):
        # shared bookkeeping first
        update_common(legislator, report)

        if not legislator.get('active'):
            report['inactive_count'] += 1
        else:
            # the remaining checks are for active upper/lower members only
            chamber = legislator.get('chamber')
            counter_key = active_counter.get(chamber)
            if counter_key is None:
                # TODO: track these? (executives)
                continue
            report[counter_key] += 1

            # one seat taken; a negative total means the district is
            # over-subscribed
            seats_left[chamber][legislator['district']] -= 1

            for key in checked_keys:
                if legislator.get(key):
                    report[key] += 1

        for source in legislator['sources']:
            url_hits[source['url']] += 1

    report['duplicate_sources'] = [
        url for url, hits in url_hits.items() if hits > 1
    ]

    # translate the leftover seat balance into the two problem lists
    overfilled = []
    vacant = []
    for chamber, chamber_seats in seats_left.items():
        for seat, balance in chamber_seats.items():
            if balance < 0:
                overfilled.append((chamber, seat, -balance))
            elif balance > 0:
                vacant.append((chamber, seat, balance))
    report['overfilled_seats'] = overfilled
    report['vacant_seats'] = vacant

    return report
def scan_bills(abbr):
    """Build a data-quality report for every bill of one jurisdiction.

    Bills are grouped by session; each session gets its own report dict
    created by ``_bill_report_dict`` (defined elsewhere) and filled with
    counters about chambers, bill types, actions, sponsors, subjects,
    versions and progress-meter gaps. Bills listed in the quality
    exceptions returned by ``get_quality_exceptions`` are not counted as
    action-less / sponsor-less / version-less; exceptions that were never
    needed are logged as warnings.

    Args:
        abbr: jurisdiction abbreviation, matched against
            ``settings.LEVEL_FIELD`` of bills.

    Returns:
        dict holding:
          * ``duplicate_versions`` - version URLs seen more than once,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``other_actions`` - ``(action text, count)`` pairs for actions
            whose type list contains ``'other'``,
          * ``uncategorized_subjects`` - ``(subject, count)`` pairs taken
            from ``scraped_subjects``,
          * ``sessions`` - mapping of session name to its report dict,
          * ``progress_meter_gaps`` - always an empty list here; the
            detected gaps are stored in each session's own
            ``progress_meter_gaps`` entry.

    Raises:
        KeyError: if a bill's ``chamber`` is neither ``'lower'`` nor
            ``'upper'`` (the other-chamber lookup has no such key).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the marker so each action is compared with the one
            # before it; without this the check could never fire
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                # exception was needed; drop it so only unused ones remain
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']),
                     bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # Check for bills that were signed but didn't pass both chambers.
        # NOTE(review): bill['type'] is iterated as a collection above, so
        # comparing it with the string 'bill' looks like it can never be
        # true and every bill takes the else branch. Left unchanged because
        # a naive fix would alter results for single-chamber jurisdictions.
        if bill['type'] == 'bill':
            if action_dates['signed']:
                if not action_dates['passed_upper']:
                    progress_meter_gaps.add(bill['_id'])
                elif not action_dates['passed_lower']:
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # passed the other chamber without passing the house of origin
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [
        url for url, n in duplicate_versions.items() if n > 1
    ]
    dup_source_urls = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning('unnecessary {0} exceptions for {1} bills: \n {2}'
                           .format(qe_type, len(qes), '\n '.join(qes)))

    return {
        'duplicate_versions': dup_version_urls,
        'duplicate_sources': dup_source_urls,
        'other_actions': other_actions.items(),
        'uncategorized_subjects': uncategorized_subjects.items(),
        'sessions': sessions,
        'progress_meter_gaps': [],
    }
def scan_bills(abbr):
    """Build a data-quality report for every bill of one jurisdiction.

    Bills are grouped by session; each session gets its own report dict
    created by ``_bill_report_dict`` (defined elsewhere) and filled with
    counters about chambers, bill types, actions, sponsors, subjects,
    versions and progress-meter gaps. Actions without a date are ignored
    entirely. Bills listed in the quality exceptions returned by
    ``get_quality_exceptions`` are not counted as action-less /
    sponsor-less / version-less; exceptions that were never needed are
    logged as warnings.

    Args:
        abbr: jurisdiction abbreviation, matched against
            ``settings.LEVEL_FIELD`` of bills.

    Returns:
        dict holding:
          * ``duplicate_versions`` - version URLs seen more than once,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``other_actions`` - ``(action text, count)`` pairs for actions
            whose type list contains ``'other'``,
          * ``uncategorized_subjects`` - ``(subject, count)`` pairs taken
            from ``scraped_subjects``,
          * ``sessions`` - mapping of session name to its report dict,
          * ``progress_meter_gaps`` - always an empty list here; the
            detected gaps are stored in each session's own
            ``progress_meter_gaps`` entry.

    Raises:
        KeyError: if a bill's ``chamber`` is neither ``'lower'`` nor
            ``'upper'`` (the other-chamber lookup has no such key).
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            # undated actions are left out of every action statistic
            if not date:
                continue
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the marker so each action is compared with the one
            # before it; without this the check could never fire
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1

        # handle no_actions bills
        if not bill['actions']:
            if bill['_id'] not in quality_exceptions['bills:no_actions']:
                session_d['actionless_count'] += 1
            else:
                # exception was needed; drop it so only unused ones remain
                quality_exceptions['bills:no_actions'].remove(bill['_id'])

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id') or sponsor.get('committee_id'):
                session_d['_sponsors_with_id_count'] += 1
            else:
                # keep list of unmatched sponsors
                session_d['unmatched_sponsors'].add(
                    (term_for_session(abbr, bill['session']),
                     bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1

        # handle no sponsors bills
        if not bill['sponsors']:
            if bill['_id'] not in quality_exceptions['bills:no_sponsors']:
                session_d['sponsorless_count'] += 1
            else:
                quality_exceptions['bills:no_sponsors'].remove(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            if bill['_id'] not in quality_exceptions['bills:no_versions']:
                session_d['versionless_count'] += 1
            else:
                quality_exceptions['bills:no_versions'].remove(bill['_id'])
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add duplicate document detection back in?

        # Check for progress meter gaps.
        progress_meter_gaps = session_d['progress_meter_gaps']
        action_dates = bill['action_dates']
        bill_chamber = bill['chamber']
        other_chamber = dict(lower='upper', upper='lower')[bill_chamber]

        # Check for bills that were signed but didn't pass both chambers.
        # NOTE(review): bill['type'] is iterated as a collection above, so
        # comparing it with the string 'bill' looks like it can never be
        # true and every bill takes the else branch. Left unchanged because
        # a naive fix would alter results for single-chamber jurisdictions.
        if bill['type'] == 'bill':
            if action_dates['signed']:
                if not action_dates['passed_upper']:
                    progress_meter_gaps.add(bill['_id'])
                elif not action_dates['passed_lower']:
                    progress_meter_gaps.add(bill['_id'])
        else:
            # Check for nonbills that were signed but didn't pass their
            # house of origin.
            if action_dates['signed']:
                if not action_dates['passed_' + bill_chamber]:
                    progress_meter_gaps.add(bill['_id'])

        # passed the other chamber without passing the house of origin
        if action_dates['passed_' + other_chamber]:
            if not action_dates['passed_' + bill_chamber]:
                progress_meter_gaps.add(bill['_id'])

    dup_version_urls = [
        url for url, n in duplicate_versions.items() if n > 1
    ]
    dup_source_urls = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning('unnecessary {0} exceptions for {1} bills: \n {2}'
                           .format(qe_type, len(qes), '\n '.join(qes)))

    return {
        'duplicate_versions': dup_version_urls,
        'duplicate_sources': dup_source_urls,
        'other_actions': other_actions.items(),
        'uncategorized_subjects': uncategorized_subjects.items(),
        'sessions': sessions,
        'progress_meter_gaps': [],
    }
def scan_bills(abbr):
    """Build a data-quality report for every bill of one jurisdiction.

    Bills are grouped by session; each session gets its own report dict
    created by ``_bill_report_dict`` (defined elsewhere) and filled with
    counters about chambers, bill types, actions, sponsors, votes, roll
    calls, subjects and versions.

    Args:
        abbr: jurisdiction abbreviation. Used as the ``_id`` of the metadata
            document and as the value matched against the level field.

    Returns:
        dict holding:
          * ``duplicate_versions`` - version URLs seen more than once,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``other_actions`` - list of ``(action text, count)`` for actions
            whose type list contains ``'other'``,
          * ``uncategorized_subjects`` - list of ``(subject, count)`` taken
            from ``scraped_subjects``,
          * ``sessions`` - mapping of session name to its report dict.

    Raises:
        TypeError: if no metadata document exists for ``abbr``
            (``find_one`` returned ``None``).
    """
    metadata = db.metadata.find_one({'_id': abbr})
    level = metadata['level']

    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    for bill in db.bills.find({'level': level, level: abbr}):
        session_d = sessions[bill['session']]

        # chamber count & bill_types
        if bill['chamber'] == 'lower':
            session_d['lower_count'] += 1
        elif bill['chamber'] == 'upper':
            session_d['upper_count'] += 1
        for bill_type in bill['type']:
            session_d['bill_types'][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill['actions']:
            date = action['date']
            if date < last_date:
                session_d['actions_unsorted'].add(bill['_id'])
            # advance the marker so each action is compared with the one
            # before it; without this the check could never fire
            last_date = date
            session_d['action_count'] += 1
            for action_type in action['type']:
                session_d['actions_per_type'][action_type] += 1
            if 'other' in action['type']:
                other_actions[action['action']] += 1
            session_d['actions_per_actor'][action['actor']] += 1
            session_d['actions_per_month'][date.strftime('%Y-%m')] += 1
        if not bill['actions']:
            session_d['actionless_count'] += 1

        # sponsors
        for sponsor in bill['sponsors']:
            session_d['_sponsor_count'] += 1
            if sponsor.get('leg_id'):
                session_d['_sponsors_with_leg_id_count'] += 1
            else:
                # keep missing leg_ids
                session_d['unmatched_leg_ids'].add(
                    (term_for_session(abbr, bill['session']),
                     bill['chamber'],
                     sponsor['name'])
                )
            session_d['sponsors_per_type'][sponsor['type']] += 1
        if not bill['sponsors']:
            session_d['sponsorless_count'] += 1

        # votes
        for vote in bill['votes']:
            session_d['vote_count'] += 1
            if vote['passed']:
                session_d['_passed_vote_count'] += 1
            session_d['votes_per_chamber'][vote['chamber']] += 1
            # a vote lacking type or date is counted above but skipped for
            # the per-type / per-month / roll-call statistics below
            if not vote.get('type'):
                logger.warning('vote is missing type on %s', bill['_id'])
                continue
            session_d['votes_per_type'][vote.get('type')] += 1
            if not vote.get('date'):
                logger.warning('vote is missing date on %s', bill['_id'])
                continue
            session_d['votes_per_month'][vote['date'].strftime('%Y-%m')] += 1

            # roll calls
            has_rollcalls = False
            for rc in (vote['yes_votes'] + vote['no_votes'] +
                       vote['other_votes']):
                has_rollcalls = True
                session_d['_rollcall_count'] += 1
                if rc.get('leg_id'):
                    session_d['_rollcalls_with_leg_id_count'] += 1
                else:
                    # keep missing leg_ids
                    session_d['unmatched_leg_ids'].add(
                        (term_for_session(abbr, bill['session']),
                         vote['chamber'],
                         rc['name'])
                    )

            # check counts if any rollcalls are present
            if (has_rollcalls and
                    (len(vote['yes_votes']) != vote['yes_count'] or
                     len(vote['no_votes']) != vote['no_count'] or
                     len(vote['other_votes']) != vote['other_count'])):
                session_d['bad_vote_counts'].add(bill['_id'])

        # subjects
        for subj in bill.get('scraped_subjects', []):
            uncategorized_subjects[subj] += 1
        if bill.get('subjects'):
            session_d['_subjects_count'] += 1
            for subject in bill['subjects']:
                session_d['bills_per_subject'][subject] += 1

        # sources
        for source in bill['sources']:
            duplicate_sources[source['url']] += 1

        # versions
        if not bill['versions']:
            # total num of bills w/o versions
            session_d['versionless_count'] += 1
        else:
            # total num of versions
            session_d['version_count'] += len(bill['versions'])
        for doc in bill['versions']:
            duplicate_versions[doc['url']] += 1
        # TODO: add a duplicate documents back in?

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    dup_version_urls = [
        url for url, n in duplicate_versions.items() if n > 1
    ]
    dup_source_urls = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    return {
        'duplicate_versions': dup_version_urls,
        'duplicate_sources': dup_source_urls,
        # list() keeps the list return shape on Python 3 as well
        'other_actions': list(other_actions.items()),
        'uncategorized_subjects': list(uncategorized_subjects.items()),
        'sessions': sessions,
    }
def scan_bills(abbr):
    """Build a data-quality report for every bill of one jurisdiction.

    Bills are grouped by session; each session gets its own report dict
    created by ``_bill_report_dict`` (defined elsewhere) and filled with
    counters about chambers, bill types, actions, sponsors, subjects and
    versions. Bills listed in the quality exceptions returned by
    ``get_quality_exceptions`` are not counted as action-less /
    sponsor-less / version-less; exceptions that were never needed are
    logged as warnings.

    Args:
        abbr: jurisdiction abbreviation, matched against
            ``settings.LEVEL_FIELD`` of bills.

    Returns:
        dict holding:
          * ``duplicate_versions`` - version URLs seen more than once,
          * ``duplicate_sources`` - source URLs seen more than once,
          * ``other_actions`` - list of ``(action text, count)`` for actions
            whose type list contains ``"other"``,
          * ``uncategorized_subjects`` - list of ``(subject, count)`` taken
            from ``scraped_subjects``,
          * ``sessions`` - mapping of session name to its report dict.
    """
    duplicate_sources = defaultdict(int)
    duplicate_versions = defaultdict(int)
    other_actions = defaultdict(int)
    uncategorized_subjects = defaultdict(int)
    sessions = defaultdict(_bill_report_dict)

    # load exception data into sets of ids indexed by exception type
    quality_exceptions = get_quality_exceptions(abbr)

    for bill in db.bills.find({settings.LEVEL_FIELD: abbr}):
        session_d = sessions[bill["session"]]

        # chamber count & bill_types
        if bill["chamber"] == "lower":
            session_d["lower_count"] += 1
        elif bill["chamber"] == "upper":
            session_d["upper_count"] += 1
        for bill_type in bill["type"]:
            session_d["bill_types"][bill_type] += 1

        update_common(bill, session_d)

        # actions
        last_date = datetime.datetime(1900, 1, 1)
        for action in bill["actions"]:
            date = action["date"]
            if date < last_date:
                session_d["actions_unsorted"].add(bill["_id"])
            # advance the marker so each action is compared with the one
            # before it; without this the check could never fire
            last_date = date
            session_d["action_count"] += 1
            for action_type in action["type"]:
                session_d["actions_per_type"][action_type] += 1
            if "other" in action["type"]:
                other_actions[action["action"]] += 1
            session_d["actions_per_actor"][action["actor"]] += 1
            session_d["actions_per_month"][date.strftime("%Y-%m")] += 1

        # handle no_actions bills
        if not bill["actions"]:
            if bill["_id"] not in quality_exceptions["bills:no_actions"]:
                session_d["actionless_count"] += 1
            else:
                # exception was needed; drop it so only unused ones remain
                quality_exceptions["bills:no_actions"].remove(bill["_id"])

        # sponsors
        for sponsor in bill["sponsors"]:
            session_d["_sponsor_count"] += 1
            if sponsor.get("leg_id") or sponsor.get("committee_id"):
                session_d["_sponsors_with_id_count"] += 1
            else:
                # keep list of unmatched sponsors
                session_d["unmatched_sponsors"].add(
                    (term_for_session(abbr, bill["session"]),
                     bill["chamber"],
                     sponsor["name"])
                )
            session_d["sponsors_per_type"][sponsor["type"]] += 1

        # handle no sponsors bills
        if not bill["sponsors"]:
            if bill["_id"] not in quality_exceptions["bills:no_sponsors"]:
                session_d["sponsorless_count"] += 1
            else:
                quality_exceptions["bills:no_sponsors"].remove(bill["_id"])

        # subjects
        for subj in bill.get("scraped_subjects", []):
            uncategorized_subjects[subj] += 1
        if bill.get("subjects"):
            session_d["_subjects_count"] += 1
            for subject in bill["subjects"]:
                session_d["bills_per_subject"][subject] += 1

        # sources
        for source in bill["sources"]:
            duplicate_sources[source["url"]] += 1

        # versions
        if not bill["versions"]:
            # total num of bills w/o versions
            if bill["_id"] not in quality_exceptions["bills:no_versions"]:
                session_d["versionless_count"] += 1
            else:
                quality_exceptions["bills:no_versions"].remove(bill["_id"])
        else:
            # total num of versions
            session_d["version_count"] += len(bill["versions"])
        for doc in bill["versions"]:
            duplicate_versions[doc["url"]] += 1
        # TODO: add duplicate document detection back in?

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    dup_version_urls = [
        url for url, n in duplicate_versions.items() if n > 1
    ]
    dup_source_urls = [
        url for url, n in duplicate_sources.items() if n > 1
    ]

    # do logging of unnecessary exceptions
    for qe_type, qes in quality_exceptions.items():
        if qes:
            logger.warning(
                "unnecessary {0} exceptions for {1} bills: \n {2}".format(
                    qe_type, len(qes), "\n ".join(qes)
                )
            )

    return {
        "duplicate_versions": dup_version_urls,
        "duplicate_sources": dup_source_urls,
        # list() keeps the list return shape on Python 3 as well
        "other_actions": list(other_actions.items()),
        "uncategorized_subjects": list(uncategorized_subjects.items()),
        "sessions": sessions,
    }