def handle(self, args):
    """Fill in missing ``leg_id`` values for every bill of one term.

    Finds the term named ``args.term`` in the metadata for ``args.abbr``;
    if no such term exists a message is printed and nothing else happens.
    Otherwise each bill in the term's sessions is loaded, sponsors and
    voters whose ``leg_id`` is falsy are passed through a ``NameMatcher``,
    and the bill is saved back with ``safe=True``.
    """
    abbr, term_name = args.abbr, args.term
    level = metadata(abbr)['level']
    matcher = NameMatcher(abbr, term_name, level)

    # First term whose name matches, or None when there is no such term.
    term = next(
        (t for t in metadata(abbr)['terms'] if t['name'] == term_name), None)
    if term is None:
        print('No such term for %s: %s' % (abbr, term_name))
        return

    vote_lists = ('yes_votes', 'no_votes', 'other_votes')
    for session in term['sessions']:
        query = {'level': level, level: abbr, 'session': session}
        for bill in db.bills.find(query):
            for sponsor in bill['sponsors']:
                if not sponsor['leg_id']:
                    sponsor['leg_id'] = matcher.match(sponsor['name'],
                                                      bill['chamber'])
            for vote in bill['votes']:
                for list_name in vote_lists:
                    for voter in vote[list_name]:
                        if not voter['leg_id']:
                            voter['leg_id'] = matcher.match(voter['name'],
                                                            vote['chamber'])
            db.bills.save(bill, safe=True)
def committees(request, abbr):
    """Render ``billy/committees.html`` with committees split by chamber.

    One query is issued per chamber ("upper", "lower", "joint") against the
    committees collection for the lower-cased ``abbr``; each result is
    sorted with the default ordering and handed to the template together
    with the jurisdiction metadata.
    """
    meta = metadata(abbr)
    level = metadata(abbr)["level"]
    place = abbr.lower()

    def sorted_committees(chamber):
        # Same spec for every chamber; only the chamber value differs.
        spec = {"level": level, level: place, "chamber": chamber}
        return sorted(db.committees.find(spec))

    upper = sorted_committees("upper")
    lower = sorted_committees("lower")
    joint = sorted_committees("joint")

    context = {
        "upper_coms": upper,
        "lower_coms": lower,
        "joint_coms": joint,
        "metadata": meta,
    }
    return render_to_response("billy/committees.html", context)
def events(request, abbr):
    """Render ``billy/events.html`` with up to 20 events for ``abbr``.

    The template receives ``events`` as a lazy sequence of
    ``(event, event['_id'])`` pairs and ``metadata`` for the jurisdiction.
    """
    meta = metadata(abbr)
    level = metadata(abbr)['level']

    # NOTE: the original code carried the remark "sort and get rid of old
    # events", but no sorting or date filtering is applied by this query.
    cursor = db.events.find({'level': level, level: abbr.lower()}).limit(20)
    event_pairs = ((event, event['_id']) for event in cursor)

    context = {'events': event_pairs, 'metadata': meta}
    return render(request, 'billy/events.html', context)
def legislators(request, abbr):
    """Render ``billy/legislators.html`` for the jurisdiction ``abbr``.

    Active legislators are listed per chamber (upper and lower), sorted
    with ``keyfunc``; inactive legislators are listed together, sorted by
    ``last_name``.
    """
    meta = metadata(abbr)
    level = metadata(abbr)["level"]
    place = abbr.lower()

    def find_legs(**criteria):
        # Every query shares the level/jurisdiction part of the spec.
        spec = {"level": level, level: place}
        spec.update(criteria)
        return db.legislators.find(spec)

    upper = sorted(find_legs(active=True, chamber="upper"), key=keyfunc)
    lower = sorted(find_legs(active=True, chamber="lower"), key=keyfunc)
    inactive = sorted(find_legs(active=False),
                      key=lambda leg: leg["last_name"])

    context = {
        "upper_legs": upper,
        "lower_legs": lower,
        "inactive_legs": inactive,
        "metadata": meta,
    }
    return render_to_response("billy/legislators.html", context)
def upload(abbr, filename, type, s3_prefix='downloads/', use_cname=True):
    """Upload ``filename`` to S3 and record its URL in the metadata.

    The key is ``<s3_prefix><YYYY>-<MM>-<DD>-<abbr>-<type>.zip`` using
    today's date.  With ``use_cname`` the public URL uses the bucket name
    as the host; otherwise the ``s3.amazonaws.com`` form is used.  After
    the upload the object is made public-read and the metadata document
    for ``abbr`` gets ``latest_<type>_url`` / ``latest_<type>_date``.
    """
    bucket_name = settings.AWS_BUCKET
    day = datetime.date.today()

    # build URL
    key_name = '%s%s-%02d-%02d-%s-%s.zip' % (
        s3_prefix, day.year, day.month, day.day, abbr, type)
    url_format = ('http://%s/%s' if use_cname
                  else 'http://%s.s3.amazonaws.com/%s')
    s3_url = url_format % (bucket_name, key_name)

    # S3 upload
    connection = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
    s3_key = Key(connection.create_bucket(bucket_name))
    s3_key.key = key_name
    logging.info('beginning upload to %s' % s3_url)
    s3_key.set_contents_from_filename(filename)
    s3_key.set_acl('public-read')

    # remember where the newest dump of this type lives
    meta = metadata(abbr)
    meta['latest_%s_url' % type] = s3_url
    meta['latest_%s_date' % type] = datetime.datetime.utcnow()
    db.metadata.save(meta, safe=True)

    logging.info('uploaded to %s' % s3_url)
def district_slug(state, chamber, district):
    """Return the census-style slug for a legislative district.

    The slug has the form ``<prefix>-<state>-<census name>`` where the
    prefix is ``sldl`` for the lower chamber and ``sldu`` otherwise, and
    the census name is lower-cased with spaces replaced by dashes.

    The census name comes from the ``_district_to_census`` mapping (loaded
    on first use).  When the district is not in the mapping a name is
    synthesised:

    * upper chamber: ``State Senate District <district>``
    * Maryland lower chamber: ``State Legislative Subdistrict`` when the
      district ends in a letter, else ``State Legislative District``
    * any other lower chamber: ``State House District`` or
      ``Assembly District`` depending on the chamber name in the metadata
    """
    if not _district_to_census:
        _load_census_mapping()

    try:
        census_name = _district_to_census[(state, chamber, district)]
    except KeyError:
        if chamber == 'upper':
            census_name = 'State Senate District %s' % district
        elif state == 'md':
            # Previously this result was unconditionally overwritten by the
            # generic House/Assembly naming below, making the Maryland
            # special case dead code.  ``[-1:]`` also tolerates an empty
            # district string instead of raising IndexError.
            if district[-1:].isalpha():
                census_name = 'State Legislative Subdistrict ' + district
            else:
                census_name = 'State Legislative District ' + district
        else:
            lower_name = metadata(state)['lower_chamber_name']
            if lower_name.startswith('House'):
                census_name = "State House District %s" % district
            else:
                census_name = "Assembly District %s" % district

    prefix = 'sldl' if chamber == 'lower' else 'sldu'
    return "%s-%s-%s" % (prefix, state.lower(),
                         census_name.replace(' ', '-').lower())
def summary_index(request, abbr, session):
    """Render a per-key usage summary of the bills in one session.

    For the bills matching ``abbr``/``session`` this counts how many
    documents contain each key, both for the bills themselves and for each
    embedded list named in ``object_types``.

    The template context is ``locals()``, so the local names defined here
    (``meta``, ``object_types``, ``build``, ``build_state``, ``summary``)
    are part of the template contract and must not be renamed.
    """
    meta = metadata(abbr)
    object_types = 'votes actions versions sponsors documents sources'.split()

    def build(context_set):
        # Count, per key, the number of documents that carry that key.
        counts = defaultdict(int)
        for doc in context_set:
            for field in doc.keys():
                counts[field] += 1
        return dict(counts)

    def build_state(abbr):
        bills = list(db.bills.find({'state': abbr, 'session': session}))
        res = dict(
            (kind, build(chain.from_iterable(bill[kind] for bill in bills)))
            for kind in object_types)
        res['bills'] = build(bills)
        return res

    summary = build_state(abbr)
    return render(request, 'billy/summary_index.html', locals())
def bill(request, abbr, session, id):
    """Render ``billy/bill.html`` for a single bill.

    The bill is looked up by jurisdiction, session and upper-cased
    ``id``; ``Http404`` is raised when nothing matches.
    """
    level = metadata(abbr)["level"]
    spec = {
        "level": level,
        level: abbr,
        "session": session,
        "bill_id": id.upper(),
    }
    found = db.bills.find_one(spec)
    if found:
        return render_to_response("billy/bill.html", {"bill": found})
    raise Http404
def event(request, abbr, event_id):
    """Render ``billy/events.html`` for the event with id ``event_id``.

    The result of ``find_one`` is passed to the template as-is, so the
    template receives ``None`` when no such event exists.
    """
    context = {'metadata': metadata(abbr)}
    context['event'] = db.events.find_one(event_id)
    return render(request, 'billy/events.html', context)
def read(self, request, abbr, session, bill_id, chamber=None):
    """Return one bill document, or ``None`` when nothing matches.

    The bill is selected by lower-cased ``abbr``, ``session`` and
    ``bill_id``, optionally narrowed by lower-cased ``chamber``.  The
    returned fields are those chosen by ``_build_field_list(request)``.
    """
    place = abbr.lower()
    level = metadata(place)['level']

    query = {level: place, 'session': session, 'bill_id': bill_id}
    if chamber:
        query['chamber'] = chamber.lower()

    wanted_fields = _build_field_list(request)
    return db.bills.find_one(query, fields=wanted_fields)
def legislators(request, abbr):
    """Render ``billy/legislators.html`` for the jurisdiction ``abbr``.

    Each chamber listed in the metadata gets a ``legislators`` entry with
    its active legislators sorted by ``keyfunc``; legislators whose
    ``active`` flag is not ``True`` are listed separately, sorted by
    ``last_name``.  Seat statistics come from the stored report.

    Raises:
        Http404: if no report document exists for ``abbr``.
    """
    meta = metadata(abbr)
    report = db.reports.find_one({"_id": abbr})
    if not report:
        raise Http404("No report was found for abbr %r." % abbr)
    leg_report = report["legislators"]

    place = abbr.lower()

    # Copy every chamber dict, not just the outer mapping: a shallow
    # ``.copy()`` shares the inner dicts with ``meta``, so adding the
    # ``legislators`` key would modify the metadata object itself.
    chambers = dict(
        (chamber_type, dict(chamber))
        for chamber_type, chamber in meta["chambers"].items()
    )
    for chamber_type, chamber in chambers.items():
        active = db.legislators.find(
            {settings.LEVEL_FIELD: place, "active": True, "chamber": chamber_type}
        )
        chamber["legislators"] = sorted(active, key=keyfunc)

    inactive = db.legislators.find(
        {settings.LEVEL_FIELD: place, "active": {"$ne": True}}
    )
    inactive_legs = sorted(inactive, key=lambda leg: leg["last_name"])

    return render(
        request,
        "billy/legislators.html",
        {
            "chambers": chambers.values(),
            "inactive_legs": inactive_legs,
            "metadata": meta,
            "overfilled": leg_report["overfilled_seats"],
            "vacant": leg_report["vacant_seats"],
        },
    )
def district_slug(state, chamber, district):
    """Return the census-style slug for a legislative district.

    The result is ``<prefix>-<state>-<census name>``: ``sldl`` prefixes
    lower-chamber districts and ``sldu`` everything else; the census name
    is lower-cased and its spaces become dashes.

    The census name is read from ``_district_to_census`` (populated on
    first use).  For districts missing from that mapping a name is built
    from the chamber: senate naming for the upper chamber, the Maryland
    ``State Legislative (Sub)district`` naming for Maryland's lower
    chamber, and ``State House District`` / ``Assembly District`` for other
    lower chambers depending on ``lower_chamber_name`` in the metadata.
    """
    if not _district_to_census:
        _load_census_mapping()

    try:
        census_name = _district_to_census[(state, chamber, district)]
    except KeyError:
        if chamber == "upper":
            census_name = "State Senate District %s" % district
        elif state == "md":
            # The Maryland name used to be computed and then immediately
            # replaced by the generic naming below; it now takes effect.
            # Slicing with [-1:] avoids an IndexError on an empty district.
            if district[-1:].isalpha():
                census_name = "State Legislative Subdistrict " + district
            else:
                census_name = "State Legislative District " + district
        else:
            lower_name = metadata(state)["lower_chamber_name"]
            if lower_name.startswith("House"):
                census_name = "State House District %s" % district
            else:
                census_name = "Assembly District %s" % district

    prefix = "sldl" if chamber == "lower" else "sldu"
    slug_name = census_name.replace(" ", "-").lower()
    return "%s-%s-%s" % (prefix, state.lower(), slug_name)
def legislator_edit(request, id):
    """Render the edit form for the legislator whose ``_all_ids`` has ``id``.

    The template receives the legislator, its jurisdiction metadata, the
    legislator's ``_locked_fields`` (empty list when absent) and the list
    of field names offered for editing.

    Raises:
        Http404: if no legislator matches ``id``.
    """
    leg = db.legislators.find_one({"_all_ids": id})
    if not leg:
        raise Http404("No legislators found for id %r." % id)

    editable_fields = [
        "last_name",
        "full_name",
        "first_name",
        "middle_name",
        "nickname",
        "suffixes",
        "email",
        "transparencydata_id",
        "photo_url",
        "url",
    ]
    context = {
        "leg": leg,
        "metadata": metadata(leg[settings.LEVEL_FIELD]),
        "locked": leg.get("_locked_fields", []),
        "fields": editable_fields,
    }
    return render(request, "billy/legislator_edit.html", context)
def object_json(request, collection, _id):
    """Render one database object as annotated, pretty-printed JSON.

    The object is fetched from ``mdb.<collection>`` by ``_id``, its keys
    are sorted, and it is dumped with ``indent=4``.  Keys matched by
    ``re_attr`` are wrapped in named anchors and quoted ``http://`` URLs
    become links.  The metadata passed to the template is the object
    itself when its ``_type`` is ``metadata``, otherwise the metadata of
    the object's jurisdiction.

    Raises:
        Http404: if no object with ``_id`` exists in ``collection``.
    """
    re_attr = re.compile(r'^ "(.{1,100})":', re.M)
    model_obj = getattr(mdb, collection).find_one(_id)
    if model_obj is None:
        msg = 'No object found with id %r in collection %r'
        raise Http404(msg % (_id, collection))

    obj = OrderedDict(sorted(model_obj.items()))
    obj_id = obj['_id']
    obj_json = json.dumps(obj, cls=JSONEncoderPlus, indent=4)

    def subfunc(m, tmpl=' <a name="%s">%s:</a>'):
        val = m.group(1)
        return tmpl % (val, val)

    # One pass rewrites every matching key.  The rewritten text no longer
    # matches ``re_attr``, so repeating the substitution once per key (as
    # the code used to) changed nothing and only cost time.
    obj_json = re_attr.sub(subfunc, obj_json)

    tmpl = '<a href="{0}">{0}</a>'
    obj_json = re.sub('"(http://.+?)"',
                      lambda m: tmpl.format(*m.groups()), obj_json)

    if obj['_type'] != 'metadata':
        mdata = metadata(obj[settings.LEVEL_FIELD])
    else:
        mdata = obj

    return render(request, 'billy/object_json.html', dict(
        obj=obj, obj_id=obj_id, obj_json=obj_json, collection=collection,
        metadata=mdata, model_obj=model_obj))
def match_names(state, term):
    """Fill in missing ``leg_id`` values on the bills of one term.

    If ``term`` is not among the terms in the metadata for ``state`` a
    message is printed and nothing is changed.  Otherwise every bill in
    the term's sessions is loaded, sponsors and voters with a falsy
    ``leg_id`` are resolved through a ``NameMatcher``, and the bill is
    saved with ``safe=True``.
    """
    matcher = NameMatcher(state, term)

    # First metadata term with the requested name, or None.
    term_info = next(
        (t for t in metadata(state)['terms'] if t['name'] == term), None)
    if term_info is None:
        print('No such term for %s: %s' % (state, term))
        return

    vote_lists = ('yes_votes', 'no_votes', 'other_votes')
    for session in term_info['sessions']:
        for bill in db.bills.find({'state': state, 'session': session}):
            for sponsor in bill['sponsors']:
                if not sponsor['leg_id']:
                    sponsor['leg_id'] = matcher.match(sponsor['name'],
                                                      bill['chamber'])
            for vote in bill['votes']:
                for list_name in vote_lists:
                    for voter in vote[list_name]:
                        if not voter['leg_id']:
                            voter['leg_id'] = matcher.match(voter['name'],
                                                            vote['chamber'])
            db.bills.save(bill, safe=True)
def bill_list(request, abbr):
    """Render ``billy/bill_list.html`` for a filtered set of bills.

    With a ``version_url`` GET parameter the bills having a version with
    that URL are listed.  Otherwise the ``limit`` parameter selects a spec
    via ``_bill_spec`` and the matching quality exceptions are excluded.

    Raises:
        Http404: if there is no metadata for ``abbr``.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    params = request.GET
    if 'version_url' in params:
        spec = {'versions.url': params.get('version_url')}
        exceptions = []
    else:
        limit = params.get('limit', '')
        exceptions = get_quality_exceptions(abbr)['bills:' + limit]
        spec = _bill_spec(meta, limit)

    # Describe the query before the exclusion clause is added to it.
    query_text = repr(spec)
    if exceptions:
        spec['_id'] = {'$nin': list(exceptions)}
        query_text += ' (excluding {0} exceptions)'.format(len(exceptions))

    bills = list(mdb.bills.find(spec))
    bill_ids = [doc['_id'] for doc in bills if doc['_id'] not in exceptions]

    return render(request, 'billy/bill_list.html', {
        'metadata': meta,
        'query_text': query_text,
        'bills': bills,
        'bill_ids': bill_ids,
    })
def legislators(request, abbr):
    """Render ``billy/legislators.html`` for the jurisdiction ``abbr``.

    Active legislators are grouped under each chamber from the metadata
    (sorted with ``keyfunc``); all legislators whose ``active`` flag is not
    ``True`` are listed together, sorted by ``last_name``.  Overfilled and
    vacant seat data is taken from the stored report.

    Raises:
        Http404: if no report document exists for ``abbr``.
    """
    meta = metadata(abbr)
    report = db.reports.find_one({'_id': abbr})
    if not report:
        raise Http404('No report was found for abbr %r.' % abbr)
    leg_report = report['legislators']

    place = abbr.lower()

    # ``meta['chambers'].copy()`` is shallow, so writing ``legislators``
    # into its values would modify the dicts owned by ``meta``.  Copy each
    # chamber dict so the metadata is left untouched.
    chambers = dict((chamber_type, dict(chamber))
                    for chamber_type, chamber in meta['chambers'].items())
    for chamber_type, chamber in chambers.items():
        chamber['legislators'] = sorted(
            db.legislators.find({settings.LEVEL_FIELD: place,
                                 'active': True,
                                 'chamber': chamber_type}),
            key=keyfunc)

    inactive_legs = sorted(
        db.legislators.find({settings.LEVEL_FIELD: place,
                             'active': {'$ne': True}}),
        key=lambda leg: leg['last_name'])

    return render(request, 'billy/legislators.html', {
        'chambers': chambers.values(),
        'inactive_legs': inactive_legs,
        'metadata': meta,
        'overfilled': leg_report['overfilled_seats'],
        'vacant': leg_report['vacant_seats'],
    })
def subjects(request, abbr):
    """Render ``billy/subjects.html`` for the jurisdiction ``abbr``.

    The template receives the stored subject mappings, the uncategorized
    subjects from the report that have no mapping yet (each with a
    sequential id appended), and the list of normalized subjects plus the
    extra ``IGNORED`` choice.

    Raises:
        Http404: if no report document exists for ``abbr``.
    """
    meta = metadata(abbr)
    subjects = db.subjects.find({'abbr': abbr.lower()})

    report = db.reports.find_one({'_id': abbr})
    if not report:
        # Without this guard a missing report surfaced as a TypeError;
        # other views in this file answer the same situation with a 404.
        raise Http404('No report was found for abbr %r.' % abbr)
    uc_s = report['bills']['uncategorized_subjects']

    # Index the existing mappings by their scraped ("remote") name.
    c_subjects = {}
    for sub in subjects:
        c_subjects[sub['remote']] = sub
    # The cursor was consumed above; rewind so the template can iterate it.
    subjects.rewind()

    uc_subjects = [sub for sub in uc_s if sub[0] not in c_subjects]
    for uniqid, sub in enumerate(uc_subjects, 1):
        sub.append(uniqid)

    normalized_subjects = settings.BILLY_SUBJECTS[:]
    normalized_subjects.append("IGNORED")

    return render(request, 'billy/subjects.html', {
        'metadata': meta,
        'subjects': subjects,
        'normalized_subjects': normalized_subjects,
        'uncat_subjects': uc_subjects,
    })
def summary_object_key_vals(
        request, abbr, urlencode=urllib.urlencode,
        collections=("bills", "legislators", "committees")):
    """List the ids of objects whose ``key`` equals ``val``.

    ``session``, ``object_type``, ``key`` and ``val`` are read from the GET
    parameters; ``val`` is JSON-decoded when it parses as JSON.  When
    ``object_type`` names one of ``collections`` that collection is
    searched on ``key``; otherwise bills are searched on the dotted path
    ``<object_type>.<key>``.  The template receives lazy
    ``(collection name, _id)`` pairs and the JSON-formatted spec.
    """
    meta = metadata(abbr)

    params = request.GET
    session = params['session']
    object_type = params['object_type']
    key = params['key']
    val = params['val']
    try:
        val = json.loads(val)
    except ValueError:
        # Not valid JSON: keep the raw string.
        pass

    spec = {settings.LEVEL_FIELD: abbr, 'session': session}
    fields = {'_id': 1}

    if object_type in collections:
        collection_name = object_type
        spec[key] = val
    else:
        collection_name = 'bills'
        spec['.'.join([object_type, key])] = val

    cursor = getattr(db, collection_name).find(spec, fields)
    objects = ((collection_name, doc['_id']) for doc in cursor)

    spec = json.dumps(spec, cls=JSONEncoderPlus, indent=4)
    context = dict(object_type=object_type, objects=objects, spec=spec,
                   meta=meta)
    return render(request, 'billy/summary_object_keyvals.html', context)
def upload(abbr, filename, type, s3_prefix="downloads/", use_cname=True):
    """Upload ``filename`` to S3 and store its URL in the metadata.

    The object key is ``<s3_prefix><YYYY>-<MM>-<DD>-<abbr>-<type>.zip``
    built from today's date.  ``use_cname`` chooses between a URL whose
    host is the bucket name and the ``<bucket>.s3.amazonaws.com`` form.
    The connection uses ``OrdinaryCallingFormat``.  Once uploaded, the
    object is made public-read and ``latest_<type>_url`` /
    ``latest_<type>_date`` are saved on the metadata for ``abbr``.
    """
    bucket_name = settings.AWS_BUCKET
    day = datetime.date.today()

    # build URL
    key_name = "%s%s-%02d-%02d-%s-%s.zip" % (
        s3_prefix, day.year, day.month, day.day, abbr, type)
    url_format = ("http://%s/%s" if use_cname
                  else "http://%s.s3.amazonaws.com/%s")
    s3_url = url_format % (bucket_name, key_name)

    # S3 upload
    connection = boto.connect_s3(
        settings.AWS_KEY,
        settings.AWS_SECRET,
        calling_format=OrdinaryCallingFormat(),
    )
    s3_key = Key(connection.create_bucket(bucket_name))
    s3_key.key = key_name
    logging.info("beginning upload to %s" % s3_url)
    s3_key.set_contents_from_filename(filename)
    s3_key.set_acl("public-read")

    # record the location of the newest dump of this type
    meta = metadata(abbr)
    meta["latest_%s_url" % type] = s3_url
    meta["latest_%s_date" % type] = datetime.datetime.utcnow()
    db.metadata.save(meta, safe=True)

    logging.info("uploaded to %s" % s3_url)
def handle(self, args):
    """Re-match legislator ids on the bills and votes of one term.

    If ``args.term`` is not a term in the metadata for ``args.abbr`` a
    message is printed and nothing is changed.  For every session of the
    term, each bill goes through ``match_sponsor_ids`` and is saved; each
    vote has every voter's ``leg_id`` recomputed with
    ``get_legislator_id``, the truthy ids are collected in ``_voters``,
    and the vote is saved.
    """
    abbr = args.abbr

    # First metadata term with the requested name, or None.
    term = next(
        (t for t in metadata(abbr)['terms'] if t['name'] == args.term), None)
    if term is None:
        print('No such term for %s: %s' % (args.abbr, args.term))
        return

    vote_lists = ('yes_votes', 'no_votes', 'other_votes')
    for session in term['sessions']:
        bill_spec = {settings.LEVEL_FIELD: abbr, 'session': session}
        for bill in db.bills.find(bill_spec):
            match_sponsor_ids(abbr, bill)
            db.bills.save(bill, safe=True)

        vote_spec = {settings.LEVEL_FIELD: abbr, 'session': session}
        for vote in db.votes.find(vote_spec):
            matched_ids = []
            vote['_voters'] = matched_ids
            for list_name in vote_lists:
                for voter in vote[list_name]:
                    leg_id = get_legislator_id(abbr, vote['session'],
                                               vote['chamber'],
                                               voter['name'])
                    voter['leg_id'] = leg_id
                    if leg_id:
                        matched_ids.append(leg_id)
            db.votes.save(vote, safe=True)
def retire_legislator(request, id):
    """Retire a legislator as of the POSTed ``end_date`` and show the result.

    The legislator is found through ``_all_ids``.  Its first role must be a
    ``member`` role for the latest term of its jurisdiction.  With a valid
    ``end_date`` (``YYYY-MM-DD``) the role's ``end_date`` is set, the
    legislator is saved, ``deactivate_legislators`` is run and a success
    alert is shown; a missing or malformed date produces a warning alert
    and changes nothing.

    Raises:
        Http404: if no legislator matches ``id``.
        ValueError: if the legislator has no current member role.
    """
    legislator = db.legislators.find_one({"_all_ids": id})
    if not legislator:
        raise Http404("No legislators found for id %r." % id)

    # retire a legislator
    abbr = legislator[settings.LEVEL_FIELD]
    meta = metadata(abbr)
    term = meta["terms"][-1]["name"]

    # An empty roles list used to fail with IndexError; report it the same
    # way as any other legislator lacking a current member role.
    roles = legislator["roles"]
    cur_role = roles[0] if roles else None
    if cur_role is None or cur_role["type"] != "member" or cur_role["term"] != term:
        raise ValueError("member missing role for %s" % term)

    end_date = request.POST.get("end_date")
    if not end_date:
        alert = dict(type="warning", title="Warning!", message="missing end_date for retirement")
    else:
        try:
            parsed_end = datetime.datetime.strptime(end_date, "%Y-%m-%d")
        except ValueError:
            # The date comes straight from the form; a typo should give the
            # user a warning rather than an unhandled server error.
            alert = dict(
                type="warning",
                title="Warning!",
                message="invalid end_date for retirement (expected YYYY-MM-DD)",
            )
        else:
            cur_role["end_date"] = parsed_end
            db.legislators.save(legislator, safe=True)
            deactivate_legislators(term, abbr)
            alert = dict(
                type="success",
                title="Retired Legislator",
                message="{0} was successfully retired.".format(legislator["full_name"]),
            )

    return render(
        request,
        "billy/legislator_edit.html",
        {"leg": legislator, "metadata": meta, "alert": alert},
    )
def read(self, request, abbr, session, bill_id, chamber=None):
    """Look up a single bill and return it (``None`` when not found).

    Matching is on the lower-cased ``abbr``, ``session`` and ``bill_id``;
    a truthy ``chamber`` is lower-cased and added to the query.  Only the
    fields selected by ``_build_field_list(request)`` are returned.
    """
    place = abbr.lower()
    level = metadata(place)["level"]

    query = {level: place, "session": session, "bill_id": bill_id}
    if chamber:
        query["chamber"] = chamber.lower()

    wanted_fields = _build_field_list(request)
    return db.bills.find_one(query, fields=wanted_fields)
def summary_object_key_vals(
    request, abbr, urlencode=urllib.urlencode, collections=("bills", "legislators", "committees")
):
    """Show which objects have a given value stored under a given key.

    The GET parameters ``session``, ``object_type``, ``key`` and ``val``
    drive the query; ``val`` is decoded as JSON when possible and used
    verbatim otherwise.  If ``object_type`` is one of ``collections`` that
    collection is queried on ``key``; if not, bills are queried on
    ``<object_type>.<key>``.  The template gets lazy
    ``(collection name, _id)`` pairs and the spec rendered as JSON.
    """
    meta = metadata(abbr)

    params = request.GET
    session = params["session"]
    object_type = params["object_type"]
    key = params["key"]
    val = params["val"]
    try:
        val = json.loads(val)
    except ValueError:
        # Leave non-JSON values as the raw string.
        pass

    spec = {settings.LEVEL_FIELD: abbr, "session": session}
    fields = {"_id": 1}

    if object_type in collections:
        collection_name = object_type
        spec[key] = val
    else:
        collection_name = "bills"
        spec[".".join([object_type, key])] = val

    cursor = getattr(db, collection_name).find(spec, fields)
    objects = ((collection_name, doc["_id"]) for doc in cursor)

    spec = json.dumps(spec, cls=JSONEncoderPlus, indent=4)
    context = dict(object_type=object_type, objects=objects, spec=spec, meta=meta)
    return render(request, "billy/summary_object_keyvals.html", context)
def state_index(request, state):
    """Render ``state_index.html`` with summary statistics for ``state``.

    The context holds the sorted metadata, the latest session and its bill
    statistics, legislator counts (per chamber, total active, inactive,
    and active without sources), the leg-id statistics, and committee
    counts (per chamber, total, and without sources).

    Raises:
        Http404: if there is no metadata for ``state``.
    """
    meta = metadata(state)
    if not meta:
        raise Http404

    def count(collection, **criteria):
        # Number of documents of this state matching the extra criteria.
        return collection.find(dict(criteria, state=state)).count()

    context = {}
    context["metadata"] = SortedDict(sorted(meta.items()))

    # types
    latest_session = meta["terms"][-1]["sessions"][-1]
    context["session"] = latest_session
    context.update(_bill_stats_for_session(state, latest_session))

    # legislators
    # (``lower_leg_count`` used to be queried and assigned twice in a row;
    # the duplicate query has been removed.)
    context["upper_leg_count"] = count(db.legislators, active=True, chamber="upper")
    context["lower_leg_count"] = count(db.legislators, active=True, chamber="lower")
    context["leg_count"] = context["upper_leg_count"] + context["lower_leg_count"]
    context["inactive_leg_count"] = count(db.legislators, active=False)
    context["ns_leg_count"] = count(db.legislators, active=True, sources={"$size": 0})
    context.update(_get_state_leg_id_stats(state))

    # committees
    context["upper_com_count"] = count(db.committees, chamber="upper")
    context["lower_com_count"] = count(db.committees, chamber="lower")
    context["joint_com_count"] = count(db.committees, chamber="joint")
    context["com_count"] = (
        context["upper_com_count"] + context["lower_com_count"] + context["joint_com_count"]
    )
    context["ns_com_count"] = count(db.committees, sources={"$size": 0})

    return render_to_response("state_index.html", context)
def random_bill(request, abbr):
    """Render ``billy/bill.html`` for a randomly chosen bill.

    The candidate bills are those matching ``_bill_spec`` for the optional
    ``limit`` GET parameter.  When none match, the template receives no
    bill and a warning message instead.

    Raises:
        Http404: if there is no metadata for ``abbr``.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404

    cursor = db.bills.find(_bill_spec(meta, request.GET.get('limit', '')))
    total = cursor.count()

    bill, warning = None, 'No bills matching the criteria were found.'
    if total:
        bill, warning = cursor[random.randint(0, total - 1)], None

    try:
        bill_id = bill['_id']
    except TypeError:
        # No bill was selected, so there is no id to expose.
        bill_id = None

    return render(request, 'billy/bill.html', {
        'bill': bill,
        'id': bill_id,
        'random': True,
        'state': abbr.lower(),
        'warning': warning,
        'metadata': meta,
    })
def retire_legislator(request, id):
    """Retire a legislator as of the POSTed ``end_date`` and show the result.

    The legislator is looked up through ``_all_ids`` and must have, as its
    first role, a ``member`` role for the latest term of its jurisdiction.
    A valid ``end_date`` (``YYYY-MM-DD``) is stored on that role, the
    legislator is saved and ``deactivate_legislators`` is run; a missing
    or malformed date only produces a warning alert.

    Raises:
        Http404: if no legislator matches ``id``.
        ValueError: if the legislator has no current member role.
    """
    legislator = db.legislators.find_one({'_all_ids': id})
    if not legislator:
        raise Http404('No legislators found for id %r.' % id)

    # retire a legislator
    abbr = legislator[settings.LEVEL_FIELD]
    meta = metadata(abbr)
    term = meta['terms'][-1]['name']

    # Treat an empty roles list like any other missing current role
    # instead of failing with IndexError.
    roles = legislator['roles']
    cur_role = roles[0] if roles else None
    if (cur_role is None or cur_role['type'] != 'member'
            or cur_role['term'] != term):
        raise ValueError('member missing role for %s' % term)

    end_date = request.POST.get('end_date')
    if not end_date:
        alert = dict(type='warning', title='Warning!',
                     message='missing end_date for retirement')
    else:
        try:
            parsed_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
        except ValueError:
            # User-supplied text: answer a bad date with a warning, not
            # with an unhandled exception.
            alert = dict(
                type='warning', title='Warning!',
                message='invalid end_date for retirement '
                        '(expected YYYY-MM-DD)')
        else:
            cur_role['end_date'] = parsed_end
            db.legislators.save(legislator, safe=True)
            deactivate_legislators(term, abbr)
            alert = dict(type='success', title='Retired Legislator',
                         message='{0} was successfully retired.'.format(
                             legislator['full_name']))

    return render(request, 'billy/legislator_edit.html',
                  {'leg': legislator, 'metadata': meta, 'alert': alert})
def run_detail_list(request, abbr):
    """Render the most recent run log for ``abbr`` plus the run history.

    Up to 25 runs are fetched, newest ``scraped.started`` first; the first
    one is shown in detail.  Durations (``t_delta``) are added to the
    scrape as a whole and to every run record without an ``exception``
    key.  When there are no runs, ``billy/run_detail.html`` is rendered
    with a warning instead.
    """
    try:
        allruns = db.billy_runs.find({"abbr": abbr}).sort(
            "scraped.started", direction=pymongo.DESCENDING)[:25]
        runlog = allruns[0]
    except IndexError as e:
        warning = "No records exist. Fetch returned a(n) %s" % (
            e.__class__.__name__)
        return render(request, 'billy/run_detail.html', {"warning": warning})

    # pre-process goodies for the template
    scraped = runlog['scraped']
    scraped['t_delta'] = scraped['ended'] - scraped['started']
    for entry in scraped['run_record']:
        if "exception" in entry:
            continue
        entry['t_delta'] = entry['end_time'] - entry['start_time']

    context = {
        "runlog": runlog,
        "allruns": allruns,
        "abbr": abbr,
        "metadata": metadata(abbr),
    }
    if "failure" in runlog:
        context["alert"] = dict(
            type='error',
            title="Exception during Execution",
            message=""" This build had an exception during it's execution. Please check below for the exception and error message. """)

    return render(request, 'billy/run_detail_list.html', context)
def handle(self, args):
    """Re-match legislator ids on the bills and votes of one term.

    Prints a message and stops when ``args.term`` is not one of the terms
    in the metadata for ``args.abbr``.  Otherwise, per session of that
    term: every bill is run through ``match_sponsor_ids`` and saved, and
    every vote gets each voter's ``leg_id`` recomputed via
    ``get_legislator_id`` with the truthy ids gathered in ``_voters``
    before the vote is saved.
    """
    abbr = args.abbr

    # First metadata term with the requested name, or None.
    term = next(
        (t for t in metadata(abbr)["terms"] if t["name"] == args.term), None)
    if term is None:
        print("No such term for %s: %s" % (args.abbr, args.term))
        return

    vote_lists = ("yes_votes", "no_votes", "other_votes")
    for session in term["sessions"]:
        bill_spec = {settings.LEVEL_FIELD: abbr, "session": session}
        for bill in db.bills.find(bill_spec):
            match_sponsor_ids(abbr, bill)
            db.bills.save(bill, safe=True)

        vote_spec = {settings.LEVEL_FIELD: abbr, "session": session}
        for vote in db.votes.find(vote_spec):
            matched_ids = []
            vote["_voters"] = matched_ids
            for list_name in vote_lists:
                for voter in vote[list_name]:
                    leg_id = get_legislator_id(
                        abbr, vote["session"], vote["chamber"], voter["name"])
                    voter["leg_id"] = leg_id
                    if leg_id:
                        matched_ids.append(leg_id)
            db.votes.save(vote, safe=True)
def dump(self, abbr, filename, validate, schema_dir):
    """Write the API responses for all of ``abbr``'s objects to a zip file.

    Every bill, legislator and committee of the jurisdiction is fetched
    from its API URL and stored in the archive under its API path.  With
    ``validate`` each response is first checked against the matching JSON
    schema from ``schema_dir`` (default: ``../schemas/api/`` next to this
    module).

    The archive is always closed, including when a fetch or a validation
    raises.
    """
    scraper = scrapelib.Scraper(requests_per_minute=600, follow_robots=False)
    level = metadata(abbr)['level']

    if not schema_dir:
        cwd = os.path.split(__file__)[0]
        schema_dir = os.path.join(cwd, "../schemas/api/")

    def load_schema(name):
        with open(os.path.join(schema_dir, name)) as f:
            return json.load(f)

    bill_schema = load_schema("bill.json")
    legislator_schema = load_schema("legislator.json")
    committee_schema = load_schema("committee.json")

    archive = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)

    def export(path, schema):
        # Fetch one API document, optionally validate it, and archive it.
        response = scraper.urlopen(api_url(path))
        if validate:
            validictory.validate(json.loads(response), schema,
                                 validator_cls=APIValidator)
        archive.writestr(path, response)

    # try/finally so a failing request or validation cannot leave the
    # archive open.
    try:
        logging.info('exporting %s bills...' % abbr)
        for bill in db.bills.find({'level': level, level: abbr},
                                  timeout=False):
            export("bills/%s/%s/%s/%s" % (abbr, bill['session'],
                                          bill['chamber'], bill['bill_id']),
                   bill_schema)

        logging.info('exporting %s legislators...' % abbr)
        for legislator in db.legislators.find({'level': level, level: abbr}):
            export('legislators/%s' % legislator['_id'], legislator_schema)

        logging.info('exporting %s committees...' % abbr)
        for committee in db.committees.find({'level': level, level: abbr}):
            export('committees/%s' % committee['_id'], committee_schema)
    finally:
        archive.close()
def overview(request, abbr):
    """Render ``billy/state_index.html`` with statistics for ``abbr``.

    The context contains the sorted metadata, the latest session with its
    bill statistics, legislator counts (per chamber, total active,
    inactive, active without sources), the leg-id statistics, and
    committee counts (per chamber, total, without sources).

    Raises:
        Http404: if there is no metadata for ``abbr``.
    """
    meta = metadata(abbr)
    if not meta:
        raise Http404

    context = {'metadata': SortedDict(sorted(meta.items()))}

    # types
    latest_session = meta['terms'][-1]['sessions'][-1]
    context['session'] = latest_session
    level = meta['level']
    context.update(_bill_stats_for_session(level, abbr, latest_session))

    def count(collection, **criteria):
        # Documents of this jurisdiction matching the extra criteria.
        spec = {'level': level, level: abbr}
        spec.update(criteria)
        return collection.find(spec).count()

    # legislators
    upper_legs = count(db.legislators, active=True, chamber='upper')
    lower_legs = count(db.legislators, active=True, chamber='lower')
    context['upper_leg_count'] = upper_legs
    context['lower_leg_count'] = lower_legs
    context['leg_count'] = upper_legs + lower_legs
    context['inactive_leg_count'] = count(db.legislators, active=False)
    context['ns_leg_count'] = count(db.legislators, active=True,
                                    sources={'$size': 0})
    context.update(_get_leg_id_stats(level, abbr))

    # committees
    for chamber in ('upper', 'lower', 'joint'):
        context[chamber + '_com_count'] = count(db.committees,
                                                chamber=chamber)
    context['com_count'] = (context['upper_com_count'] +
                            context['lower_com_count'] +
                            context['joint_com_count'])
    context['ns_com_count'] = count(db.committees, sources={'$size': 0})

    return render_to_response('billy/state_index.html', context)
def committees(request, abbr):
    """Render ``billy/committees.html`` with committees split by chamber.

    The upper, lower and joint committees of the lower-cased ``abbr`` are
    each fetched with their own query and sorted with the default
    ordering before being passed to the template along with the metadata.
    """
    meta = metadata(abbr)
    level = metadata(abbr)['level']
    place = abbr.lower()

    context = {}
    chamber_keys = (('upper_coms', 'upper'),
                    ('lower_coms', 'lower'),
                    ('joint_coms', 'joint'))
    for context_key, chamber in chamber_keys:
        cursor = db.committees.find(
            {'level': level, level: place, 'chamber': chamber})
        context[context_key] = sorted(cursor)
    context['metadata'] = meta

    return render_to_response('billy/committees.html', context)
def legislators(request, abbr):
    """Render ``billy/legislators.html`` for the jurisdiction ``abbr``.

    Active upper- and lower-chamber legislators are sorted with
    ``keyfunc``; inactive legislators of either chamber are sorted by
    ``last_name``.
    """
    meta = metadata(abbr)
    level = metadata(abbr)['level']
    place = abbr.lower()

    def query(**criteria):
        # Level/jurisdiction part of the spec plus the given criteria.
        spec = {'level': level, level: place}
        spec.update(criteria)
        return db.legislators.find(spec)

    context = {'metadata': meta}
    context['upper_legs'] = sorted(query(active=True, chamber='upper'),
                                   key=keyfunc)
    context['lower_legs'] = sorted(query(active=True, chamber='lower'),
                                   key=keyfunc)
    context['inactive_legs'] = sorted(query(active=False),
                                      key=lambda leg: leg['last_name'])

    return render_to_response('billy/legislators.html', context)
def random_bill(request, state):
    """Render ``billy/bill.html`` for a random bill of the latest session.

    Raises:
        Http404: if there is no metadata for ``state`` or the latest
            session has no bills.
    """
    meta = metadata(state)
    if not meta:
        raise Http404

    latest_session = meta['terms'][-1]['sessions'][-1]
    spec = {'state': state.lower(), 'session': latest_session}

    # Build the query once and reuse the cursor for counting and indexing.
    cursor = db.bills.find(spec)
    count = cursor.count()
    if not count:
        # random.randint(0, -1) raises ValueError; an empty session is a
        # "nothing to show" situation, not a server error.
        raise Http404('No bills found for %r in session %r.'
                      % (state, latest_session))

    bill = cursor[random.randint(0, count - 1)]
    return render_to_response('billy/bill.html', {'bill': bill})
def categorize_bills(self, latest_term_only=False):
    """Run ``categorize_bill`` over this jurisdiction's bills and save them.

    With ``latest_term_only`` only bills from the sessions of the last
    term in the metadata are processed; otherwise all bills of
    ``self.abbr`` are.
    """
    meta = metadata(self.abbr)
    query = {meta['level']: self.abbr}

    if latest_term_only:
        # process just the sessions from the latest term
        latest_term = meta['terms'][-1]
        query['session'] = {'$in': latest_term['sessions']}

    for doc in db.bills.find(query):
        self.categorize_bill(doc)
        db.bills.save(doc, safe=True)
def events(request, abbr):
    """Render ``billy/events.html`` with the 20 latest events for ``abbr``.

    Events are ordered by ``when``, descending.  The template receives
    ``events`` as a lazy sequence of ``(event, event['_id'])`` pairs.
    """
    meta = metadata(abbr)

    spec = {settings.LEVEL_FIELD: abbr.lower()}
    newest_first = [('when', pymongo.DESCENDING)]
    # NOTE: the original remark here mentioned getting rid of old events;
    # the query only sorts and limits, it does not filter by date.
    cursor = db.events.find(spec, sort=newest_first).limit(20)

    event_pairs = ((event, event['_id']) for event in cursor)
    context = {'events': event_pairs, 'metadata': meta}
    return render(request, 'billy/events.html', context)
def dump(self, abbr, filename):
    """Zip the legislator and bill CSV dumps for ``abbr`` into ``filename``.

    Each CSV produced by ``dump_legislator_csvs`` / ``dump_bill_csvs`` is
    added to the archive under its bare file name and then deleted from
    disk.
    """
    level = metadata(abbr)['level']

    files = []
    files += dump_legislator_csvs(level, abbr)
    files += dump_bill_csvs(level, abbr)

    zfile = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
    # The archive was never closed before.  A ZipFile only writes its
    # central directory on close(), so close it explicitly (also when
    # adding a file fails) instead of relying on garbage collection.
    try:
        for fname in files:
            arcname = fname.split('/')[-1]
            zfile.write(fname, arcname=arcname)
            os.remove(fname)
    finally:
        zfile.close()
def retire_legislator(leg_id, date):
    """Set the end date of a legislator's current role and deactivate them.

    ``date`` is a ``YYYY-MM-DD`` string.  The legislator's first role must
    be a ``member`` role for the latest term of the legislator's state.

    Raises:
        ValueError: if the first role is not a member role for the latest
            term, or ``date`` does not parse.
    """
    legislator = db.legislators.find_one({'leg_id': leg_id})
    term = metadata(legislator['state'])['terms'][-1]['name']

    cur_role = legislator['roles'][0]
    is_current_member = (cur_role['type'] == 'member'
                         and cur_role['term'] == term)
    if not is_current_member:
        raise ValueError('member missing role for %s' % term)

    cur_role['end_date'] = datetime.datetime.strptime(date, '%Y-%m-%d')
    db.legislators.save(legislator, safe=True)

    print('deactivating legislator {0}'.format(leg_id))
    deactivate_legislators(legislator['state'], term)
def bill(request, abbr, session, id):
    """Render ``billy/bill.html`` for one bill, or raise ``Http404``.

    The bill is identified by jurisdiction, ``session`` and the
    upper-cased ``id``.
    """
    level = metadata(abbr)['level']

    spec = {'level': level, level: abbr}
    spec['session'] = session
    spec['bill_id'] = id.upper()

    match = db.bills.find_one(spec)
    if match:
        return render_to_response('billy/bill.html', {'bill': match})
    raise Http404
def categorize_subjects(abbr, data_dir, process_all):
    """Assign normalized subjects to bills from a CSV mapping and report.

    ``<data_dir>/<abbr>.csv`` maps a scraped subject (first column) to
    zero or more subjects from ``settings.BILLY_SUBJECTS`` (remaining
    columns).  If the file cannot be read, processing continues with an
    empty mapping.  Each bill's ``subjects`` is set to the union of the
    categories of its ``scraped_subjects`` and the bill is saved.  Unless
    ``process_all`` is true only the sessions of the latest term are
    processed.  A summary of categories per bill and of uncategorized
    scraped subjects is printed at the end.

    Raises:
        Exception: if the CSV names a subject that is not in
            ``settings.BILLY_SUBJECTS``.
    """
    categorizer = defaultdict(set)
    categories_per_bill = defaultdict(int)
    uncategorized = defaultdict(int)

    filename = os.path.join(data_dir, abbr + '.csv')
    try:
        # The file used to be opened without ever being closed; the
        # ``with`` block releases the handle even if a row is rejected.
        with open(filename) as csv_file:
            # build category mapping
            for n, row in enumerate(csv.reader(csv_file)):
                for subj in row[1:]:
                    if subj:
                        subj = subj.strip()
                        if subj not in settings.BILLY_SUBJECTS:
                            raise Exception('invalid subject %s (%s - %s)' %
                                            (subj, row[0], n))
                        categorizer[row[0]].add(subj)
    except IOError:
        print('Proceeding without %s' % filename)

    meta = metadata(abbr)
    spec = {meta['level']: abbr}
    if not process_all:
        sessions = meta['terms'][-1]['sessions']
        spec['session'] = {'$in': sessions}

    for bill in db.bills.find(spec):
        subjects = set()
        for ss in bill.get('scraped_subjects', []):
            categories = categorizer[ss]
            if not categories:
                uncategorized[ss] += 1
            subjects.update(categories)
        bill['subjects'] = list(subjects)

        # increment # of bills with # of subjects
        categories_per_bill[len(subjects)] += 1

        db.bills.save(bill, safe=True)

    print('Categories per bill')
    print('-------------------')
    for ncats, total in sorted(categories_per_bill.items()):
        print('%s categories: %s bills' % (ncats, total))

    print('Uncategorized')
    print('-------------')
    # Most frequent uncategorized subjects first.
    subjects_i = sorted([(v, k) for k, v in uncategorized.items()],
                        reverse=True)
    for n, category in subjects_i:
        print('%s,"%s"' % (n, category.encode('ascii', 'replace')))
def quality_exception_remove(request, abbr, obj):
    """Delete the quality exception with id ``obj`` and redirect to the list.

    ``obj`` is converted with ``ObjectId`` and the matching document is
    removed from ``db.quality_exceptions``.
    """
    meta = metadata(abbr)

    # NOTE(review): nothing below ever adds to ``errors``, so the error
    # page branch cannot currently be taken - confirm whether validation
    # was meant to populate it.
    errors = []
    db.quality_exceptions.remove({"_id": ObjectId(obj)})

    if errors:
        context = {'metadata': meta, 'errors': errors}
        return render(request, 'billy/quality_exception_error.html', context)

    return redirect('quality_exceptions', abbr)
def _csv_response(request, csv_name, columns, data, abbr):
    """Return ``data`` as a CSV download or as an HTML table.

    When the request carries a ``csv`` parameter the rows of ``data`` are
    written to an attachment named ``<abbr>_<csv_name>.csv``; otherwise
    ``billy/generic_table.html`` is rendered with the columns, the data
    and the jurisdiction metadata.
    """
    if 'csv' not in request.REQUEST:
        context = {
            'columns': columns,
            'data': data,
            'metadata': metadata(abbr),
        }
        return render(request, 'billy/generic_table.html', context)

    resp = HttpResponse(mimetype="text/plain")
    resp['Content-Disposition'] = 'attachment; filename=%s_%s.csv' % (
        abbr, csv_name)
    writer = unicodecsv.writer(resp)
    for row in data:
        writer.writerow(row)
    return resp
def quality_exceptions(request, abbr):
    """Render ``billy/quality_exceptions.html`` for the jurisdiction ``abbr``.

    The template receives the stored quality exceptions for the
    lower-cased ``abbr`` (in the order the query returns them), the
    available exception types and the metadata.
    """
    context = {'metadata': metadata(abbr)}
    # Natural sort is fine
    context['exceptions'] = db.quality_exceptions.find(
        {'abbr': abbr.lower()})
    context["extypes"] = QUALITY_EXCEPTIONS
    return render(request, 'billy/quality_exceptions.html', context)
def read(self, request, abbr, session=None, chamber=None):
    """Return a mapping of each subject to its number of bills.

    Bills are restricted to the lower-cased ``abbr`` and, when given, to
    ``session`` and ``chamber`` (lower-cased and passed through
    ``_chamber_aliases``).  Every subject in ``settings.BILLY_SUBJECTS``
    appears in the result, including those with a count of zero.
    """
    place = abbr.lower()
    base_spec = {metadata(place)['level']: place}

    if session:
        base_spec['session'] = session
    if chamber:
        wanted = chamber.lower()
        base_spec['chamber'] = _chamber_aliases.get(wanted, wanted)

    return dict(
        (subject, db.bills.find(dict(base_spec, subjects=subject)).count())
        for subject in settings.BILLY_SUBJECTS)
def committees(request, abbr):
    """Render ``billy/committees.html`` with committees grouped by chamber.

    Every chamber from the metadata, plus a synthetic ``joint`` chamber
    named "Joint", gets a ``committees`` entry holding that chamber's
    committees for the lower-cased ``abbr`` in default sort order.
    """
    meta = metadata(abbr)
    place = abbr.lower()

    # Copy each chamber dict too: with the shallow ``.copy()`` used before,
    # the ``committees`` lists were written into the dicts owned by
    # ``meta`` itself.
    chambers = dict((chamber_type, dict(chamber))
                    for chamber_type, chamber in meta['chambers'].items())
    chambers['joint'] = {'name': 'Joint'}

    for chamber_type, chamber in chambers.items():
        chamber['committees'] = sorted(db.committees.find(
            {settings.LEVEL_FIELD: place, 'chamber': chamber_type}))

    return render(request, 'billy/committees.html', {
        'chambers': chambers.values(),
        'metadata': meta,
    })
def bills(request, state):
    """Render ``billy/bills.html`` with bill statistics for every session.

    Sessions are listed in metadata order (term by term); each entry is
    the result of ``_bill_stats_for_session`` with the session name added
    under ``session``.

    Raises:
        Http404: if there is no metadata for ``state``.
    """
    meta = metadata(state)
    if not meta:
        raise Http404

    session_stats = []
    all_sessions = (name for term in meta['terms']
                    for name in term['sessions'])
    for name in all_sessions:
        entry = _bill_stats_for_session(state, name)
        entry['session'] = name
        session_stats.append(entry)

    context = {'sessions': session_stats, 'metadata': meta}
    return render_to_response('billy/bills.html', context)
def progress_meter_gaps(request, abbr):
    '''List all bills that have been signed but haven't passed
    their house of origin. See billy.importers.bills for the
    actual conditions applied. There are a few.

    The bill ids come from ``bills.progress_meter_gaps`` in the stored
    report for ``abbr``.

    Raises:
        Http404: if there is no metadata or no report for ``abbr``.
    '''
    meta = metadata(abbr)
    if not meta:
        raise Http404('No metadata found for abbreviation %r' % abbr)

    report = mdb.reports.find_one({'_id': abbr})
    if not report:
        # A missing report used to surface as a TypeError on the next
        # line; answer it with a 404 like the metadata check above.
        raise Http404('No report was found for abbr %r.' % abbr)

    ids = report['bills']['progress_meter_gaps']
    bills = db.bills.find({'_id': {'$in': ids}})

    context = {'metadata': meta, 'bill_ids': ids, 'bills': bills,
               'query_text': 'progress meter gaps exist'}
    return render(request, 'billy/bill_list.html', context)
def read(self, request, abbr=None, session=None, bill_id=None,
         chamber=None, billy_bill_id=None):
    """Return a bill found either by its internal id or by its coordinates.

    A truthy ``billy_bill_id`` is matched against ``_id`` and the other
    arguments are ignored.  Otherwise the bill is selected by lower-cased
    ``abbr``, ``session`` and ``bill_id``, optionally narrowed by
    lower-cased ``chamber``.  The lookup is delegated to ``find_bill``
    with the fields chosen by ``_build_field_list(request)``.
    """
    if not billy_bill_id:
        place = abbr.lower()
        level = metadata(place)['level']
        query = {level: place, 'session': session, 'bill_id': bill_id}
        if chamber:
            query['chamber'] = chamber.lower()
    else:
        query = {'_id': billy_bill_id}

    wanted_fields = _build_field_list(request)
    return find_bill(query, fields=wanted_fields)
def edit(request, abbr):
    """Render ``billy/matching.html``, the manual name-matching page.

    The page lists the names the report flags as unmatched (bill sponsors,
    voters, committee members) that do not yet have a manual matcher
    entry, together with the existing matcher entries and the
    legislators and committees they can be matched to.

    Raises:
        Http404: if no report document exists for ``abbr``.
    """
    meta = metadata(abbr)
    report = db.reports.find_one({'_id': abbr})

    legs = list(db.legislators.find({settings.LEVEL_FIELD: abbr}))
    committees = list(db.committees.find({settings.LEVEL_FIELD: abbr}))
    matchers = db.manual.name_matchers.find({"abbr": abbr})

    # Legislators and committees share one lookup table keyed by _id.
    known_objs = dict((doc['_id'], doc) for doc in legs + committees)

    # Existing manual matches, plus the (term, chamber, name) triples they
    # already cover.
    sorted_ids = {}
    seen_names = set()
    for item in matchers:
        sorted_ids[item['_id']] = item
        seen_names.add((item['term'], item['chamber'], item['name']))

    if not report:
        raise Http404('No reports found for abbreviation %r.' % abbr)

    bill_unmatched = set(
        tuple(row + ['sponsor'])
        for row in report['bills']['unmatched_sponsors'])
    vote_unmatched = set(
        tuple(row + ['vote'])
        for row in report['votes']['unmatched_voters'])
    com_unmatched = set(
        tuple(row + ['committee'])
        for row in report['committees']['unmatched_leg_ids'])
    combined_sets = bill_unmatched | vote_unmatched | com_unmatched

    unmatched_ids = [
        (term, chamber, name, id_type)
        for term, chamber, name, id_type in combined_sets
        if (term, chamber, name) not in seen_names
    ]

    context = {
        "metadata": meta,
        "unmatched_ids": unmatched_ids,
        "all_ids": sorted_ids,
        "committees": committees,
        "known_objs": known_objs,
        "legs": legs,
    }
    return render(request, 'billy/matching.html', context)
def committees(request, state):
    '''Render the committee listing for ``state``, with the upper,
    lower and joint committees each sorted separately.
    '''
    abbr = state.lower()
    # Issue the three queries first, then sort each result set.
    cursors = [
        db.committees.find({'state': abbr, 'chamber': chamber})
        for chamber in ('upper', 'lower', 'joint')
    ]
    upper_coms, lower_coms, joint_coms = [
        sorted(cursor) for cursor in cursors
    ]

    context = {
        'upper_coms': upper_coms,
        'lower_coms': lower_coms,
        'joint_coms': joint_coms,
        'metadata': metadata(state),
    }
    return render_to_response('billy/committees.html', context)
def metadata_json(request, abbr):
    '''Render the metadata for ``abbr`` as pretty-printed JSON.

    Every top-level key is wrapped in a named anchor and every quoted
    ``http://`` URL is turned into a link. The template receives the
    metadata, its sorted keys and the marked-up JSON text.

    Raises Http404 when there is no metadata for ``abbr``.
    '''
    obj = metadata(abbr)
    if obj is None:
        # Presumably metadata() returns None for an unknown abbreviation;
        # without this guard the code below fails with a TypeError.
        raise Http404('No metadata found for abbreviation %r' % abbr)

    indent = 4
    pad = ' ' * indent
    obj_json = json.dumps(obj, indent=indent, cls=JSONDateEncoder)

    # Top-level keys sit exactly one indent step from the line start, so
    # the pattern is derived from the same width handed to json.dumps.
    re_attr = re.compile(r'^%s"(.{1,100})":' % pad, re.M)

    def subfunc(m, tmpl=pad + '<a name="%s">%s:</a>'):
        val = m.group(1)
        return tmpl % (val, val)

    # One pass is enough: a rewritten line no longer starts with a quote,
    # so repeating the substitution could never match again.
    obj_json = re_attr.sub(subfunc, obj_json)

    tmpl = '<a href="{0}">{0}</a>'
    obj_json = re.sub('"(http://.+?)"',
                      lambda m: tmpl.format(*m.groups()), obj_json)

    context = {
        'metadata': obj,
        'keys': sorted(obj),
        'metadata_json': obj_json,
    }
    return render(request, 'billy/metadata_json.html', context)
def summary_object_key(request, abbr, urlencode=urllib.urlencode,
                       collections=("bills", "legislators", "committees"),
                       dumps=json.dumps, Decimal=decimal.Decimal):
    '''Render a tally of the values stored under one key.

    ``session``, ``object_type`` and ``key`` are read from
    ``request.GET`` by subscript, so all three are required.

    If ``object_type`` names one of ``collections``, ``key`` is read
    straight off each document of that collection. Otherwise
    ``object_type`` is treated as a field of each bill that holds a list
    of sub-objects, and ``key`` is read from every sub-object that has
    it.

    Values are JSON-serialized before counting, and the tally is sorted
    from most to least frequent.

    NOTE: the template context is ``locals()``, so every local name in
    this function (parameters included) is part of what the template
    can see. Renaming or removing a local changes the rendered context.
    '''
    meta = metadata(abbr)
    session = request.GET['session']
    object_type = request.GET['object_type']
    key = request.GET['key']
    spec = {'state': abbr, 'session': session}

    if object_type in collections:
        # Top-level key on the documents of the named collection.
        collection = getattr(db, object_type)
        fields_key = key
        objs = collection.find(spec, {fields_key: 1})
        # itemgetter raises KeyError for a document lacking ``key``.
        objs = imap(itemgetter(key), objs)
    else:
        # Key of the sub-objects listed under bill[object_type].
        collection = db.bills
        fields_key = '%s.%s' % (object_type, key)
        objs = collection.find(spec, {fields_key: 1})
        objs = imap(itemgetter(object_type), objs)

        def get_objects(objs):
            # Flatten the per-bill lists, skipping sub-objects without
            # the requested key.
            for _list in objs:
                for _obj in _list:
                    try:
                        yield _obj[key]
                    except KeyError:
                        pass
        objs = get_objects(objs)

    # Serialize each value so it can be used as a dictionary key.
    objs = (dumps(obj, cls=JSONDateEncoder) for obj in objs)

    # Decimal() is zero, so the counts (and the ratio below) are Decimals.
    counter = defaultdict(Decimal)
    for obj in objs:
        counter[obj] += 1

    # Builds the query string for one value.
    params = lambda val: urlencode(
        dict(object_type=object_type, key=key, val=val, session=session))

    # NOTE(review): ``total`` is the number of DISTINCT values, not the
    # number of objects counted, so ``counter[obj] / total`` is not a
    # share of all objects -- confirm this is what the template expects.
    total = len(counter)
    objs = sorted(counter, key=counter.get, reverse=True)
    # Rows of (serialized value, count, count / total, query string).
    objs = ((obj, counter[obj], counter[obj] / total, params(obj))
            for obj in objs)
    return render(request, 'billy/summary_object_key.html', locals())
def bill(request, abbr, session, id):
    '''Render the detail page for a single bill.

    The bill is looked up by ``abbr``, ``session`` and the upper-cased
    ``id``.

    Raises Http404 when there is no metadata for ``abbr`` or no bill
    matches.
    '''
    meta = metadata(abbr)
    if not meta:
        # Without metadata the level lookup below cannot succeed.
        raise Http404('No metadata found for abbreviation %r' % abbr)

    level = meta['level']
    found = find_bill({
        'level': level,
        level: abbr,
        'session': session,
        'bill_id': id.upper(),
    })
    if not found:
        raise Http404

    return render(request, 'billy/bill.html', {
        'bill': found,
        'metadata': meta,
        'id': found['_id'],
    })
def categorize_subjects(state, data_dir, process_all):
    '''Assign standardized subjects to the bills of ``state``.

    Reads ``<data_dir>/<state>.csv``, where the first column of each row
    is a scraped subject and the remaining non-empty columns are the
    categories it maps to. Every bill selected is saved back with its
    ``subjects`` set to the union of the categories of its
    ``scraped_subjects``. A summary is printed at the end.

    state       -- abbreviation used in the CSV file name and bill query
    data_dir    -- directory containing the CSV file
    process_all -- when false, only bills from the sessions of the last
                   term in the metadata are processed

    Raises Exception if the CSV names a category that is not in
    settings.BILLY_SUBJECTS.
    '''
    categorizer = defaultdict(set)
    categories_per_bill = defaultdict(int)
    uncategorized = defaultdict(int)

    # build category mapping (the file is closed once it has been read)
    with open(os.path.join(data_dir, state + '.csv')) as csv_file:
        for row in csv.reader(csv_file):
            for subj in row[1:]:
                if subj:
                    if subj not in settings.BILLY_SUBJECTS:
                        raise Exception('invalid subject %s (%s)' %
                                        (subj, row[0]))
                    categorizer[row[0]].add(subj)

    spec = {'state': state}
    if not process_all:
        sessions = metadata(state)['terms'][-1]['sessions']
        spec['session'] = {'$in': sessions}

    for bill in db.bills.find(spec):
        subjects = set()
        for ss in bill.get('scraped_subjects', []):
            # .get() keeps unknown subjects from being inserted into the
            # mapping as empty entries.
            categories = categorizer.get(ss)
            if categories:
                subjects.update(categories)
            else:
                uncategorized[ss] += 1
        bill['subjects'] = list(subjects)

        # increment # of bills with # of subjects
        categories_per_bill[len(subjects)] += 1

        db.bills.save(bill)

    # Single-argument print(...) behaves the same as a statement and as
    # a function call.
    print('Categories per bill')
    print('-------------------')
    for ncats, total in sorted(categories_per_bill.items()):
        print('%s categories: %s bills' % (ncats, total))

    print('Uncategorized')
    print('-------------')
    # Most frequent uncategorized subjects first.
    subjects_i = sorted(((count, subj)
                         for subj, count in uncategorized.items()),
                        reverse=True)
    for n, category in subjects_i:
        print('%s,%s' % (n, category))
def bill_list(request, abbr):
    '''Render the list of bills for ``abbr``.

    When the request carries a ``version_url`` GET parameter, the list
    is narrowed to bills with a version at that URL. The query itself is
    shown on the page as its ``repr``.

    Raises Http404 when there is no metadata for ``abbr``.
    '''
    meta = metadata(abbr)
    if not meta:
        raise Http404

    level = meta['level']
    spec = {'level': level, level: abbr}
    if 'version_url' in request.GET:
        spec['versions.url'] = request.GET.get('version_url')

    matches = db.bills.find(spec)
    return render(request, 'billy/bill_list.html', {
        'metadata': meta,
        'query_text': repr(spec),
        'bills': matches,
    })
def legislators(request, state):
    '''Render the legislator listing for ``state``.

    Active legislators are split by chamber and sorted with ``keyfunc``;
    inactive ones are listed together, sorted by last name.
    '''
    abbr = state.lower()

    def active_in(chamber):
        return db.legislators.find(
            {'state': abbr, 'active': True, 'chamber': chamber})

    upper = active_in('upper')
    lower = active_in('lower')
    inactive = db.legislators.find({'state': abbr, 'active': False})

    upper_legs = sorted(upper, key=keyfunc)
    lower_legs = sorted(lower, key=keyfunc)
    inactive_legs = sorted(inactive, key=lambda leg: leg['last_name'])

    context = {
        'upper_legs': upper_legs,
        'lower_legs': lower_legs,
        'inactive_legs': inactive_legs,
        'metadata': metadata(state),
    }
    return render_to_response('billy/legislators.html', context)
class BasePageyThing(object):
    '''Base object holding everything known about one 'ny' bill.

    The constructor unpacks the ``details`` tuple, creates the ``Bill``
    and records the term data for the session; ``build()`` then runs
    every ``build_*`` method found on the instance.
    '''

    # Looked up once, when the class body is executed.
    metadata = metadata('ny')

    chamber_name = None

    def __init__(self, scraper, session, chamber, details):
        '''Store the bill details and set up the Bill and Urls helpers.

        ``details`` is a 7-tuple: (senate_url, assembly_url,
        bill_chamber, bill_type, bill_id, title, bill_id_parts), where
        ``bill_id_parts`` unpacks into (letter, number, version).
        '''
        (senate_url, assembly_url, bill_chamber, bill_type, bill_id,
         title, bill_id_parts) = details

        self.scraper = scraper
        self.session = session
        self.chamber = chamber
        self.data = {}
        self.bill = Bill(session, bill_chamber, bill_id, title,
                         type=bill_type)

        self.term = term_for_session('ny', session)
        # NOTE: termdata / term_start_year stay unset when no term lists
        # this session; the last matching term wins if several do.
        for data in self.metadata['terms']:
            if session in data['sessions']:
                self.termdata = data
                self.term_start_year = data['start_year']

        self.assembly_url = assembly_url
        self.senate_url = senate_url
        self.bill_chamber = bill_chamber
        self.bill_type = bill_type
        self.bill_id = bill_id
        self.title = title
        self.letter, self.number, self.version = bill_id_parts

        self.urls = Urls(scraper=self.scraper, urls={
            'assembly': assembly_url,
            'senate': senate_url,
        })

    def build(self):
        '''Run all the build_* functions.
        '''
        # getmembers() returns members sorted by name, so the build_*
        # methods run in alphabetical order.
        for name, member in inspect.getmembers(self, inspect.ismethod):
            if name.startswith('build_'):
                member()
def dump_csv(abbr, filename, nozip):
    '''Dump the legislator and bill CSV files for ``abbr``.

    abbr     -- abbreviation whose data is dumped
    filename -- path of the zip archive to write (used unless ``nozip``)
    nozip    -- when true, move the CSV files into ``<abbr>_csv/``
                instead of zipping them

    In zip mode each CSV is added to the archive under its base name and
    then deleted from disk.
    '''
    level = metadata(abbr)['level']

    files = []
    files += dump_legislator_csvs(level, abbr)
    files += dump_bill_csvs(level, abbr)

    if not nozip:
        # The context manager guarantees the archive is finalized and
        # closed, even if adding a file fails.
        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
            for fname in files:
                zfile.write(fname, arcname=os.path.basename(fname))
                os.remove(fname)
    else:
        dirname = abbr + '_csv'
        try:
            os.makedirs(dirname)
        except OSError:
            # An existing directory is fine; anything else (permissions,
            # a plain file in the way) must not be ignored, or the moves
            # below would rename the CSVs onto ``dirname`` itself.
            if not os.path.isdir(dirname):
                raise

        for fname in files:
            shutil.move(fname, dirname)
def object_json(request, collection, _id):
    '''Render one stored object as pretty-printed JSON.

    The object is fetched by ``_id`` from the ``collection`` attribute
    of ``mdb`` and its items are sorted by key. Every top-level key is
    wrapped in a named anchor and every quoted ``http://`` URL is turned
    into a link.

    Raises Http404 when no such object exists.
    '''
    model_obj = getattr(mdb, collection).find_one(_id)
    if model_obj is None:
        msg = 'No object found with id %r in collection %r'
        raise Http404(msg % (_id, collection))

    obj = OrderedDict(sorted(model_obj.items()))
    obj_id = obj['_id']

    indent = 4
    pad = ' ' * indent
    obj_json = json.dumps(obj, cls=JSONEncoderPlus, indent=indent)

    # Top-level keys sit exactly one indent step from the line start, so
    # the pattern is derived from the same width handed to json.dumps.
    re_attr = re.compile(r'^%s"(.{1,100})":' % pad, re.M)

    def subfunc(m, tmpl=pad + '<a name="%s">%s:</a>'):
        val = m.group(1)
        return tmpl % (val, val)

    # One pass is enough: a rewritten line no longer starts with a quote,
    # so repeating the substitution could never match again.
    obj_json = re_attr.sub(subfunc, obj_json)

    tmpl = '<a href="{0}">{0}</a>'
    obj_json = re.sub('"(http://.+?)"',
                      lambda m: tmpl.format(*m.groups()), obj_json)

    # A metadata object is its own metadata; anything else names its
    # owner in the level field.
    if obj['_type'] != 'metadata':
        mdata = metadata(obj[settings.LEVEL_FIELD])
    else:
        mdata = obj

    return render(
        request, 'billy/object_json.html',
        dict(obj=obj, obj_id=obj_id, obj_json=obj_json,
             collection=collection, metadata=mdata, model_obj=model_obj))
def upload(abbr, filename):
    '''Upload ``filename`` to S3 and record its URL in the metadata.

    The object is stored in settings.AWS_BUCKET under a key built from
    today's date and ``abbr``, and is given the 'public-read' ACL. The
    metadata for ``abbr`` is then saved with ``latest_csv_url`` and
    ``latest_csv_date`` (current UTC time) set.
    '''
    now = datetime.date.today()

    # build URL
    bucket_name = settings.AWS_BUCKET
    key_name = '%s-%02d-%02d-%s-csv.zip' % (
        now.year, now.month, now.day, abbr)
    url = 'http://%s.s3.amazonaws.com/%s' % (bucket_name, key_name)

    # S3 upload
    connection = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
    s3_key = Key(connection.create_bucket(bucket_name))
    s3_key.key = key_name
    s3_key.set_contents_from_filename(filename)
    s3_key.set_acl('public-read')

    # Remember where the newest dump lives.
    meta = metadata(abbr)
    meta['latest_csv_url'] = url
    meta['latest_csv_date'] = datetime.datetime.utcnow()
    db.metadata.save(meta, safe=True)

    print('uploaded to %s' % url)