def finish_todo(request, todo_pk):
    """Mark a to-do task complete and split its cost among the unit's tenants.

    Creates a Bill from the POST data (amount, name, date_due), marks the
    ToDoTask identified by todo_pk complete, then creates one Split_Bill row
    per tenant in the requester's unit. The requester's own share is marked
    already paid. Redirects to the site root.
    """
    user = request.user
    value = request.POST['amount']
    reason = request.POST['name']
    debt_typedate_due = datetime.strptime(request.POST['date_due'], '%m/%d/%Y')

    bill = Bill(user=user, value=value, reason=reason,
                debt_typedate_due=debt_typedate_due, is_paid=False)
    bill.save()

    tenant = Tenant.objects.get(user=request.user.id)
    unit_app = tenant.unit

    todo = ToDoTask.objects.get(pk=todo_pk)
    todo.is_complete = True
    todo.save()

    # Hoist loop invariants: the original rebuilt the queryset and recomputed
    # the per-person share on every iteration.
    unit_tenants = Tenant.objects.filter(unit=unit_app)
    # NOTE(review): dividing by (count - 1) excludes the payer from the split
    # and raises ZeroDivisionError when the payer is the unit's only tenant
    # — confirm that case cannot occur.
    split = float(value) / (len(unit_tenants) - 1)
    for tenant in unit_tenants:
        has_paid = tenant.user == request.user
        final_split = Split_Bill(original=bill, user=tenant.user,
                                 split=split, has_paid=has_paid)
        # fixed: removed Python 2 `print final_split` debug statement,
        # which is a syntax error on Python 3.
        final_split.save()
    return redirect('/')
def parse_bill_number(q, congress=None, not_exist_ok=False):
    """Parse a bill citation string (e.g. "H.R. 1234") into a Bill.

    q: user-entered citation; spaces, dots, and dashes are stripped before
        matching against bill_number_re.
    congress: optional congress number to search when the citation itself
        doesn't name one.
    not_exist_ok: when True and no matching Bill exists, return an unsaved
        dummy Bill to signal that the string at least matched the pattern.

    Returns the Bill with a `search_type_flag` attribute describing how the
    congress number was chosen, or None when q isn't a bill citation.
    """
    m = bill_number_re.match(q.replace(" ", "").replace(".", "").replace("-", ""))
    if m is None:  # fixed: was `m == None`
        return None
    search_type_flag = None
    if m.group(3) is not None:
        # The citation names a congress explicitly.
        cn = int(m.group(4))
        search_type_flag = "bill-with-congress"
    elif congress is not None:
        try:
            cn = int(congress)
        except (TypeError, ValueError):
            # fixed: bare `except:` hid unrelated errors; only conversion
            # failures should fall back to the current congress.
            cn = CURRENT_CONGRESS
        search_type_flag = "bill-default-congress"
    else:
        cn = CURRENT_CONGRESS
        search_type_flag = "bill-guessed-congress"
    try:
        b = Bill.objects.get(congress=cn,
                             bill_type=BillType.by_slug(m.group(1).lower()),
                             number=int(m.group(2)))
        b.search_type_flag = search_type_flag
        return b
    except Bill.DoesNotExist:
        if not_exist_ok:
            # Return a dummy bill indicating that the string matched the regex.
            b = Bill(congress=cn,
                     bill_type=BillType.by_slug(m.group(1).lower()),
                     number=int(m.group(2)))
            b.search_type_flag = search_type_flag
            return b
    return None
def dump_reactions(request):
    """Return JSON describing the 100 subjects with the most user reactions."""
    from django.db.models import Count
    from website.models import Reaction
    from collections import defaultdict, OrderedDict
    from website.models import Bill

    # Subjects ranked by number of reaction rows, top 100.
    top = Reaction.objects.values_list("subject")\
        .annotate(count=Count('subject'))\
        .order_by('-count')[0:100]

    def tally_emojis(subject):
        """Per-emoji usage counts on one subject, most-used first."""
        tallies = defaultdict(lambda: 0)
        for reaction in Reaction.objects.filter(subject=subject):
            payload = reaction.reaction or {}
            for emoji in payload.get("emojis", []):
                tallies[emoji] += 1
        return OrderedDict(sorted(tallies.items(), key=lambda item: -item[1]))

    output = []
    for subject, n_users in top:
        output.append(OrderedDict([
            ("subject", subject),
            ("title", Bill.from_congressproject_id(subject[5:]).title),
            ("unique_users", n_users),
            ("emojis", tally_emojis(subject)),
        ]))
    return HttpResponse(json.dumps(output, indent=2),
                        content_type="application/json")
def fixup_item(item):
    """Return a display-ready copy of a bill-similarity record.

    Flags near-identical matches, resolves the other bill's id to a Bill
    object, and converts the other-bill ratio to a percentage. The caller's
    dict is not mutated.
    """
    out = dict(item)
    is_near_identical = out["my_ratio"] * out["other_ratio"] > .9
    if is_near_identical:
        out["identical"] = True
    out["other"] = Bill.from_congressproject_id(out["other"])
    out["other_ratio"] *= 100
    return out
def export_panel_user_data(request, panel_id, download):
    """Export panel data as a CSV attachment for a panel admin.

    download == "members": one row per panel member.
    download == "positions": one row per bill position a member has taken.
    Any other value yields an "invalid" response. Non-admins get a 404.
    """
    import csv, io
    from django.utils.text import slugify
    from website.models import UserPosition
    from bill.models import Bill
    from events.models import Feed

    panel = get_object_or_404(Panel, id=panel_id, admins=request.user)

    # fixed: csv.writer requires a text stream on Python 3 — writing to
    # io.BytesIO raised TypeError on the first writerow. Use StringIO.
    buf = io.StringIO()
    w = csv.writer(buf)

    if download == "members":
        # Download the panel's membership, with one row per member.
        w.writerow(["id", "email", "joined", "invitation_code", "notes"])
        for mbr in PanelMembership.objects.filter(panel=panel)\
                .order_by('created').select_related("user"):
            w.writerow([
                mbr.id,
                mbr.user.email,
                mbr.created,
                mbr.invitation_code,
                mbr.extra.get("notes", ""),
            ])
    elif download == "positions":
        # Download the positions panel members have taken on legislation,
        # with one row per member-position.
        members = dict(PanelMembership.objects.filter(panel=panel)
                       .values_list("user_id", "id"))
        w.writerow([
            "position_id", "member_id", "member_email", "position_created",
            "bill_id", "bill_title", "bill_link", "likert_score", "reason_text"
        ])
        for upos in UserPosition.objects.filter(user__in=members)\
                .order_by('created')\
                .select_related("user"):
            w.writerow([
                upos.id,
                members[upos.user.id],
                upos.user.email,
                upos.created,
                Bill.from_feed(Feed.from_name(upos.subject)).congressproject_id,
                # fixed: .encode("utf8") put bytes reprs (b'...') into the
                # CSV on Python 3 — the csv module handles str directly.
                upos.get_subject_title(),
                "https://www.govtrack.us" + upos.get_subject_link(),
                upos.likert,
                upos.reason,
            ])
    else:
        return HttpResponse("invalid")

    ret = HttpResponse(buf.getvalue())
    if True:  # disable to make debugging easier
        ret["Content-Type"] = "text/csv"
        ret["Content-Disposition"] = "attachment;filename=%s_%s.csv" % (
            slugify(panel.title), download)
    else:
        ret["Content-Type"] = "text/plain"
    return ret
def dump_reactions(request):
    """Dump the most-reacted-to subjects (top 100) as indented JSON."""
    from django.db.models import Count
    from website.models import Reaction
    from collections import defaultdict, OrderedDict
    from website.models import Bill

    # Rank subjects by how many reaction rows they have.
    most_reacted = (Reaction.objects
                    .values_list("subject")
                    .annotate(count=Count('subject'))
                    .order_by('-count')[0:100])

    def emoji_counts(subject):
        """Count each emoji used on this subject, most frequent first."""
        counter = defaultdict(lambda: 0)
        for row in Reaction.objects.filter(subject=subject):
            for symbol in (row.reaction or {}).get("emojis", []):
                counter[symbol] += 1
        ranked = sorted(counter.items(), key=lambda pair: -pair[1])
        return OrderedDict(ranked)

    payload = [
        OrderedDict([
            ("subject", subject),
            ("title", Bill.from_congressproject_id(subject[5:]).title),
            ("unique_users", votes),
            ("emojis", emoji_counts(subject)),
        ])
        for subject, votes in most_reacted
    ]
    return HttpResponse(json.dumps(payload, indent=2),
                        content_type="application/json")
def get_transparency_stats(person, role, stats, congress, startdate, enddate):
    """Score a legislator's support for transparency bills in one session.

    Awards 3 points per transparency bill sponsored and 1 per cosponsored
    within [startdate, enddate], and stores the result plus supporting bill
    lists under stats["transparency-bills"].
    """
    global transparency_bills
    if not transparency_bills:
        # Lazily load and cache the curated bill list, keeping only bills
        # from the congress being scored.
        # NOTE(review): the cache is global and not keyed by congress, so a
        # later call with a different congress reuses the first list — confirm
        # callers only score one congress per process.
        transparency_bills = []
        # fixed: file handle was leaked (open() with no close); and "\s" is
        # an invalid escape in a plain string literal — use a raw string.
        with open("analysis/transparency-bills.txt") as f:
            for line in f:
                bill = Bill.from_congressproject_id(re.split(r"\s", line)[0])
                if bill.congress != congress:
                    continue
                transparency_bills.append(bill)

    # Which bills are in the right chamber for this role?
    plausible_bills = []
    for bill in transparency_bills:
        if BillType.by_value(bill.bill_type).chamber == RoleType.by_value(role.role_type).congress_chamber:
            plausible_bills.append(bill)

    # Did the person sponsor any of these within this session?
    sponsored = []
    for bill in transparency_bills:
        if startdate <= bill.introduced_date <= enddate and bill.sponsor == person:
            sponsored.append(bill)

    # Did the person cosponsor any of these within this session?
    cosponsored = []
    for cosp in Cosponsor.objects.filter(person=person, bill__in=transparency_bills,
                                         joined__gte=startdate, joined__lte=enddate):
        cosponsored.append(cosp.bill)

    stats["transparency-bills"] = {
        "value": len(sponsored)*3 + len(cosponsored),
        "sponsored": make_bill_entries(sponsored),
        "cosponsored": make_bill_entries(cosponsored),
        "num_bills": len(plausible_bills),
        "chamber": RoleType.by_value(role.role_type).congress_chamber,
    }
def build_info():
    """Assemble the template context for the bills overview page."""
    # Feeds about all legislation that we offer the user to subscribe to.
    feeds = []
    for feed in Feed.get_simple_feeds():
        if feed.category == "federal-bills":
            feeds.append(feed)

    # Per-status-group summaries.
    def describe_group(g):
        statuses = g[4]
        link = ("/congress/bills/browse?status="
                + ",".join(str(s) for s in statuses)
                + "&sort=-current_status_date")
        qs = load_bill_status_qs(statuses)
        return (
            g[0],  # title
            g[1],  # text 1
            g[2],  # text 2
            link,  # link
            qs.count(),  # count in category
            qs.order_by('-current_status_date')[0:6],  # top 6 in this category
        )
    groups = [describe_group(g) for g in bill_status_groups]

    # Legislation coming up on the House and Senate floor schedules, where
    # the schedule posting is newer than the bill's last status change.
    dhg_bills = Bill.objects.filter(
        congress=CURRENT_CONGRESS,
        docs_house_gov_postdate__gt=datetime.datetime.now() - datetime.timedelta(days=10),
    ).filter(docs_house_gov_postdate__gt=F('current_status_date'))
    sfs_bills = Bill.objects.filter(
        congress=CURRENT_CONGRESS,
        senate_floor_schedule_postdate__gt=datetime.datetime.now() - datetime.timedelta(days=5),
    ).filter(senate_floor_schedule_postdate__gt=F('current_status_date'))
    coming_up = list((dhg_bills | sfs_bills).order_by('scheduled_consideration_date'))

    # The 25 most-tracked bill feeds of the current congress.
    tracked = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % CURRENT_CONGRESS)\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')[0:25]
    top_bills = [(Bill.from_feed(Feed.from_name(row["feedname"])), row["count"])
                 for row in tracked]

    return {
        "feeds": feeds,
        "total": Bill.objects.filter(congress=CURRENT_CONGRESS).count(),
        "current_congress": CURRENT_CONGRESS,
        "current_congress_dates": get_congress_dates(CURRENT_CONGRESS),
        "groups": groups,
        "coming_up": coming_up,
        "top_tracked_bills": top_bills,
        "subjects": subject_choices(),
        "BILL_STATUS_INTRO": (BillStatus.introduced, BillStatus.reported),
    }
def export_panel_user_data(request, panel_id, download):
    """Stream panel data to a requesting panel admin as a CSV attachment."""
    import csv, io
    from django.utils.text import slugify
    from website.models import UserPosition
    from bill.models import Bill
    from events.models import Feed

    panel = get_object_or_404(Panel, id=panel_id, admins=request.user)

    buffer = io.StringIO()
    writer = csv.writer(buffer)

    if download == "members":
        # One row per panel member.
        writer.writerow(["id", "email", "joined", "invitation_code", "notes"])
        memberships = PanelMembership.objects.filter(panel=panel)\
            .order_by('created').select_related("user")
        for membership in memberships:
            writer.writerow([
                str(membership.id),
                membership.user.email,
                membership.created.isoformat(),
                membership.invitation_code,
                membership.extra.get("notes", ""),
            ])
    elif download == "positions":
        # One row per position a panel member has taken on legislation.
        members = dict(PanelMembership.objects.filter(panel=panel)
                       .values_list("user_id", "id"))
        writer.writerow([
            "position_id", "member_id", "member_email", "position_created",
            "bill_id", "bill_title", "bill_link", "likert_score", "reason_text",
        ])
        positions = UserPosition.objects.filter(user__in=members)\
            .order_by('created').select_related("user")
        for position in positions:
            bill_id = Bill.from_feed(Feed.from_name(position.subject)).congressproject_id
            writer.writerow([
                str(position.id),
                members[position.user.id],
                position.user.email,
                position.created.isoformat(),
                bill_id,
                position.get_subject_title(),
                "https://www.govtrack.us" + position.get_subject_link(),
                str(position.likert),
                position.reason,
            ])
    else:
        return HttpResponse("invalid")

    response = HttpResponse(buffer.getvalue())
    if True:  # disable to make debugging easier
        response["Content-Type"] = "text/csv"
        response["Content-Disposition"] = "attachment;filename=%s_%s.csv" % (
            slugify(panel.title), download)
    else:
        response["Content-Type"] = "text/plain"
    return response
def build_info():
    """Build the template context for the bills overview page.

    This variant sorts upcoming bills by schedule postdate (newest first)
    and reports the current congress as an ordinal with its year span.
    """
    # feeds about all legislation that we offer the user to subscribe to
    feeds = [f for f in Feed.get_simple_feeds() if f.category == "federal-bills"]

    # info about bills by status
    groups = [
        (
            g[0],  # title
            g[1],  # text 1
            g[2],  # text 2
            "/congress/bills/browse?status=" + ",".join(str(s) for s in g[4]) + "&sort=-current_status_date",  # link
            load_bill_status_qs(g[4]).count(),  # count in category
            load_bill_status_qs(g[4]).order_by('-current_status_date')[0:6],  # top 6 in this category
        )
        for g in bill_status_groups
    ]

    # legislation coming up: bills recently posted on the House (10-day
    # window) or Senate (5-day window) floor schedules whose posting is
    # newer than their last status change
    dhg_bills = Bill.objects.filter(congress=CURRENT_CONGRESS, docs_house_gov_postdate__gt=datetime.datetime.now() - datetime.timedelta(days=10)).filter(docs_house_gov_postdate__gt=F('current_status_date'))
    sfs_bills = Bill.objects.filter(congress=CURRENT_CONGRESS, senate_floor_schedule_postdate__gt=datetime.datetime.now() - datetime.timedelta(days=5)).filter(senate_floor_schedule_postdate__gt=F('current_status_date'))
    coming_up = list(dhg_bills | sfs_bills)
    # Sort most-recently-posted first, keying on whichever chamber's
    # schedule postdate is the later of the two for each bill.
    coming_up.sort(key = lambda b : b.docs_house_gov_postdate if (b.docs_house_gov_postdate and (not b.senate_floor_schedule_postdate or b.senate_floor_schedule_postdate < b.docs_house_gov_postdate)) else b.senate_floor_schedule_postdate, reverse=True)

    # top tracked bills: the 25 most-tracked bill feeds of the current congress
    top_bills = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % CURRENT_CONGRESS)
    top_bills = top_bills\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')\
        [0:25]
    top_bills = [(Bill.from_feed(Feed.from_name(bf["feedname"])), bf["count"]) for bf in top_bills]

    # current congress years
    start, end = get_congress_dates(CURRENT_CONGRESS)
    end_year = end.year if end.month > 1 else end.year-1  # count January finishes as the prev year
    # NOTE(review): end_year is computed but unused — the span below uses
    # end.year directly; possibly it was meant to use end_year. Confirm.
    current_congress_years = '%d-%d' % (start.year, end.year)
    current_congress = ordinal(CURRENT_CONGRESS)

    return {
        "feeds": feeds,
        "total": Bill.objects.filter(congress=CURRENT_CONGRESS).count(),
        "current_congress_years": current_congress_years,
        "current_congress": current_congress,
        "groups": groups,
        "coming_up": coming_up,
        "top_tracked_bills": top_bills,
        "subjects": subject_choices(),
        "BILL_STATUS_INTRO": (BillStatus.introduced, BillStatus.referred, BillStatus.reported),
    }
def add_task(request):
    """Create a bill from POST data and split it among the unit's tenants.

    The requester's own share is marked as already paid; every other tenant
    in the unit gets an unpaid share. Redirects to the site root.
    """
    user = request.user
    value = request.POST['amount']
    reason = request.POST['name']
    debt_typedate_due = datetime.strptime(request.POST['date_due'], '%m/%d/%Y')

    bill = Bill(user=user, value=value, reason=reason,
                debt_typedate_due=debt_typedate_due, is_paid=False)
    bill.save()

    tenant = Tenant.objects.get(user=request.user.id)
    unit_app = tenant.unit

    # Hoist loop invariants: the queryset and per-person share were rebuilt
    # on every iteration in the original.
    unit_tenants = Tenant.objects.filter(unit=unit_app)
    split = float(value) / (len(unit_tenants) - 1)
    for tenant in unit_tenants:
        has_paid = tenant.user == request.user
        # fixed: every Split_Bill was created with user=user (the requester),
        # assigning all shares to the bill creator instead of the tenant they
        # belong to; the sibling finish_todo view uses tenant.user.
        final_split = Split_Bill(original=bill, user=tenant.user,
                                 split=split, has_paid=has_paid)
        final_split.save()
    return redirect('/')
def build_info():
    """Assemble template context for the bills overview page, including
    trending bill feeds."""
    # Subscription feeds covering all federal legislation.
    feeds = [feed for feed in Feed.get_simple_feeds()
             if feed.category == "federal-bills"]

    # Per-status-group summaries.
    groups = []
    for g in bill_status_groups:
        status_codes = ",".join(str(s) for s in g[4])
        link = "/congress/bills/browse?status=" + status_codes + "&sort=-current_status_date"
        groups.append((
            g[0],  # title
            g[1],  # text 1
            g[2],  # text 2
            link,  # link
            load_bill_status_qs(g[4]).count(),  # count in category
            load_bill_status_qs(g[4]).order_by('-current_status_date')[0:6],  # top 6 in this category
        ))

    # Upcoming legislation from the House and Senate floor schedules.
    house_cutoff = datetime.datetime.now() - datetime.timedelta(days=10)
    senate_cutoff = datetime.datetime.now() - datetime.timedelta(days=5)
    dhg_bills = Bill.objects\
        .filter(congress=CURRENT_CONGRESS, docs_house_gov_postdate__gt=house_cutoff)\
        .filter(docs_house_gov_postdate__gt=F('current_status_date'))
    sfs_bills = Bill.objects\
        .filter(congress=CURRENT_CONGRESS, senate_floor_schedule_postdate__gt=senate_cutoff)\
        .filter(senate_floor_schedule_postdate__gt=F('current_status_date'))
    coming_up = list((dhg_bills | sfs_bills).order_by('scheduled_consideration_date'))

    # The 25 most-tracked bill feeds of the current congress.
    feed_rows = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % CURRENT_CONGRESS)\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')[0:25]
    top_bills = [(Bill.from_feed(Feed.from_name(row["feedname"])), row["count"])
                 for row in feed_rows]

    # Trending bill feeds.
    trending_ids = Feed.get_trending_feeds()
    trending_feeds = [Feed.objects.get(id=fid) for fid in trending_ids]
    trending_bill_feeds = [f for f in trending_feeds
                           if f.feedname.startswith("bill:")]

    return {
        "feeds": feeds,
        "total": Bill.objects.filter(congress=CURRENT_CONGRESS).count(),
        "current_congress": CURRENT_CONGRESS,
        "current_congress_dates": get_congress_dates(CURRENT_CONGRESS),
        "groups": groups,
        "coming_up": coming_up,
        "top_tracked_bills": top_bills,
        "trending_bill_feeds": trending_bill_feeds,
        "subjects": subject_choices(),
        "BILL_STATUS_INTRO": (BillStatus.introduced, BillStatus.reported),
    }
def your_docket(request):
    """Template context: the user's subscription lists, each annotated with
    the Bill objects its trackers refer to (non-bill trackers are skipped)."""
    from bill.models import Bill
    # Pre-load the user's subscription lists and for each list
    # pre-load the list of bills entered into the list.
    lists = []
    # fixed: `is_authenticated` is a property on Django 1.10+; the original
    # called it like a method, which breaks once the CallableBool shim was
    # removed (Django 1.10 deprecation, removed later).
    if request.user.is_authenticated:
        lists = request.user.subscription_lists.all()
        for lst in lists:
            lst.bills = []
            for trk in lst.trackers.all():
                try:
                    lst.bills.append(Bill.from_feed(trk))
                except ValueError:
                    # Tracker isn't a bill feed — skip it.
                    pass
    return {"lists": lists}
def your_docket(request):
    """Context for the docket page: each subscription list with its bills."""
    from bill.models import Bill
    # Pre-load the user's subscription lists, attaching to each list the
    # Bill objects its trackers resolve to.
    lists = []
    if request.user.is_authenticated:
        lists = request.user.subscription_lists.all()
        for subscription_list in lists:
            bills = []
            for tracker in subscription_list.trackers.all():
                try:
                    bills.append(Bill.from_feed(tracker))
                except ValueError:
                    # Not a bill feed — ignore it.
                    continue
            subscription_list.bills = bills
    return {"lists": lists}
def show_stats(self, recent_users_only):
    """Print a tab-separated report of the 25 most-tracked bill feeds.

    recent_users_only: restrict tracker counts to users who joined in the
    last 14 days.
    """
    # get feeds, across all congresses
    top_bills = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % settings.CURRENT_CONGRESS)
    if recent_users_only:
        top_bills = top_bills.filter(
            tracked_in_lists__user__date_joined__gt=datetime.datetime.now() - datetime.timedelta(days=14))
    top_bills = top_bills\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')\
        [0:25]
    # fixed: Python 2 `print` statements are syntax errors on Python 3.
    print("new users \t all users \t sponsor \t url \t bill title")
    for bf in top_bills:
        f = Feed.from_name(bf["feedname"])
        b = Bill.from_feed(f)
        print(bf["count"], "\t", f.tracked_in_lists.all().count(), "\t",
              b.sponsor.lastname, b.get_absolute_url(), "\t", b)
def show_stats(self, recent_users_only):
    """Report the 25 most-tracked bill feeds as tab-separated console output."""
    # Bill feeds for the current congress (feed names span all congresses).
    feed_qs = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % settings.CURRENT_CONGRESS)
    if recent_users_only:
        two_weeks_ago = datetime.datetime.now() - datetime.timedelta(days=14)
        feed_qs = feed_qs.filter(tracked_in_lists__user__date_joined__gt=two_weeks_ago)
    ranked = feed_qs\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')[0:25]
    print("new users \t all users \t sponsor \t url \t bill title")
    for row in ranked:
        feed = Feed.from_name(row["feedname"])
        bill = Bill.from_feed(feed)
        print(row["count"], "\t", feed.tracked_in_lists.all().count(), "\t",
              bill.sponsor.lastname.encode("utf8"), bill.get_absolute_url(),
              "\t", bill)
def chart_data(self, request, **kwargs):
    """Return aggregate sales statistics for the dashboard charts,
    optionally restricted to the [start_date, end_date] query-param window
    (each may be absent, in which case None is passed through)."""
    start_date = self.request.query_params.get('start_date', None)
    end_date = self.request.query_params.get('end_date', None)
    # fixed: removed leftover debug print() calls of the raw query params.
    result = {
        'sells_per_design': Bill.sells_per_design(start_date, end_date),
        'sells_per_design_color': Bill.sells_per_design_color(start_date, end_date),
        'sells_per_bg_color': Bill.sells_per_bg_color(start_date, end_date),
        'sells_per_f_type': Bill.sells_per_f_type(start_date, end_date),
        'sells_per_material': Bill.sells_per_material(start_date, end_date),
        # NOTE(review): these two keys say "sells" but call the profit_*
        # aggregations — confirm the naming is intentional.
        'sells_per_customer_age': Bill.profit_per_customer_age(start_date, end_date),
        'sells_per_customer_type': Bill.profit_per_customer_type(start_date, end_date),
    }
    return Response(result)
def api_create_bill_view(request):
    """POST endpoint that validates and stores a new Bill owned by request.user.

    Returns 201 with the stored bill's fields, or 400 on validation failure
    (including duplicate categories). Emits statsd counters/timers around
    the request and the database write.
    """
    bill_post = Bill(owner_id=request.user)
    account_user = Account.objects.get(email=request.user)
    if request.method == 'POST':
        django_statsd.incr('api.createBill')
        django_statsd.start('api.createBill.time.taken')
        serializer = BillSerializer(bill_post, data=request.data)
        if serializer.is_valid():
            categories_list = serializer.validated_data['categories']
            # Reject any repeated category.
            if len(categories_list) != len(set(categories_list)):
                return Response({'response': "Categories must be unique."},
                                status=status.HTTP_400_BAD_REQUEST)
            django_statsd.start('api.createBill.db')
            bill = serializer.save()
            django_statsd.stop('api.createBill.db')
            data = {
                'response': 'successfully added a new bill.',
                'uuid_bill_id': bill.uuid_bill_id,
                'created_ts': bill.created_ts,
                'updated_ts': bill.updated_ts,
                'owner_id': account_user.uuid_id,
                'vendor': bill.vendor,
                'bill_date': bill.bill_date,
                'due_date': bill.due_date,
                'amount_due': bill.amount_due,
                'categories': bill.categories,
                'payment_status': bill.payment_status,
            }
            logger.info("POST: Added Bill")
            django_statsd.stop('api.createBill.time.taken')
            return Response(data, status=status.HTTP_201_CREATED)
        logger.error("ERROR: Something Happened: %s", serializer.errors)
        django_statsd.stop('api.createBill.time.taken')
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
# NOTE(review): fragment from inside a larger loop over bill-text-similarity
# records; the enclosing loop and the b1_*/b2_* bindings are defined above
# this excerpt.
if b2_versioncode != latest_version_code[b2_id]: continue

# Does this record represent enough text similarity that it is worth
# loading into the database? We'll treat this record as indicating
# similarity if...

# For exceedingly formulaic bills, we'll only identify identical bills.
# Bills naming buildings etc. are formulaic and produce text similarity
# to other bills of the same type, because the part that differs is very
# small. So we use a very high threshold for comparison. This may not
# be needed. I added it after seeing the analysis produce false positives,
# but then I discovered that cmp_text_len was not being compared right in
# the next block so this may actually not be needed to help.
b1_ratio = round(float(b1_ratio), 3)
b2_ratio = round(float(b2_ratio), 3)
b1 = Bill.from_congressproject_id(b1_id)
# Naming/commemorative bills: require near-identical text (product of the
# two containment ratios >= .85) before keeping the record.
if b1.title_no_number.startswith("A bill to designate ") \
        or b1.title_no_number.startswith("To designate ") \
        or b1.title_no_number.startswith("To name ") \
        or "Commemorative Coin Act" in b1.title_no_number:
    if b1_ratio * b2_ratio < .85: continue

# For other bills...
# a) The bills are nearly identical, i.e. the ratios indicating how
# must text of each bill is in the other are both high, and
# there is some minimum amount of text in the bills so that we're
# sure there is substantative text at all.
# b) The bills are substantially similar to each other and the text
# in common is large enough to exclude cases where all of the
# substance in the bills are in the dis-similar parts.
def do_site_search(q, allow_redirect=False, request=None):
    """Sitewide search across tracking feeds, Members of Congress, states,
    committees, bills/resolutions, and subject areas.

    Returns a list of result-category dicts (each with title/href/noun and
    a "results" list). When q parses as an exact bill citation and
    allow_redirect is True, instead returns an HttpResponseRedirect straight
    to that bill's page.
    """
    if q.strip() == "":
        return []

    results = []

    from bill.models import Bill
    from vote.models import Vote
    # Vote-related keywords surface the generic legislation-tracking feeds.
    if "pass" in q or "fail" in q or "vote" in q:
        results.append({
            "title": "Tracking Federal Legislation",
            "href": "/start",
            "noun": "feeds",
            "results": [{
                "href": f.link,
                "label": f.title,
                "obj": f,
                "feed": f,
                "secondary": False
            } for f in (
                Bill.EnactedBillsFeed(),
                Bill.ActiveBillsExceptIntroductionsFeed(),
                Bill.ComingUpFeed(),
                Vote.AllVotesFeed(),
            )]
        })

    from haystack.query import SearchQuerySet
    from events.models import Feed
    from person.models import RoleType
    # Members of Congress, via the haystack "person" index; only reps and
    # senators are searched.
    sqs = SearchQuerySet().using("person")\
        .filter(
            indexed_model_name__in=["Person"],
            all_role_types__in=(RoleType.representative, RoleType.senator),
            content=q)
    if 'XapianEngine' not in settings.HAYSTACK_CONNECTIONS['person']['ENGINE']:
        # Xapian doesn't provide a 'score' so we can't do this when debugging.
        sqs = sqs.order_by('-is_currently_serving', '-score')
    results.append({
        "title": "Members of Congress",
        "href": "/congress/members/all",
        "qsarg": "name",
        "noun": "Members of Congress",
        "results": [{
            "href": p.object.get_absolute_url(),
            "label": p.object.name,
            "obj": p.object,
            "feed": p.object.get_feed(),
            # Former members are shown as secondary results.
            "secondary": p.object.get_current_role() == None
        } for p in sqs[0:9]]
    })

    import us
    # States matched by case-insensitive name prefix.
    results.append({
        "title": "States",
        "href": "/congress/members",
        "noun": "states",
        "results": sorted([{
            "href": "/congress/members/%s" % s,
            "label": us.statenames[s]
        } for s in us.statenames
            if us.statenames[s].lower().startswith(q.lower())],
            key=lambda p: p["label"])
    })

    # search committees -- name must contain all of the words in the
    # search query (e.g. "rules committee" should match "committee on rules")
    from committee.models import Committee
    committees_qs = Committee.objects.filter(obsolete=False)
    for word in q.split(" "):
        committees_qs = committees_qs.filter(name__icontains=word)
    results.append({
        "title": "Congressional Committees",
        "href": "/congress/committees",
        "noun": "committees in Congress",
        "results": sorted([{
            "href": c.get_absolute_url(),
            "label": c.fullname,
            "feed": c.get_feed(),
            "obj": c,
            # Subcommittees (those with a parent committee) are secondary.
            "secondary": c.committee != None
        } for c in committees_qs], key=lambda c: c["label"])
    })

    from settings import CURRENT_CONGRESS
    from bill.search import parse_bill_citation
    bill = parse_bill_citation(q)
    congress = "__ALL__"
    if not bill or not allow_redirect:
        # query Solr w/ the boosted field
        from haystack.inputs import AutoQuery
        from haystack.query import SQ
        q = SearchQuerySet().using("bill").filter(indexed_model_name__in=["Bill"])\
            .filter(SQ(text=AutoQuery(q)) | SQ(text_boosted=AutoQuery(q)))
        # restrict to current bills if any (at least 10) bills match
        q1 = q.filter(congress=CURRENT_CONGRESS)
        if q1.count() >= 10:
            q = q1
            congress = str(CURRENT_CONGRESS)
        bills = [
            {"href": b.object.get_absolute_url(),
             "label": b.object.title,
             "obj": b.object,
             "feed": b.object.get_feed() if b.object.is_alive else None,
             "secondary": b.object.congress != CURRENT_CONGRESS
             } for b in q[0:9]]
    else:
        # Exact citation match: jump straight to the bill's page.
        url = bill.get_absolute_url()
        if request.GET.get("track"):
            url += "#track"
        return HttpResponseRedirect(url)
    results.append({
        "title": "Bills and Resolutions",
        "href": "/congress/bills/browse",
        "qsarg": "congress=%s&text" % congress,
        "noun": "federal bills or resolutions",
        "results": bills
    })

    # subject terms, but exclude subject terms that look like committee names because
    # that is confusing to also see with committee results
    # NOTE(review): by this point `q` has been rebound to a SearchQuerySet in
    # the Solr branch above, so name__icontains=q below may not receive the
    # original query string — confirm whether this is a latent bug.
    from bill.models import BillTerm, TermType
    results.append({
        "title": "Subject Areas",
        "href": "/congress/bills",
        "noun": "subject areas",
        "results": [{
            "href": p.get_absolute_url(),
            "label": p.name,
            "obj": p,
            "feed": p.get_feed(),
            "secondary": not p.is_top_term()
        } for p in BillTerm.objects.filter(
            name__icontains=q,
            term_type=TermType.new).exclude(
            name__contains=" Committee on ")[0:9]]
    })

    # in each group, make sure the secondary results are placed last, but otherwise preserve order
    for grp in results:
        for i, obj in enumerate(grp["results"]):
            obj["index"] = i
        grp["results"].sort(key=lambda o: (o.get("secondary", False), o["index"]))

    # sort categories first by whether all results are secondary results, then by number of matches (fewest first, if greater than zero)
    results.sort(key=lambda c: (
        len([d for d in c["results"] if d.get("secondary", False) == False]) == 0,
        len(c["results"]) == 0,
        len(c["results"])))

    return results
def run():
    """Sync Irish bill data from MongoDB into the Django models.

    First clears the `updated` flag on bills whose last update is older than
    6 days, then upserts bills from the mongo `updated_bills_complete`
    collection along with their stage, origin, sponsors, categories, and
    associated acts.
    """
    # check old bills: expire the "recently updated" flag after 6 days
    today = datetime.datetime.now().strftime("%Y-%m-%d")
    date_format = "%Y-%m-%d"
    for item in Bill.objects.filter(updated=True):
        start_date = datetime.datetime.strptime(str(today), date_format)
        end_date = datetime.datetime.strptime(str(item.updated_at), date_format)
        delta = start_date - end_date
        if delta.days > 6:
            item.updated = False
            item.save()  # fixed: the flag change was never persisted

    client = MongoClient('localhost', 27017)
    db = client.bill_db_ireland
    updated_bills_complete = db.updated_bills_complete

    # open all updated tagged bills from mongodb as dataframe
    bill_df = pd.DataFrame(list(updated_bills_complete.find()))
    if not bill_df.empty:
        for row in bill_df.itertuples():
            # NOTE(review): row[N] positions depend on the mongo document
            # field order as loaded into the DataFrame — confirm the schema
            # before changing any index.
            if Bill.objects.filter(title=row[13]).count() == 1:
                # Existing bill: refresh its flag, date, and stage.
                bill = Bill.objects.get(title=row[13])
                bill.updated = True
                bill.updated_at = today
                updated_stage = Stage(stage=row[9], stage_info=row[10])
                updated_stage.save()
                bill.stage = updated_stage
                bill.save()  # consolidated: was saved after every field change
            elif Bill.objects.filter(title=row[13]).count() == 0:
                o = Origin(origin=row[7][19:])  # set origin
                o.save()
                st = Stage(stage=row[9], stage_info=row[10])  # set stage
                st.save()
                sponsor_list = row[8][14:].split(';')
                try:
                    d = datetime.datetime.strptime(
                        row[5][14:], '%d %b %Y').strftime('%Y-%m-%d')
                except (TypeError, ValueError):
                    # fixed: bare except; only parse failures should fall
                    # back to today's date.
                    d = datetime.datetime.now().strftime("%Y-%m-%d")
                a = Bill(title=row[13], description=row[6], origin=o, stage=st,
                         bill_history=row[3], date=d, url=row[15])  # add all elements to Bill
                a.save()
                for name in sponsor_list:
                    name = name.strip()
                    # Reuse an existing sponsor record when there is one.
                    # (Removed no-op re-saves of freshly loaded objects.)
                    if Sponsor.objects.filter(sponsor=name).count() == 1:
                        a.sponsor.add(Sponsor.objects.get(sponsor=name))
                    else:
                        sp = Sponsor(sponsor=name)
                        sp.save()
                        a.sponsor.add(sp)  # add all sponsors to Bill
                for i in row[11]:
                    # NOTE(review): iterates row[11] and then indexes
                    # row[11][i] — only coherent if row[11] is a mapping
                    # keyed by its own iteration values; confirm upstream.
                    c = Category(category=row[11][i])
                    if Category.objects.filter(category=c).count() == 1:
                        a.category.add(Category.objects.get(category=c))
                    else:
                        c.save()
                        a.category.add(c)  # add all categories to Bill
                for item in row[2]:
                    if item != '':
                        if item[-1] == ')':
                            # Strip a trailing parenthesized suffix.
                            item = item[:item[-10:].find('(') - 10 + len(item)].strip()
                        act = AssociatedAct(associated_act=item)
                        if AssociatedAct.objects.filter(associated_act=act).count() == 1:
                            a.associated_act.add(
                                AssociatedAct.objects.get(associated_act=item))
                        else:
                            act.save()
                            a.associated_act.add(act)  # add all acts to Bill
                a.save()
# Skip if this record is for an outdated version of either bill. if b1_versioncode != latest_version_code[b1_id]: continue if b2_versioncode != latest_version_code[b2_id]: continue # Does this record represent enough text similarity that it is worth # loading into the database? We'll treat this record as indicating # similarity if... # For exceedingly formulaic bills, we'll only identify identical bills. # Bills naming buildings etc. are formulaic and produce text similarity # to other bills of the same type, because the part that differs is very # small. So we use a very high threshold for comparison. b1_ratio = round(float(b1_ratio),3) b2_ratio = round(float(b2_ratio),3) b1 = Bill.from_congressproject_id(b1_id) if b1.title_no_number.startswith("A bill to designate ") \ or b1.title_no_number.startswith("To designate ") \ or b1.title_no_number.startswith("To name ") \ or "Commemorative Coin Act" in b1.title_no_number: if b1_ratio*b2_ratio < .85: continue # For other bills... # a) The bills are nearly identical, i.e. the ratios indicating how # must text of each bill is in the other are both high, and # there is some minimum amount of text in the bills so that we're # sure there is substantative text at all. # b) The bills are substantially similar to each other and the text # in common is large enough to exclude cases where all of the # substance in the bills are in the dis-similar parts.
def new_stakeholder(request):
    """Create a new, un-verified Stakeholder organization.

    When the request carries ?bill=<congressproject id>, the form also
    offers position fields, and a Post/BillPosition pair is created for the
    organization's stance on that bill. On success the requesting user is
    made an admin of the stakeholder and is redirected to its page.
    """
    from bill.models import Bill
    related_bill = None
    if request.GET.get('bill'):
        related_bill = Bill.from_congressproject_id(request.GET['bill'])

    class NewStakehoderForm(Form):
        # Basic organization fields.
        organization_name = CharField()
        organization_website = URLField(initial="http://")
        twitter_account = CharField(initial="@", required=False)
        # Position fields are only defined when the request named a bill
        # (class body runs at definition time, so this is a conditional
        # set of form fields).
        if related_bill:
            position = ChoiceField(
                choices=[(None, '(choose)'), (1, "Support"), (0, "Neutral"), (-1, "Oppose")],
                required=False,
                label="Your organization's position on " + related_bill.display_number)
            position_statement_link = URLField(
                required=False,
                label="Link to webpage or PDF containing a position statement about "
                + related_bill.display_number + " (optional)")
            position_statement_content = CharField(
                required=False, widget=Textarea,
                label="Paste the text of your position statement about "
                + related_bill.display_number + " (optional)")

    if request.method == "GET":
        form = NewStakehoderForm()
    else:  # POST
        form = NewStakehoderForm(request.POST)
        if form.is_valid():
            # Create a new un-verified Stakeholder object.
            stk = Stakeholder()
            stk.name = form.cleaned_data['organization_name']
            stk.website = form.cleaned_data['organization_website'] or None
            stk.twitter_handle = form.cleaned_data['twitter_account'].lstrip("@") or None
            stk.save()

            # Create a new post.
            if related_bill and (
                    form.cleaned_data['position'] != ''
                    or form.cleaned_data['position_statement_link']
                    or form.cleaned_data['position_statement_content']):
                post = Post()
                post.stakeholder = stk
                if form.cleaned_data['position_statement_link'] or form.cleaned_data['position_statement_content']:
                    post.post_type = 1  # summary
                    post.link = (form.cleaned_data['position_statement_link'] or None)
                    post.content = (form.cleaned_data['position_statement_content'] or None)
                else:
                    post.post_type = 0  # positions only
                post.save()

                # Attach the bill position (position may be left unset when
                # only a statement was provided).
                bp = BillPosition()
                bp.post = post
                bp.bill = related_bill
                if form.cleaned_data['position'] != '':
                    bp.position = int(form.cleaned_data['position'])
                bp.save()

            # Make this user an admin.
            stk.admins.add(request.user)

            # Go view it.
            return HttpResponseRedirect(stk.get_absolute_url())

    return render(request, "stakeholder/new.html", {
        "form": form,
        "related_bill": related_bill,
    })
def main(options):
    """
    Process bill terms and bills.

    Python 2 era importer. Two phases:
      1. Terms: sync BillTerm rows (old and new vocabularies) from local
         XML files, creating/linking subterms and deleting terms no longer
         present on disk.
      2. Bills: parse per-bill XML files (path layout depends on congress
         number), save Bill objects, update the search index and events,
         and finally scrape docs.house.gov and senate.gov floor schedules
         for the current congress.

    options (argparse/optparse-style) fields read here: congress, filter,
    force, slow, disable_indexing, disable_events.
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {}
    for term in BillTerm.objects.all():
        existing_terms[(int(term.term_type), term.name)] = term

    log.info('Processing old bill terms')
    TERMS_FILE = 'data/us/liv.xml'
    tree = etree.parse(TERMS_FILE)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = TermType.old
        # The bare except below is used as a "cache miss" branch for the
        # KeyError from existing_terms — lookup-by-exception, not error handling.
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
            terms_parsed.add(term.id)
        except:
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = TermType.old
            try:
                # No need to update an existing term because there are no other attributes.
                subterm = existing_terms[(int(subterm.term_type),
                                          subterm.name)]
                term.subterms.add(subterm)
                terms_parsed.add(subterm.id)
            except:
                try:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                    term.subterms.add(subterm)
                    existing_terms[(int(subterm.term_type),
                                    subterm.name)] = subterm
                    terms_parsed.add(subterm.id)
                except IntegrityError:
                    log.error('Duplicated term %s' %
                              term_processor.display_node(subnode))

    log.info('Processing new bill terms')
    for FILE in ('data/us/liv111.xml', 'data/us/crsnet.xml'):
        tree = etree.parse(FILE)
        for node in tree.xpath('/liv/top-term'):
            term = term_processor.process(BillTerm(), node)
            term.term_type = TermType.new
            try:
                # No need to update an existing term because there are no other attributes.
                term = existing_terms[(int(term.term_type), term.name)]
                terms_parsed.add(term.id)
            except:
                log.debug("Created %s" % term)
                term.save()
                term.subterms.clear()

            for subnode in node.xpath('./term'):
                subterm = term_processor.process(BillTerm(), subnode)
                subterm.term_type = TermType.new
                try:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[(int(subterm.term_type),
                                              subterm.name)]
                    terms_parsed.add(subterm.id)
                    term.subterms.add(subterm)
                except:
                    try:
                        # NOTE(review): logs the parent "term", not "subterm" —
                        # probably a copy/paste slip (the old-terms loop above
                        # logs subterm); harmless but worth confirming.
                        log.debug("Created %s" % term)
                        subterm.save()
                        term.subterms.add(subterm)
                        existing_terms[(int(subterm.term_type),
                                        subterm.name)] = subterm
                        terms_parsed.add(subterm.id)
                    except IntegrityError:
                        log.error('Duplicated term %s' %
                                  term_processor.display_node(subnode))

    # Any cached term not seen in either vocabulary file is deleted.
    for term in existing_terms.values():
        if not term.id in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    # Congress <= 42 uses the unitedstates/congress ("American Memory") path
    # layout; later congresses use the legacy data/us layout.
    if options.congress and int(options.congress) <= 42:
        files = glob.glob('data/congress/%s/bills/*/*/*.xml' %
                          options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    elif options.congress:
        files = glob.glob('data/us/%s/bills/*.xml' % options.congress)
        log.info('Parsing bills of only congress#%s' % options.congress)
    else:
        files = glob.glob('data/us/*/bills/*.xml')
    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]
    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        # NOTE(review): options.congress is a string here, so
        # "options.congress > 42" is a Py2 str/int comparison (str always
        # compares greater) — the Py3 rewrite of this function uses
        # int(options.congress) > 42; confirm intent.
        if (not options.congress or options.congress > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.search(r"/(\d+)/bills/([a-z]+)(\d+)\.xml$", fname)
            try:
                b = Bill.objects.get(congress=m.group(1),
                                     bill_type=BillType.by_xml_code(
                                         m.group(2)),
                                     number=m.group(3))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    # Only complain when text is expected (GPO era, and the
                    # bill is old enough that text should have appeared).
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print "No bill text?", fname, b.introduced_date
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    bill_index.update_object(
                        b, using="bill")  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)
                continue
            except Bill.DoesNotExist:
                print "Unchanged metadata file but bill doesn't exist:", fname
                pass  # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                print fname
                raise
            seen_bill_ids.append(bill.id)  # don't delete me later

            # Tag the provenance of the data by congress range.
            if bill.congress >= 93:
                bill.source = "thomas-congproj"
            elif bill.congress >= 82:
                bill.source = "statutesatlarge"
                if bill.current_status == BillStatus.enacted_signed:
                    bill.current_status = BillStatus.enacted_unknown
            elif bill.congress <= 42:
                bill.source = "americanmemory"
            else:
                raise ValueError()

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = unicode(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            # Collect the status-changing actions for major_actions.
            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn),
                ))

            # Slip law (public/private law number), from <enacted> actions.
            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                print bill
                raise
            if bill_index:
                bill_index.update_object(bill, using="bill")
            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            print "Bill is no longer on disk: ", b.id, b

    # The rest is for current only...
    if options.congress and int(options.congress) != CURRENT_CONGRESS:
        return

    # Parse docs.house.gov for what might be coming up this week.
    import iso8601
    dhg_html = urllib.urlopen("http://docs.house.gov/floor/").read()
    m = re.search(r"class=\"downloadXML\" href=\"(Download.aspx\?file=.*?)\"",
                  dhg_html)
    if not m:
        log.error(
            'No docs.house.gov download link found at http://docs.house.gov.')
    else:

        def bt_re(bt):
            # Turn a bill-type label into a lenient regex (optional dots,
            # flexible whitespace), e.g. "H.R." matches "HR", "H. R.".
            return re.escape(bt[1]).replace(r"\.", r"\.?\s*")

        try:
            dhg = etree.parse(
                urllib.urlopen("http://docs.house.gov/floor/" +
                               m.group(1))).getroot()
        except:
            print "http://docs.house.gov/floor/" + m.group(1)
            raise
        # iso8601.parse_date(dhg.get("week-date")+"T00:00:00").date()
        for item in dhg.xpath("category/floor-items/floor-item"):
            billname = item.xpath("legis-num")[0].text
            if billname is None: continue  # weird but OK
            m = re.match(
                r"\s*(?:Concur in the Senate Amendment to |Senate Amendment to )?("
                + "|".join(bt_re(bt) for bt in BillType) +
                r")(\d+)\s*(\[Conference Report\]\s*)?$", billname, re.I)
            if not m:
                if not billname.strip().endswith(" __"):
                    log.error(
                        'Could not parse legis-num "%s" in docs.house.gov.' %
                        billname)
            else:
                # Find which bill type matched, stamp the posting date, and
                # refresh the index/events. for/else: the else runs only if
                # no bill type matched (no break).
                for bt in BillType:
                    if re.match(bt_re(bt) + "$", m.group(1), re.I):
                        try:
                            bill = Bill.objects.get(congress=CURRENT_CONGRESS,
                                                    bill_type=bt[0],
                                                    number=m.group(2))
                            bill.docs_house_gov_postdate = iso8601.parse_date(
                                item.get("add-date")).replace(tzinfo=None)
                            bill.save()
                            if bill_index:
                                bill_index.update_object(bill, using="bill")
                            if not options.disable_events:
                                bill.create_events()
                        except Bill.DoesNotExist:
                            log.error(
                                'Could not find bill "%s" in docs.house.gov.' %
                                billname)
                        break
                else:
                    log.error(
                        'Could not parse legis-num bill type "%s" in docs.house.gov.'
                        % m.group(1))

    # Parse Senate.gov's "Floor Schedule" blurb for coming up tomorrow.
    now = datetime.now()
    sfs = urllib.urlopen(
        "http://www.senate.gov/pagelayout/legislative/d_three_sections_with_teasers/calendars.htm"
    ).read()
    try:
        sfs = re.search(r"Floor Schedule([\w\W]*)Previous Meeting",
                        sfs).group(1)
        for congress, bill_type, number in re.findall(
                r"http://hdl.loc.gov/loc.uscongress/legislation.(\d+)([a-z]+)(\d+)",
                sfs):
            bill_type = BillType.by_slug(bill_type)
            bill = Bill.objects.get(congress=congress,
                                    bill_type=bill_type,
                                    number=number)
            # Only re-stamp if not stamped recently (avoids event spam).
            if bill.senate_floor_schedule_postdate == None or now - bill.senate_floor_schedule_postdate > timedelta(
                    days=7):
                bill.senate_floor_schedule_postdate = now
                bill.save()
                if bill_index:
                    bill_index.update_object(bill, using="bill")
                if not options.disable_events:
                    bill.create_events()
    except Exception as e:
        # Best-effort scrape: swallow and log any parse/network failure.
        log.error('Could not parse Senate Floor Schedule: ' + repr(e))
def lookup_reps(request):
    """Return a JSON-serializable dict describing the members of Congress
    for a state+district given in the query string, optionally annotated
    with each member's relationship to a bill (?bill= congress-project id):
    cosponsorship and relevant committee assignments.

    Returns {} if state/district are missing or invalid.
    """
    from django.contrib.humanize.templatetags.humanize import ordinal
    from person.name import get_person_name

    # Get the state and district from the query string.
    try:
        state = request.GET['state']
        district = int(request.GET['district'])
        if state not in stateapportionment:
            raise Exception()
    except:
        # Missing/invalid input — empty response rather than an error.
        return { }

    # Get the bill (optional) from the query string
    from bill.models import Bill
    try:
        bill = Bill.from_congressproject_id(request.GET["bill"])
    except:
        bill = None

    # Helper to get relevant committee assignments.
    from committee.models import CommitteeMember, CommitteeMemberRole
    from committee.util import sort_members

    def mention_committees_once(committeeassignments):
        # The committee assignments have been sorted first by role (i.e.
        # committees that the person is the chair of come first) and then
        # by committee name (which also puts subcommittees after committees).
        # In order to be less verbose, only mention each full committee
        # once --- take the first in each mention.
        seen = set()
        for c in committeeassignments:
            if (c.committee in seen) or (c.committee.committee in seen):
                continue
            yield c
            if c.committee.committee is not None:
                seen.add(c.committee.committee)  # add main committee
            else:
                seen.add(c.committee)  # add this committee

    bounds = get_district_bounds(state, district)
    return {
        "state": {
            "name": statenames[state],
            # Apportionment "T" marks territories (non-voting delegates).
            "isTerritory": stateapportionment[state] == "T",
        },
        "district": {
            # District 0 encodes an at-large seat.
            "ordinal": ordinal(district) if district > 0 else "At Large",
            "bounds": {
                "center": {
                    "latitude": bounds[0],
                    "longitude": bounds[1]
                },
                "zoom": bounds[2]
            }
        },
        # Senators first (ordered by rank), then the district's representative.
        "members": [
            {
                "id": p.id,
                "name": get_person_name(p,
                                        role_recent=True,
                                        firstname_position="before",
                                        show_title=True,
                                        show_party=False,
                                        show_district=False),
                "name_formal": p.current_role.get_title() + " " + p.lastname,
                "url": p.get_absolute_url(),
                "type": p.current_role.get_role_type_display(),
                "description": p.current_role.get_description(),
                "party": p.current_role.party,
                "photo_url": p.get_photo_url_50() if p.has_photo() else None,
                "contact_url": (p.current_role.extra
                                or {}).get("contact_form")
                or p.current_role.website,
                "phone": p.current_role.phone,
                "website": p.current_role.website,
                "pronouns": {
                    "him_her": p.him_her,
                    "his_her": p.his_her,
                    "he_she": p.he_she,
                },
                # Bill-specific info; only leadership committee roles are named.
                "bill-status": {
                    "cosponsor": p in bill.cosponsors.all(),
                    "committee-assignments": [
                        {
                            "committee": c.committee.fullname,
                            "role": c.get_role_display()
                            if c.role in (CommitteeMemberRole.chair,
                                          CommitteeMemberRole.ranking_member,
                                          CommitteeMemberRole.vice_chair)
                            else None,
                        } for c in mention_committees_once(
                            sort_members(
                                CommitteeMember.objects.filter(
                                    person=p,
                                    committee__in=bill.committees.all())))
                    ]
                } if bill else None,
            } for p in list(Person.objects.filter(
                roles__current=True,
                roles__state=state,
                roles__role_type=RoleType.senator)
                .order_by('roles__senator_rank'))
            + list(Person.objects.filter(
                roles__current=True,
                roles__state=state,
                roles__district=district,
                roles__role_type=RoleType.representative))
        ]
    }
def build_info():
    """Assemble the context dict for the bills overview page.

    Returns a dict with: subscribable feeds, per-status bill groups (with
    counts and the 6 most recent bills in each), legislation coming up on
    the House/Senate floor, the 25 most-tracked bills, current-congress
    metadata, subject choices, and the statuses considered "introduced".
    """
    # feeds about all legislation that we offer the user to subscribe to
    feeds = [
        f for f in Feed.get_simple_feeds() if f.category == "federal-bills"
    ]

    # info about bills by status
    groups = [
        (
            g[0],  # title
            g[1],  # text 1
            g[2],  # text 2
            "/congress/bills/browse?status=" +
            ",".join(str(s) for s in g[4]) +
            "&sort=-current_status_date",  # link
            load_bill_status_qs(g[4]).count(),  # count in category
            load_bill_status_qs(g[4]).order_by(
                '-current_status_date')[0:6],  # top 6 in this category
        ) for g in bill_status_groups
    ]

    # legislation coming up: bills posted on the docs.house.gov or Senate
    # floor schedules more recently than their last status change
    dhg_bills = Bill.objects.filter(
        congress=CURRENT_CONGRESS,
        docs_house_gov_postdate__gt=datetime.datetime.now() -
        datetime.timedelta(days=10)).filter(
            docs_house_gov_postdate__gt=F('current_status_date'))
    sfs_bills = Bill.objects.filter(
        congress=CURRENT_CONGRESS,
        senate_floor_schedule_postdate__gt=datetime.datetime.now() -
        datetime.timedelta(days=5)).filter(
            senate_floor_schedule_postdate__gt=F('current_status_date'))
    coming_up = list(dhg_bills | sfs_bills)
    # Sort by the most relevant (later) of the two schedule post dates.
    coming_up.sort(
        key=lambda b: b.docs_house_gov_postdate
        if (b.docs_house_gov_postdate and
            (not b.senate_floor_schedule_postdate or b.
             senate_floor_schedule_postdate < b.docs_house_gov_postdate))
        else b.senate_floor_schedule_postdate,
        reverse=True)

    # top tracked bills
    top_bills = Feed.objects\
        .filter(feedname__startswith='bill:')\
        .filter(feedname__regex='^bill:[hs][jcr]?%d-' % CURRENT_CONGRESS)
    top_bills = top_bills\
        .annotate(count=Count('tracked_in_lists'))\
        .order_by('-count')\
        .values('feedname', 'count')\
        [0:25]
    top_bills = [(Bill.from_feed(Feed.from_name(bf["feedname"])), bf["count"])
                 for bf in top_bills]

    # current congress years
    start, end = get_congress_dates(CURRENT_CONGRESS)
    end_year = end.year if end.month > 1 else end.year - 1  # count January finishes as the prev year
    # BUGFIX: end_year was computed but never used — the display string used
    # end.year directly, defeating the January adjustment above.
    current_congress_years = '%d-%d' % (start.year, end_year)
    current_congress = ordinal(CURRENT_CONGRESS)

    return {
        "feeds": feeds,
        "total": Bill.objects.filter(congress=CURRENT_CONGRESS).count(),
        "current_congress_years": current_congress_years,
        "current_congress": current_congress,
        "groups": groups,
        "coming_up": coming_up,
        "top_tracked_bills": top_bills,
        "subjects": subject_choices(),
        "BILL_STATUS_INTRO": (BillStatus.introduced, BillStatus.referred,
                              BillStatus.reported),
    }
def new_stakeholder_post(request):
    """Render and process the form for posting an organization's position
    on a bill (?bill= congress-project id in the query string).

    If the user administers no Stakeholder, the form collects organization
    details and a new un-verified Stakeholder is created; otherwise the user
    picks one of their existing organizations. If the organization already
    has a post on this bill, that post is updated in place instead of a new
    one being created.
    """
    from bill.models import Bill
    related_bill = None
    if request.GET.get('bill'):
        related_bill = Bill.from_congressproject_id(request.GET['bill'])

    data = {}
    user_admin_of_stakeholders = request.user.stakeholder_set.all()

    # Form fields depend on whether the user already administers any
    # stakeholder and on whether a bill was given, so the class is built
    # per-request (class body executes at definition time).
    class NewStakehoderForm(Form):
        if not user_admin_of_stakeholders:
            organization_name = CharField()
            organization_website = URLField(initial="http://")
            twitter_account = CharField(initial="@", required=False)
        else:
            organization = ChoiceField(choices=[
                (s.id, s.name) for s in user_admin_of_stakeholders
            ],
                                       label="Your organization")
        if related_bill:
            # Unlike new_stakeholder, position and statement text are required.
            position = ChoiceField(choices=[(None, '(choose)'), (1, "Support"),
                                            (0, "Neutral"), (-1, "Oppose")],
                                   required=True,
                                   label="Your organization's position on " +
                                   related_bill.display_number)
            position_statement_link = URLField(
                required=False,
                label=
                "Link to webpage or PDF containing a position statement about "
                + related_bill.display_number)
            position_statement_content = CharField(
                required=True,
                widget=Textarea,
                label="Paste the text of your position statement about " +
                related_bill.display_number)

    #If post already exists, update it instead of making a new one. Assumes only one of user's accounts has a statement on a given bill. Should be integrated into code below for clarity. Code added by Ben, a bad coder.
    # NOTE(review): this block both pre-fills `data` for the GET form and,
    # on POST, updates the existing Post/BillPosition and returns early —
    # bypassing the main POST handling below. The triple-nested loop keeps
    # the *last* matching post's data if several exist.
    if user_admin_of_stakeholders:
        for s in user_admin_of_stakeholders:
            for p in Post.objects.filter(stakeholder=s):
                for bp in p.bill_positions.all():
                    if bp.bill == related_bill:
                        data = {
                            'organization': (s.id, s.name),
                            'position': bp.position,
                            'position_statement_link': p.link,
                            'position_statement_content': p.content
                        }
                        if request.method == 'POST':
                            form = NewStakehoderForm(request.POST,
                                                     initial=data)
                            if form.is_valid():
                                if form.cleaned_data['position'] != '':
                                    bp.position = int(
                                        form.cleaned_data['position'])
                                p.link = form.cleaned_data[
                                    'position_statement_link']
                                p.content = form.cleaned_data[
                                    'position_statement_content']
                                bp.save()
                                p.save()
                                return HttpResponseRedirect(
                                    s.get_absolute_url())

    if request.method == "GET":
        form = NewStakehoderForm(initial=data)
    else:  # POST
        form = NewStakehoderForm(request.POST)
        if form.is_valid():
            if not user_admin_of_stakeholders:
                # Create a new un-verified Stakeholder object.
                stk = Stakeholder()
                stk.name = form.cleaned_data['organization_name']
                # Empty strings normalize to None.
                stk.website = form.cleaned_data['organization_website'] or None
                stk.twitter_handle = form.cleaned_data[
                    'twitter_account'].lstrip("@") or None
                stk.save()

                # Make this user an admin.
                stk.admins.add(request.user)
            else:
                # Get an existing Stakeholder that they are the admin of.
                stk = get_object_or_404(Stakeholder,
                                        id=form.cleaned_data['organization'])
                if request.user not in stk.admins.all():
                    # Invalid. Get out of here.
                    return HttpResponseRedirect(stk.get_absolute_url())

            # Create a new post if this page is for a related bill and a position,
            # link, or statement are provided.
            if related_bill and (
                    form.cleaned_data['position'] != ''
                    or form.cleaned_data['position_statement_link']
                    or form.cleaned_data['position_statement_content']):
                # Create a new Post object.
                post = Post()
                post.stakeholder = stk
                if form.cleaned_data[
                        'position_statement_link'] or form.cleaned_data[
                            'position_statement_content']:
                    post.post_type = 1  # summary
                    post.link = (form.cleaned_data['position_statement_link']
                                 or None)
                    post.content = (
                        form.cleaned_data['position_statement_content']
                        or None)
                else:
                    post.post_type = 0  # positions only
                post.save()

                # Attach a BillPosition to the Post.
                bp = BillPosition()
                bp.post = post
                bp.bill = related_bill
                if form.cleaned_data['position'] != '':
                    bp.position = int(form.cleaned_data['position'])
                bp.save()

            # Go view it.
            return HttpResponseRedirect(stk.get_absolute_url())

    # GET, or POST with validation errors: re-render.
    return render(request, "stakeholder/new.html", {
        "form": form,
        "related_bill": related_bill,
    })
def lookup_reps(request):
    """Return a JSON-serializable dict of the members of Congress for the
    state+district in the query string, optionally annotated (?bill=) with
    each member's cosponsorship and relevant committee assignments on that
    bill. Returns {} on missing/invalid state or district.
    """
    from django.contrib.humanize.templatetags.humanize import ordinal
    from person.name import get_person_name

    # Get the state and district from the query string.
    try:
        state = request.GET['state']
        district = int(request.GET['district'])
        if state not in stateapportionment:
            raise Exception()
    except:
        # Missing/invalid input — return empty rather than erroring.
        return {}

    # Get the bill (optional) from the query string
    from bill.models import Bill
    try:
        bill = Bill.from_congressproject_id(request.GET["bill"])
    except:
        bill = None

    # Helper to get relevant committee assignments.
    from committee.models import CommitteeMember, CommitteeMemberRole
    from committee.util import sort_members

    def mention_committees_once(committeeassignments):
        # The committee assignments have been sorted first by role (i.e.
        # committees that the person is the chair of come first) and then
        # by committee name (which also puts subcommittees after committees).
        # In order to be less verbose, only mention each full committee
        # once --- take the first in each mention.
        seen = set()
        for c in committeeassignments:
            if (c.committee in seen) or (c.committee.committee in seen):
                continue
            yield c
            if c.committee.committee is not None:
                seen.add(c.committee.committee)  # add main committee
            else:
                seen.add(c.committee)  # add this committee

    bounds = get_district_bounds(state, district)
    return {
        "state": {
            "name": statenames[state],
            # Apportionment "T" marks territories.
            "isTerritory": stateapportionment[state] == "T",
        },
        "district": {
            # District 0 encodes an at-large seat.
            "ordinal": ordinal(district) if district > 0 else "At Large",
            "bounds": {
                "center": {
                    "latitude": bounds[0],
                    "longitude": bounds[1]
                },
                "zoom": bounds[2]
            }
        },
        # Senators first (by rank), then the district's representative.
        "members": [{
            "id":
            p.id,
            "name":
            get_person_name(p,
                            role_recent=True,
                            firstname_position="before",
                            show_title=True,
                            show_party=False,
                            show_district=False),
            "name_formal":
            p.current_role.get_title() + " " + p.lastname,
            "url":
            p.get_absolute_url(),
            "type":
            p.current_role.get_role_type_display(),
            "description":
            p.current_role.get_description(),
            "party":
            p.current_role.party,
            "photo_url":
            p.get_photo_url_50() if p.has_photo() else None,
            "contact_url": (p.current_role.extra
                            or {}).get("contact_form")
            or p.current_role.website,
            "phone":
            p.current_role.phone,
            "website":
            p.current_role.website,
            "pronouns": {
                "him_her": p.him_her,
                "his_her": p.his_her,
                "he_she": p.he_she,
            },
            # Bill-specific info; only leadership roles are named.
            "bill-status": {
                "cosponsor":
                p in bill.cosponsors.all(),
                "committee-assignments": [{
                    "committee":
                    c.committee.fullname,
                    "role":
                    c.get_role_display() if c.role in (
                        CommitteeMemberRole.chair,
                        CommitteeMemberRole.ranking_member,
                        CommitteeMemberRole.vice_chair) else None,
                } for c in mention_committees_once(
                    sort_members(
                        CommitteeMember.objects.filter(
                            person=p,
                            committee__in=bill.committees.all())))]
            } if bill else None,
        } for p in list(
            Person.objects.filter(roles__current=True,
                                  roles__state=state,
                                  roles__role_type=RoleType.senator).order_by(
                                      'roles__senator_rank')) + list(
                                          Person.objects.filter(
                                              roles__current=True,
                                              roles__state=state,
                                              roles__district=district,
                                              roles__role_type=RoleType.
                                              representative))]
    }
def main(options):
    """
    Process bill terms and bills.

    Python 3 variant of the importer. Two phases:
      1. Terms: sync BillTerm rows (old and new vocabularies) from XML
         files under bill/, creating/linking subterms and deleting terms
         no longer present on disk.
      2. Bills: parse per-bill data.xml files under CONGRESS_DATA_PATH,
         save Bill objects, update the search index and events, then (for
         the current congress only) load House/Senate floor schedules via
         load_docs_house_gov / load_senate_floor_schedule.

    options fields read here: congress, filter, force, slow,
    disable_indexing, disable_events.
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {}
    for term in BillTerm.objects.all():
        existing_terms[(int(term.term_type), term.name)] = term

    log.info('Processing old bill terms')
    TERMS_FILE = 'bill/liv.xml'
    tree = etree.parse(TERMS_FILE)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = TermType.old
        # Bare except serves as the KeyError "cache miss" branch.
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
            terms_parsed.add(term.id)
        except:
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = TermType.old
            try:
                # No need to update an existing term because there are no other attributes.
                subterm = existing_terms[(int(subterm.term_type),
                                          subterm.name)]
                term.subterms.add(subterm)
                terms_parsed.add(subterm.id)
            except:
                try:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                    term.subterms.add(subterm)
                    existing_terms[(int(subterm.term_type),
                                    subterm.name)] = subterm
                    terms_parsed.add(subterm.id)
                except IntegrityError:
                    log.error('Duplicated term %s' %
                              term_processor.display_node(subnode))

    log.info('Processing new bill terms')
    for FILE in ('bill/liv111.xml', 'bill/crsnet.xml'):
        tree = etree.parse(FILE)
        for node in tree.xpath('/liv/top-term'):
            term = term_processor.process(BillTerm(), node)
            term.term_type = TermType.new
            try:
                # No need to update an existing term because there are no other attributes.
                term = existing_terms[(int(term.term_type), term.name)]
                terms_parsed.add(term.id)
            except:
                log.debug("Created %s" % term)
                term.save()
                term.subterms.clear()

            for subnode in node.xpath('./term'):
                subterm = term_processor.process(BillTerm(), subnode)
                subterm.term_type = TermType.new
                try:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[(int(subterm.term_type),
                                              subterm.name)]
                    terms_parsed.add(subterm.id)
                    term.subterms.add(subterm)
                except:
                    try:
                        # NOTE(review): logs the parent "term", not "subterm";
                        # likely a copy/paste slip — the old-terms loop logs subterm.
                        log.debug("Created %s" % term)
                        subterm.save()
                        term.subterms.add(subterm)
                        existing_terms[(int(subterm.term_type),
                                        subterm.name)] = subterm
                        terms_parsed.add(subterm.id)
                    except IntegrityError:
                        log.error('Duplicated term %s' %
                                  term_processor.display_node(subnode))

    # Any cached term not seen in either vocabulary file is deleted.
    for term in existing_terms.values():
        if not term.id in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    if options.congress:
        files = glob.glob(settings.CONGRESS_DATA_PATH +
                          '/%s/bills/*/*/data.xml' % options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    else:
        files = glob.glob(settings.CONGRESS_DATA_PATH + '/*/bills/*/*/data.xml')
    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]
    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        if (not options.congress or int(options.congress) > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.match(
                re.escape(settings.CONGRESS_DATA_PATH) +
                r'/(?P<congress>\d+)/bills/(?P<bill_type>[a-z]+)/(?P<bill_type_2>[a-z]+)(?P<number>\d+)/data.xml',
                fname)
            try:
                b = Bill.objects.get(congress=int(m.group("congress")),
                                     bill_type=BillType.by_slug(
                                         m.group("bill_type")),
                                     number=m.group("number"))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    # Only complain when text is expected (GPO era and the
                    # bill is old enough that text should have appeared).
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print("No bill text?", fname, b.introduced_date)
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    b.update_index(bill_index)  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)
                continue
            except Bill.DoesNotExist:
                print("Unchanged metadata file but bill doesn't exist:", fname)
                pass  # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                print(fname)
                raise
            seen_bill_ids.append(bill.id)  # don't delete me later

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = str(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            # Collect the status-changing actions for major_actions.
            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                if axn.xpath("string(@state)") == "REFERRED":
                    continue  # we don't track this state
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn, encoding=str),
                ))

            # Slip law (public/private law number), from <enacted> actions.
            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                print(bill)
                raise
            if bill_index:
                bill.update_index(bill_index)
            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            print("Bill is no longer on disk: ", b.id, b)

    # The rest is for current only...
    if options.congress and int(
            options.congress) != settings.CURRENT_CONGRESS:
        return

    # Find what might be coming up this week.
    load_docs_house_gov(options, bill_index)
    load_senate_floor_schedule(options, bill_index)
if b2_versioncode != latest_version_code[b2_id]: continue # Does this record represent enough text similarity that it is worth # loading into the database? We'll treat this record as indicating # similarity if... # For exceedingly formulaic bills, we'll only identify identical bills. # Bills naming buildings etc. are formulaic and produce text similarity # to other bills of the same type, because the part that differs is very # small. So we use a very high threshold for comparison. This may not # be needed. I added it after seeing the analysis produce false positives, # but then I discovered that cmp_text_len was not being compared right in # the next block so this may actually not be needed to help. b1_ratio = round(float(b1_ratio),3) b2_ratio = round(float(b2_ratio),3) b1 = Bill.from_congressproject_id(b1_id) if b1.title_no_number.startswith("A bill to designate ") \ or b1.title_no_number.startswith("To designate ") \ or b1.title_no_number.startswith("To name ") \ or b1.title_no_number.startswith("A bill for the relief of ") \ or "Commemorative Coin Act" in b1.title_no_number: if b1_ratio*b2_ratio < .85: continue # For other bills... if is_text_incorporated(b1_ratio, b2_ratio, cmp_text_len): # Index this information with both bills. # For b2, we're saying that it (or parts of it) were enacted # through these other bills... text_incorporation[b2_id][b1_id] = {
def do_site_search(q, allow_redirect=False):
    """Run the site-wide search for query string `q` across several sources
    (feeds, people, committees, federal bills, state bills, subject areas).

    Returns a list of category dicts, each with a title, browse link, and
    ordered `results` entries. If `allow_redirect` is True and `q` parses
    as an exact bill citation, returns an HttpResponseRedirect to that
    bill's page instead of a results list. Empty/whitespace `q` -> [].
    """
    if q.strip() == "":
        return []

    results = []

    from bill.models import Bill
    from vote.models import Vote

    # Hard-coded feed suggestions when the query looks vote-related.
    if "pass" in q or "fail" in q or "vote" in q:
        results.append({
            "title": "Tracking Federal Legislation",
            "href": "/start",
            "noun": "feeds",
            "results": [{
                "href": f.link,
                "label": f.title,
                "obj": f,
                "feed": f,
                "secondary": False
            } for f in (
                Bill.EnactedBillsFeed(),
                Bill.ActiveBillsExceptIntroductionsFeed(),
                Bill.ComingUpFeed(),
                Vote.AllVotesFeed(),
            )]
        })

    from haystack.query import SearchQuerySet
    from events.models import Feed

    # People (members of Congress etc.) via the haystack "person" backend;
    # "secondary" marks people not currently serving.
    results.append({
        "title": "Members of Congress, Presidents, and Vice Presidents",
        "href": "/congress/members/all",
        "qsarg": "name",
        "noun": "Members of Congress, Presidents, or Vice Presidents",
        "results": [{
            "href": p.object.get_absolute_url(),
            "label": p.object.name,
            "obj": p.object,
            "feed": p.object.get_feed(),
            "secondary": p.object.get_current_role() == None
        } for p in SearchQuerySet().using("person").filter(
            indexed_model_name__in=["Person"], content=q).order_by(
                '-is_currently_serving', '-score')[0:9]]
    })

    # Skipping states for now because we might want to go to the district maps or to
    # the state's main page for state legislative information.
    #import us
    #results.append(("States", "/congress/members", "most_recent_role_state", "states",
    #  sorted([{"href": "/congress/members/%s" % s, "label": us.statenames[s] }
    #    for s in us.statenames
    #    if us.statenames[s].lower().startswith(q.lower())
    #    ], key=lambda p : p["label"])))

    # Committees by simple name substring match; subcommittees are secondary.
    from committee.models import Committee
    results.append({
        "title": "Congressional Committees",
        "href": "/congress/committees",
        "noun": "committees in Congress",
        "results": sorted([{
            "href": c.get_absolute_url(),
            "label": c.fullname,
            "feed": c.get_feed(),
            "obj": c,
            "secondary": c.committee != None
        } for c in Committee.objects.filter(name__icontains=q, obsolete=False)
        ], key=lambda c: c["label"])
    })

    from settings import CURRENT_CONGRESS
    from bill.search import parse_bill_citation
    bill = parse_bill_citation(q)
    if not bill or not allow_redirect:
        # Full-text bill search; bills from past congresses are secondary.
        from haystack.inputs import AutoQuery
        bills = [\
            {"href": b.object.get_absolute_url(),
             "label": b.object.title,
             "obj": b.object,
             "feed": b.object.get_feed() if b.object.is_alive else None,
             "secondary": b.object.congress != CURRENT_CONGRESS }
            for b in SearchQuerySet().using("bill").filter(indexed_model_name__in=["Bill"], content=AutoQuery(q)).order_by('-current_status_date')[0:9]]
    else:
        # Exact citation match: jump straight to the bill's page.
        #bills = [{"href": bill.get_absolute_url(), "label": bill.title, "obj": bill, "secondary": bill.congress != CURRENT_CONGRESS }]
        return HttpResponseRedirect(bill.get_absolute_url())
    results.append({
        "title": "Bills and Resolutions (Federal)",
        "href": "/congress/bills/browse",
        "qsarg": "congress=__ALL__&text",
        "noun": "federal bills or resolutions",
        "results": bills
    })

    # State legislation, only when a "states" search backend is configured.
    if "states" in settings.HAYSTACK_CONNECTIONS:
        results.append({
            "title": "State Legislation",
            "href": "/states/bills/browse",
            "qsarg": "text",
            "noun": "state legislation",
            "results": [{
                "href": p.object.get_absolute_url(),
                "label": p.object.short_display_title,
                "obj": p.object,
                "feed": Feed(feedname="states_bill:%d" % p.object.id),
                "secondary": True
            } for p in SearchQuerySet().using('states').filter(
                indexed_model_name__in=["StateBill"], content=q)[0:9]]
        })

    # subject terms, but exclude subject terms that look like committee names because
    # that is confusing to also see with committee results
    from bill.models import BillTerm, TermType
    results.append({
        "title": "Subject Areas (Federal Legislation)",
        "href": "/congress/bills",
        "noun": "subject areas",
        "results": [{
            "href": p.get_absolute_url(),
            "label": p.name,
            "obj": p,
            "feed": p.get_feed(),
            "secondary": not p.is_top_term()
        } for p in BillTerm.objects.filter(
            name__icontains=q, term_type=TermType.new).exclude(
                name__contains=" Committee on ")[0:9]]
    })

    # in each group, make sure the secondary results are placed last, but otherwise preserve order
    for grp in results:
        for i, obj in enumerate(grp["results"]):
            obj["index"] = i
        grp["results"].sort(
            key=lambda o: (o.get("secondary", False), o["index"]))

    # sort categories first by whether all results are secondary results, then by number of matches (fewest first, if greater than zero)
    results.sort(key=lambda c: (len([
        d for d in c["results"] if d.get("secondary", False) == False
    ]) == 0, len(c["results"]) == 0, len(c["results"])))

    return results
def run(): client = MongoClient( 'mongodb://*****:*****@localhost:27017/bill_db_ireland?authSource=bill_db_ireland' ) db = client.bill_db_ireland bill_collection = db.bill_collection final_tagged_bills = db.final_tagged_bills bill_df = pd.DataFrame(list(final_tagged_bills.find())) #open all tagged bills from csv for row in bill_df.itertuples(): o = Origin(origin=row[7][19:]) #set origin o.save() st = Stage(stage=row[9], stage_info=row[10]) # set stage st.save() sponsor_list = row[8][14:].split(';') try: d = datetime.strptime(row[5][14:], '%d %b %Y').strftime('%Y-%m-%d') except: d = datetime.today().strftime('%Y-%m-%d') a = Bill(title=row[13], description=row[6], origin=o, stage=st, bill_history=row[3], date=d, url=row[15]) #add all elements to Bill a.save() for name in sponsor_list: name = name.strip() if Sponsor.objects.all().filter(sponsor=name).count() == 1: sp_existing = Sponsor.objects.get(sponsor=name) sp_existing.save() a.sponsor.add(sp_existing) a.save() else: sp = Sponsor(sponsor=name) sp.save() a.sponsor.add(sp) #add all sponsors to Bill a.save() for i in row[11]: c = Category(category=row[11][i]) if Category.objects.all().filter(category=c).count() == 1: c_existing = Category.objects.get(category=c) c_existing.save() a.category.add(c_existing) a.save() else: c.save() a.category.add(c) #add all categories to Bill a.save() for item in row[2]: if item != '': if item[-1] == ')': item = item[:item[-10:].find('(') - 10 + len(item)].strip() act = AssociatedAct(associated_act=item) if AssociatedAct.objects.all().filter( associated_act=act).count() == 1: act_existing = AssociatedAct.objects.get( associated_act=item) act_existing.save() a.associated_act.add(act_existing) a.save() else: act.save() a.associated_act.add(act) #add all acts to Bill a.save()