def parse_bill_number(q, congress=None, not_exist_ok=False):
    """Resolve a free-text bill citation to a ``Bill``.

    Spaces, periods and hyphens are stripped from ``q`` before it is matched
    against ``bill_number_re``.

    Args:
        q: The citation text typed by the user.
        congress: Optional congress number to assume when the citation does
            not carry one itself. A value that cannot be converted to an
            integer falls back to ``CURRENT_CONGRESS``.
        not_exist_ok: When true, return an unsaved placeholder ``Bill`` for a
            citation that matches the pattern but has no database row.

    Returns:
        A ``Bill`` with an extra ``search_type_flag`` attribute recording how
        the congress was chosen (``"bill-with-congress"``,
        ``"bill-default-congress"`` or ``"bill-guessed-congress"``), or
        ``None`` when the text is not a citation, or when the bill does not
        exist and ``not_exist_ok`` is false.
    """
    m = bill_number_re.match(
        q.replace(" ", "").replace(".", "").replace("-", ""))
    if m is None:
        return None

    if m.group(3) is not None:
        # The citation itself names the congress.
        cn = int(m.group(4))
        search_type_flag = "bill-with-congress"
    elif congress is not None:
        try:
            cn = int(congress)
        except (TypeError, ValueError):
            # Unusable caller-supplied congress: fall back to the current one.
            cn = CURRENT_CONGRESS
        search_type_flag = "bill-default-congress"
    else:
        cn = CURRENT_CONGRESS
        search_type_flag = "bill-guessed-congress"

    # Parse the citation once; both the lookup and the placeholder need it.
    bill_type = BillType.by_slug(m.group(1).lower())
    number = int(m.group(2))

    try:
        b = Bill.objects.get(congress=cn, bill_type=bill_type, number=number)
    except Bill.DoesNotExist:
        if not not_exist_ok:
            return None
        # Return a dummy bill indicating that string matched the regex.
        b = Bill(congress=cn, bill_type=bill_type, number=number)

    b.search_type_flag = search_type_flag
    return b
def finish_todo(request, todo_pk):
    """Record a bill for a finished to-do task and split it across the unit.

    Reads ``amount``, ``name`` and ``date_due`` (``MM/DD/YYYY``) from
    ``request.POST``, saves a ``Bill`` owned by the requesting user, marks the
    ``ToDoTask`` identified by ``todo_pk`` complete, and saves one
    ``Split_Bill`` per tenant of the requester's unit. The requester's own
    split is created already paid.

    Args:
        request: The incoming Django request (POST data and user are read).
        todo_pk: Primary key of the ``ToDoTask`` being completed.

    Returns:
        A redirect to ``'/'``.
    """
    user = request.user
    value = request.POST['amount']
    reason = request.POST['name']
    debt_typedate_due = datetime.strptime(request.POST['date_due'],
                                          '%m/%d/%Y')

    bill = Bill(user=user, value=value, reason=reason,
                debt_typedate_due=debt_typedate_due, is_paid=False)
    bill.save()

    unit_app = Tenant.objects.get(user=request.user.id).unit

    todo = ToDoTask.objects.get(pk=todo_pk)
    todo.is_complete = True
    todo.save()

    # Fetch the tenants once instead of re-running the query per iteration.
    tenants = list(Tenant.objects.filter(unit=unit_app))

    # The amount is divided among the tenants other than the payer.
    # NOTE(review): confirm that dividing by (n - 1) rather than n is intended.
    # A unit with a single tenant used to raise ZeroDivisionError here; in
    # that case the whole amount is assigned to that tenant.
    split = float(value) / max(len(tenants) - 1, 1)

    for tenant in tenants:
        final_split = Split_Bill(original=bill, user=tenant.user, split=split,
                                 has_paid=(tenant.user == request.user))
        # Function-call form prints the same text on Python 2 and Python 3.
        print(final_split)
        final_split.save()

    return redirect('/')
def add_task(request):
    """Record a new bill and split it across the requester's unit.

    Reads ``amount``, ``name`` and ``date_due`` (``MM/DD/YYYY``) from
    ``request.POST``, saves a ``Bill`` owned by the requesting user and saves
    one ``Split_Bill`` per tenant of the requester's unit. The requester's own
    split is created already paid.

    Args:
        request: The incoming Django request (POST data and user are read).

    Returns:
        A redirect to ``'/'``.
    """
    user = request.user
    value = request.POST['amount']
    reason = request.POST['name']
    debt_typedate_due = datetime.strptime(request.POST['date_due'],
                                          '%m/%d/%Y')

    bill = Bill(user=user, value=value, reason=reason,
                debt_typedate_due=debt_typedate_due, is_paid=False)
    bill.save()

    unit_app = Tenant.objects.get(user=request.user.id).unit

    # Fetch the tenants once instead of re-running the query per iteration.
    tenants = list(Tenant.objects.filter(unit=unit_app))

    # The amount is divided among the tenants other than the payer.
    # NOTE(review): confirm that dividing by (n - 1) rather than n is intended.
    # A unit with a single tenant used to raise ZeroDivisionError here; in
    # that case the whole amount is assigned to that tenant.
    split = float(value) / max(len(tenants) - 1, 1)

    for tenant in tenants:
        # Each share belongs to the tenant being iterated. Previously every
        # share was attached to the requesting user, so the other tenants
        # never received a split of their own.
        final_split = Split_Bill(original=bill, user=tenant.user, split=split,
                                 has_paid=(tenant.user == request.user))
        final_split.save()

    return redirect('/')
def api_create_bill_view(request):
    """Create a bill owned by the requesting user from the POSTed payload.

    The payload is validated with ``BillSerializer``; the ``categories`` list
    must not contain duplicates. The ``api.createBill`` counter and the
    ``api.createBill.time.taken`` / ``api.createBill.db`` timers are emitted
    through ``django_statsd``.

    Args:
        request: The incoming REST framework request.

    Returns:
        ``201`` with the stored bill fields on success, ``400`` with the
        serializer errors (or a uniqueness message) on invalid input, and
        ``405`` for any method other than POST.
    """
    if request.method != 'POST':
        # Previously a non-POST call fell through without a usable response.
        return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

    bill_post = Bill(owner_id=request.user)
    account_user = Account.objects.get(email=request.user)

    django_statsd.incr('api.createBill')
    django_statsd.start('api.createBill.time.taken')
    try:
        serializer = BillSerializer(bill_post, data=request.data)
        if not serializer.is_valid():
            logger.error("ERROR: Something Happened: %s", serializer.errors)
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        categories_list = serializer.validated_data['categories']
        if len(categories_list) != len(set(categories_list)):
            return Response({'response': "Categories must be unique."},
                            status=status.HTTP_400_BAD_REQUEST)

        django_statsd.start('api.createBill.db')
        try:
            bill = serializer.save()
        finally:
            django_statsd.stop('api.createBill.db')

        data = {}
        data['response'] = 'successfully added a new bill.'
        data['uuid_bill_id'] = bill.uuid_bill_id
        data['created_ts'] = bill.created_ts
        data['updated_ts'] = bill.updated_ts
        data['owner_id'] = account_user.uuid_id
        data['vendor'] = bill.vendor
        data['bill_date'] = bill.bill_date
        data['due_date'] = bill.due_date
        data['amount_due'] = bill.amount_due
        data['categories'] = bill.categories
        data['payment_status'] = bill.payment_status
        logger.info("POST: Added Bill")
        return Response(data, status=status.HTTP_201_CREATED)
    finally:
        # Stop the request timer on every exit path. The duplicate-categories
        # return used to leave it running.
        django_statsd.stop('api.createBill.time.taken')
def _import_bill_terms(path, term_type, term_processor, existing_terms,
                       terms_parsed):
    """Load one subject-term XML file, creating terms that are not cached yet.

    Args:
        path: Path of the XML file; ``/liv/top-term`` nodes and their
            ``term`` children are read.
        term_type: ``TermType`` value assigned to every term in the file.
        term_processor: ``TermProcessor`` used to build ``BillTerm`` objects
            from XML nodes.
        existing_terms: Cache mapping ``(int(term_type), name)`` to a saved
            ``BillTerm``; newly created subterms are added to it.
        terms_parsed: Set of ids of cached terms seen in the files; updated
            in place.
    """
    tree = etree.parse(path)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = term_type
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
        except KeyError:
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()
        else:
            terms_parsed.add(term.id)

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = term_type
            key = (int(subterm.term_type), subterm.name)
            try:
                if key in existing_terms:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[key]
                else:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                term.subterms.add(subterm)
                existing_terms[key] = subterm
                terms_parsed.add(subterm.id)
            except IntegrityError:
                # Best effort: report the clash and carry on with the file.
                log.error('Duplicated term %s' %
                          term_processor.display_node(subnode))


def main(options):
    """
    Process bill terms and bills

    First synchronises ``BillTerm`` rows with the subject-term XML files:
    terms missing from the database are created, and cached terms that appear
    in none of the files are deleted. Then every bill ``data.xml`` file under
    ``settings.CONGRESS_DATA_PATH`` is parsed into a ``Bill``, optionally
    updating the search index and creating events. Finally, for the current
    congress, the House and Senate floor schedules are loaded.

    Options read: ``congress``, ``filter``, ``force``, ``slow``,
    ``disable_indexing`` and ``disable_events``.
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {
        (int(term.term_type), term.name): term
        for term in BillTerm.objects.all()
    }

    log.info('Processing old bill terms')
    _import_bill_terms('bill/liv.xml', TermType.old, term_processor,
                       existing_terms, terms_parsed)

    log.info('Processing new bill terms')
    for terms_file in ('bill/liv111.xml', 'bill/crsnet.xml'):
        _import_bill_terms(terms_file, TermType.new, term_processor,
                           existing_terms, terms_parsed)

    # Anything cached that no file mentioned any more is stale.
    for term in existing_terms.values():
        if term.id not in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    if options.congress:
        files = glob.glob(settings.CONGRESS_DATA_PATH +
                          '/%s/bills/*/*/data.xml' % options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    else:
        files = glob.glob(settings.CONGRESS_DATA_PATH +
                          '/*/bills/*/*/data.xml')

    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]

    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        if (not options.congress or int(options.congress) > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.match(
                re.escape(settings.CONGRESS_DATA_PATH) +
                r'/(?P<congress>\d+)/bills/(?P<bill_type>[a-z]+)/(?P<bill_type_2>[a-z]+)(?P<number>\d+)/data.xml',
                fname)

            try:
                b = Bill.objects.get(
                    congress=int(m.group("congress")),
                    bill_type=BillType.by_slug(m.group("bill_type")),
                    number=m.group("number"))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print("No bill text?", fname, b.introduced_date)
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    b.update_index(bill_index)  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)

                continue
            except Bill.DoesNotExist:
                print("Unchanged metadata file but bill doesn't exist:",
                      fname)
                # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                # Name the offending file, then let the error propagate.
                print(fname)
                raise

            seen_bill_ids.append(bill.id)  # don't delete me later

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = str(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                if axn.xpath("string(@state)") == "REFERRED":
                    continue  # we don't track this state
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn, encoding=str),
                ))

            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                # Show the offending bill, then let the error propagate.
                print(bill)
                raise

            if bill_index:
                bill.update_index(bill_index)

            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            print("Bill is no longer on disk: ", b.id, b)

    # The rest is for current only...

    if options.congress and int(
            options.congress) != settings.CURRENT_CONGRESS:
        return

    # Find what might be coming up this week.
    load_docs_house_gov(options, bill_index)
    load_senate_floor_schedule(options, bill_index)
def main(options):
    """
    Process bill terms and bills

    Legacy (Python 2) variant. In order, it:

    1. Synchronises ``BillTerm`` rows with the ``data/us`` subject-term XML
       files, creating missing terms and deleting cached terms that no file
       mentions.
    2. Parses bill XML files into ``Bill`` objects, optionally updating the
       search index and creating events.
    3. For the current congress only, reads docs.house.gov and the Senate
       "Floor Schedule" page and stamps the bills mentioned there.

    Options read: ``congress``, ``filter``, ``force``, ``slow``,
    ``disable_indexing`` and ``disable_events``.
    """

    # Terms

    term_processor = TermProcessor()
    terms_parsed = set()

    # Cache existing terms. There aren't so many.
    existing_terms = {}
    for term in BillTerm.objects.all():
        existing_terms[(int(term.term_type), term.name)] = term

    log.info('Processing old bill terms')
    TERMS_FILE = 'data/us/liv.xml'
    tree = etree.parse(TERMS_FILE)
    for node in tree.xpath('/liv/top-term'):
        term = term_processor.process(BillTerm(), node)
        term.term_type = TermType.old
        try:
            # No need to update an existing term because there are no other attributes.
            term = existing_terms[(int(term.term_type), term.name)]
            terms_parsed.add(term.id)
        except:
            # Not in the cache: create the top-level term.
            log.debug("Created %s" % term)
            term.save()
            term.subterms.clear()

        for subnode in node.xpath('./term'):
            subterm = term_processor.process(BillTerm(), subnode)
            subterm.term_type = TermType.old
            try:
                # No need to update an existing term because there are no other attributes.
                subterm = existing_terms[(int(subterm.term_type),
                                          subterm.name)]
                term.subterms.add(subterm)
                terms_parsed.add(subterm.id)
            except:
                try:
                    log.debug("Created %s" % subterm)
                    subterm.save()
                    term.subterms.add(subterm)

                    # Register the new subterm so later files can reuse it.
                    existing_terms[(int(subterm.term_type),
                                    subterm.name)] = subterm
                    terms_parsed.add(subterm.id)
                except IntegrityError:
                    log.error('Duplicated term %s' %
                              term_processor.display_node(subnode))

    log.info('Processing new bill terms')
    for FILE in ('data/us/liv111.xml', 'data/us/crsnet.xml'):
        tree = etree.parse(FILE)
        for node in tree.xpath('/liv/top-term'):
            term = term_processor.process(BillTerm(), node)
            term.term_type = TermType.new
            try:
                # No need to update an existing term because there are no other attributes.
                term = existing_terms[(int(term.term_type), term.name)]
                terms_parsed.add(term.id)
            except:
                log.debug("Created %s" % term)
                term.save()
                term.subterms.clear()

            for subnode in node.xpath('./term'):
                subterm = term_processor.process(BillTerm(), subnode)
                subterm.term_type = TermType.new
                try:
                    # No need to update an existing term because there are no other attributes.
                    subterm = existing_terms[(int(subterm.term_type),
                                              subterm.name)]
                    terms_parsed.add(subterm.id)
                    term.subterms.add(subterm)
                except:
                    try:
                        # NOTE(review): this logs the parent ``term``; the
                        # old-terms loop logs ``subterm`` here -- confirm.
                        log.debug("Created %s" % term)
                        subterm.save()
                        term.subterms.add(subterm)

                        existing_terms[(int(subterm.term_type),
                                        subterm.name)] = subterm
                        terms_parsed.add(subterm.id)
                    except IntegrityError:
                        log.error('Duplicated term %s' %
                                  term_processor.display_node(subnode))

    # Cached terms that appeared in none of the files are removed.
    for term in existing_terms.values():
        if not term.id in terms_parsed:
            log.debug("Deleted %s" % term)
            term.delete()

    # Bills

    bill_index = None
    if not options.disable_indexing:
        from bill.search_indexes import BillIndex
        bill_index = BillIndex()

    # Congresses up to the 42nd are read from a different directory layout.
    if options.congress and int(options.congress) <= 42:
        files = glob.glob('data/congress/%s/bills/*/*/*.xml' %
                          options.congress)
        log.info('Parsing unitedstates/congress bills of only congress#%s' %
                 options.congress)
    elif options.congress:
        files = glob.glob('data/us/%s/bills/*.xml' % options.congress)
        log.info('Parsing bills of only congress#%s' % options.congress)
    else:
        files = glob.glob('data/us/*/bills/*.xml')

    if options.filter:
        files = [f for f in files if re.match(options.filter, f)]

    log.info('Processing bills: %d files' % len(files))
    total = len(files)
    progress = Progress(total=total, name='files', step=100)

    bill_processor = BillProcessor()
    seen_bill_ids = []
    for fname in files:
        progress.tick()

        # With indexing or events enabled, if the bill metadata file hasn't changed check
        # the bill's latest text file for changes so we can create a text-is-available
        # event and so we can index the bill's text.
        # NOTE(review): ``options.congress`` is compared to 42 without int()
        # here, unlike the other uses in this function -- confirm its type.
        if (not options.congress or options.congress > 42) and (
                bill_index and not options.disable_events
        ) and not File.objects.is_changed(fname) and not options.force:
            m = re.search(r"/(\d+)/bills/([a-z]+)(\d+)\.xml$", fname)

            try:
                b = Bill.objects.get(congress=m.group(1),
                                     bill_type=BillType.by_xml_code(
                                         m.group(2)),
                                     number=m.group(3))
                seen_bill_ids.append(b.id)

                # Update the index/events for any bill with recently changed text
                textfile = get_bill_text_metadata(b, None)
                if not textfile:
                    if b.congress >= 103 and b.introduced_date < (
                            datetime.now() - timedelta(days=14)).date():
                        print "No bill text?", fname, b.introduced_date
                    continue
                textfile = textfile["text_file"]
                if os.path.exists(textfile) and File.objects.is_changed(
                        textfile):
                    bill_index.update_object(
                        b, using="bill")  # index the full text
                    b.create_events()  # events for new bill text documents
                    File.objects.save_file(textfile)

                # Metadata unchanged: skip the full parse below.
                continue
            except Bill.DoesNotExist:
                print "Unchanged metadata file but bill doesn't exist:", fname
                pass  # just parse as normal

        if options.slow:
            time.sleep(1)

        tree = etree.parse(fname)
        for node in tree.xpath('/bill'):
            try:
                bill = bill_processor.process(Bill(), node)
            except:
                # Name the offending file before re-raising.
                print fname
                raise

            seen_bill_ids.append(bill.id)  # don't delete me later

            # The data source is derived from the congress number; congresses
            # 43 through 81 are not expected and raise ValueError.
            if bill.congress >= 93:
                bill.source = "thomas-congproj"
            elif bill.congress >= 82:
                bill.source = "statutesatlarge"
                if bill.current_status == BillStatus.enacted_signed:
                    bill.current_status = BillStatus.enacted_unknown
            elif bill.congress <= 42:
                bill.source = "americanmemory"
            else:
                raise ValueError()

            # So far this is just for American Memory bills.
            if node.xpath("string(source/@url)"):
                bill.source_link = unicode(node.xpath("string(source/@url)"))
            else:
                bill.source_link = None

            # One tuple per action element that carries a state attribute:
            # (repr of parsed datetime, status, action text, raw XML).
            actions = []
            for axn in tree.xpath("actions/*[@state]"):
                actions.append((
                    repr(
                        bill_processor.parse_datetime(
                            axn.xpath("string(@datetime)"))),
                    BillStatus.by_xml_code(axn.xpath("string(@state)")),
                    axn.xpath("string(text)"),
                    etree.tostring(axn),
                ))

            bill.sliplawpubpriv = None
            bill.sliplawnum = None
            for axn in tree.xpath("actions/enacted"):
                bill.sliplawpubpriv = "PUB" if axn.get(
                    "type") == "public" else "PRI"
                # The law number is the part after the hyphen in ``number``.
                bill.sliplawnum = int(axn.get("number").split("-")[1])

            bill.major_actions = actions
            try:
                bill.save()
            except:
                # Show the offending bill before re-raising.
                print bill
                raise

            if bill_index:
                bill_index.update_object(bill, using="bill")

            if not options.disable_events:
                bill.create_events()

        File.objects.save_file(fname)

    # delete bill objects that are no longer represented on disk.... this is too dangerous.
    if options.congress and not options.filter:
        # this doesn't work because seen_bill_ids is too big for sqlite!
        for b in Bill.objects.filter(congress=options.congress).exclude(
                id__in=seen_bill_ids):
            # Report only; nothing is deleted here.
            print "Bill is no longer on disk: ", b.id, b

    # The rest is for current only...

    if options.congress and int(options.congress) != CURRENT_CONGRESS:
        return

    # Parse docs.house.gov for what might be coming up this week.
    import iso8601
    dhg_html = urllib.urlopen("http://docs.house.gov/floor/").read()
    m = re.search(r"class=\"downloadXML\" href=\"(Download.aspx\?file=.*?)\"",
                  dhg_html)
    if not m:
        log.error(
            'No docs.house.gov download link found at http://docs.house.gov.')
    else:

        def bt_re(bt):
            # Regex for a bill type label (``bt[1]``) in which each period
            # is optional and may be followed by whitespace.
            return re.escape(bt[1]).replace(r"\.", r"\.?\s*")

        try:
            dhg = etree.parse(
                urllib.urlopen("http://docs.house.gov/floor/" +
                               m.group(1))).getroot()
        except:
            # Show the URL that failed before re-raising.
            print "http://docs.house.gov/floor/" + m.group(1)
            raise
        # iso8601.parse_date(dhg.get("week-date")+"T00:00:00").date()
        for item in dhg.xpath("category/floor-items/floor-item"):
            billname = item.xpath("legis-num")[0].text
            if billname is None:
                continue  # weird but OK
            m = re.match(
                r"\s*(?:Concur in the Senate Amendment to |Senate Amendment to )?("
                + "|".join(bt_re(bt) for bt in BillType) +
                r")(\d+)\s*(\[Conference Report\]\s*)?$", billname, re.I)
            if not m:
                # Names ending in " __" are skipped without logging an error.
                if not billname.strip().endswith(" __"):
                    log.error(
                        'Could not parse legis-num "%s" in docs.house.gov.' %
                        billname)
            else:
                # Find which bill type the matched label belongs to.
                for bt in BillType:
                    if re.match(bt_re(bt) + "$", m.group(1), re.I):
                        try:
                            bill = Bill.objects.get(congress=CURRENT_CONGRESS,
                                                    bill_type=bt[0],
                                                    number=m.group(2))
                            bill.docs_house_gov_postdate = iso8601.parse_date(
                                item.get("add-date")).replace(tzinfo=None)
                            bill.save()
                            if bill_index:
                                bill_index.update_object(bill, using="bill")
                            if not options.disable_events:
                                bill.create_events()
                        except Bill.DoesNotExist:
                            log.error(
                                'Could not find bill "%s" in docs.house.gov.' %
                                billname)
                        break
                else:
                    # for/else: reached only when no bill type matched.
                    log.error(
                        'Could not parse legis-num bill type "%s" in docs.house.gov.'
                        % m.group(1))

    # Parse Senate.gov's "Floor Schedule" blurb for coming up tomorrow.
    now = datetime.now()
    sfs = urllib.urlopen(
        "http://www.senate.gov/pagelayout/legislative/d_three_sections_with_teasers/calendars.htm"
    ).read()
    try:
        # Only the text between the two headings is scanned for bill links.
        sfs = re.search(r"Floor Schedule([\w\W]*)Previous Meeting",
                        sfs).group(1)
        for congress, bill_type, number in re.findall(
                r"http://hdl.loc.gov/loc.uscongress/legislation.(\d+)([a-z]+)(\d+)",
                sfs):
            bill_type = BillType.by_slug(bill_type)
            bill = Bill.objects.get(congress=congress,
                                    bill_type=bill_type,
                                    number=number)
            # Re-stamp only if never stamped or stamped over a week ago.
            if bill.senate_floor_schedule_postdate == None or now - bill.senate_floor_schedule_postdate > timedelta(
                    days=7):
                bill.senate_floor_schedule_postdate = now
                bill.save()
                if bill_index:
                    bill_index.update_object(bill, using="bill")
                if not options.disable_events:
                    bill.create_events()
    except Exception as e:
        # Any failure in this section is logged rather than raised.
        log.error('Could not parse Senate Floor Schedule: ' + repr(e))
def run():
    """Import every tagged bill from MongoDB into the Django models.

    Reads all documents of the ``final_tagged_bills`` collection into a
    dataframe and, for each row, creates an ``Origin``, a ``Stage`` and a
    ``Bill``, then attaches sponsors, categories and associated acts. A
    related object is reused when exactly one row with the same value already
    exists; otherwise a new one is created.

    Row fields are addressed by position, so the import depends on the column
    order of the dataframe.
    """
    client = MongoClient(
        'mongodb://*****:*****@localhost:27017/bill_db_ireland?authSource=bill_db_ireland'
    )
    final_tagged_bills = client.bill_db_ireland.final_tagged_bills
    # Load all tagged bills from MongoDB.
    bill_df = pd.DataFrame(list(final_tagged_bills.find()))

    for row in bill_df.itertuples():
        # The leading characters sliced off below are presumably fixed label
        # prefixes in the source data -- verify against the collection.
        origin = Origin(origin=row[7][19:])  # set origin
        origin.save()
        stage = Stage(stage=row[9], stage_info=row[10])  # set stage
        stage.save()
        sponsor_names = row[8][14:].split(';')
        try:
            bill_date = datetime.strptime(row[5][14:],
                                          '%d %b %Y').strftime('%Y-%m-%d')
        except (TypeError, ValueError):
            # Missing or unparsable date: fall back to today.
            bill_date = datetime.today().strftime('%Y-%m-%d')

        bill = Bill(title=row[13],
                    description=row[6],
                    origin=origin,
                    stage=stage,
                    bill_history=row[3],
                    date=bill_date,
                    url=row[15])  # add all elements to Bill
        bill.save()

        # Add all sponsors to the bill.
        for name in sponsor_names:
            name = name.strip()
            if Sponsor.objects.filter(sponsor=name).count() == 1:
                sponsor = Sponsor.objects.get(sponsor=name)
            else:
                sponsor = Sponsor(sponsor=name)
            sponsor.save()
            bill.sponsor.add(sponsor)
            bill.save()

        # Add all categories to the bill.
        # NOTE(review): row[11] is indexed by its own elements, which only
        # works for a mapping (or a sequence of valid indices) -- confirm.
        for i in row[11]:
            category_name = row[11][i]
            # Look up by the raw value. The check used to filter on an
            # unsaved Category instance, which only matched when the model's
            # string form happened to equal the category text.
            if Category.objects.filter(category=category_name).count() == 1:
                category = Category.objects.get(category=category_name)
            else:
                category = Category(category=category_name)
            category.save()
            bill.category.add(category)
            bill.save()

        # Add all associated acts to the bill.
        for item in row[2]:
            if item == '':
                continue
            if item[-1] == ')':
                # Cut the trailing parenthesised part, searching for "(" only
                # within the last ten characters.
                item = item[:item[-10:].find('(') - 10 + len(item)].strip()
            # Look up by the raw value, matching the get() call.
            if AssociatedAct.objects.filter(associated_act=item).count() == 1:
                act = AssociatedAct.objects.get(associated_act=item)
            else:
                act = AssociatedAct(associated_act=item)
            act.save()
            bill.associated_act.add(act)
            bill.save()
def _create_bill_from_row(row):
    """Create a Bill with its origin, stage and relations from a dataframe row.

    Row fields are addressed by position, so this depends on the column order
    of the dataframe. A related sponsor, category or act is reused when
    exactly one row with the same value already exists; otherwise a new one
    is created.
    """
    # The leading characters sliced off below are presumably fixed label
    # prefixes in the source data -- verify against the collection.
    origin = Origin(origin=row[7][19:])  # set origin
    origin.save()
    stage = Stage(stage=row[9], stage_info=row[10])  # set stage
    stage.save()
    sponsor_names = row[8][14:].split(';')
    try:
        bill_date = datetime.datetime.strptime(
            row[5][14:], '%d %b %Y').strftime('%Y-%m-%d')
    except (TypeError, ValueError):
        # Missing or unparsable date: fall back to today.
        bill_date = datetime.datetime.now().strftime("%Y-%m-%d")

    bill = Bill(title=row[13],
                description=row[6],
                origin=origin,
                stage=stage,
                bill_history=row[3],
                date=bill_date,
                url=row[15])  # add all elements to Bill
    bill.save()

    # Add all sponsors to the bill.
    for name in sponsor_names:
        name = name.strip()
        if Sponsor.objects.filter(sponsor=name).count() == 1:
            sponsor = Sponsor.objects.get(sponsor=name)
        else:
            sponsor = Sponsor(sponsor=name)
        sponsor.save()
        bill.sponsor.add(sponsor)
        bill.save()

    # Add all categories to the bill.
    # NOTE(review): row[11] is indexed by its own elements, which only works
    # for a mapping (or a sequence of valid indices) -- confirm.
    for i in row[11]:
        category_name = row[11][i]
        # Look up by the raw value. The check used to filter on an unsaved
        # Category instance, which only matched when the model's string form
        # happened to equal the category text.
        if Category.objects.filter(category=category_name).count() == 1:
            category = Category.objects.get(category=category_name)
        else:
            category = Category(category=category_name)
        category.save()
        bill.category.add(category)
        bill.save()

    # Add all associated acts to the bill.
    for item in row[2]:
        if item == '':
            continue
        if item[-1] == ')':
            # Cut the trailing parenthesised part, searching for "(" only
            # within the last ten characters.
            item = item[:item[-10:].find('(') - 10 + len(item)].strip()
        # Look up by the raw value, matching the get() call.
        if AssociatedAct.objects.filter(associated_act=item).count() == 1:
            act = AssociatedAct.objects.get(associated_act=item)
        else:
            act = AssociatedAct(associated_act=item)
        act.save()
        bill.associated_act.add(act)
        bill.save()


def run():
    """Refresh bills from the ``updated_bills_complete`` MongoDB collection.

    First clears the ``updated`` flag on bills whose ``updated_at`` date is
    more than six days old. Then, for every document in the collection: a
    bill whose title matches exactly one existing row gets a new ``Stage``
    and is flagged as updated today; a bill whose title matches no row is
    created from scratch. Titles that match several rows are left untouched.
    """
    date_format = "%Y-%m-%d"
    today = datetime.datetime.now().strftime(date_format)
    start_date = datetime.datetime.strptime(today, date_format)

    # Check old bills.
    for item in Bill.objects.filter(updated=True):
        end_date = datetime.datetime.strptime(str(item.updated_at),
                                              date_format)
        if (start_date - end_date).days > 6:
            item.updated = False
            # Persist the change; the flag was previously reset in memory
            # only and never written back.
            item.save()

    client = MongoClient('localhost', 27017)
    updated_bills_complete = client.bill_db_ireland.updated_bills_complete
    # Open all updated tagged bills from MongoDB as a dataframe.
    bill_df = pd.DataFrame(list(updated_bills_complete.find()))
    if bill_df.empty:
        return

    for row in bill_df.itertuples():
        matches = Bill.objects.filter(title=row[13]).count()
        if matches == 1:
            bill = Bill.objects.get(title=row[13])
            updated_stage = Stage(stage=row[9], stage_info=row[10])
            updated_stage.save()
            bill.updated = True
            bill.updated_at = today
            bill.stage = updated_stage
            bill.save()
        elif matches == 0:
            _create_bill_from_row(row)