def find_current_bills():
    """
    Update status of most recent set of bills from http://pmg.org.za/billsstatus/proceedings, via the csv at
    /data/current_status.csv

    The file "../data/current_status.csv" is read row by row (the first row
    is treated as a header and skipped). Column 0 holds the bill code and
    column 1 the raw status. For every row whose code resolves to a stored
    Bill, the bill's status is updated and the change is committed once all
    rows have been processed.

    Rows that cannot be processed (too few columns, unparseable bill code,
    unknown bill) are logged and skipped instead of aborting the whole run.
    """

    # Maps the raw (lower-cased) status from the csv onto the value stored on
    # the bill. Statuses missing from this table are stored as-is.
    available_status = {
        "act": "enacted",
        "": None,
        "pc": "na",
        "sc": "ncop",
        "intro": "na",
    }

    with open("../data/current_status.csv", 'Ur') as f:
        reader = csv.reader(f)
        # skip the column title row (tolerating an empty file)
        next(reader, None)

        for entry in reader:
            if len(entry) < 2:
                logger.error("Skipping malformed row: " + str(entry))
                continue

            # fix bill types
            if entry[0].startswith("PM"):
                entry[0] = "PMB" + entry[0][2:]
            elif not entry[0].startswith("B"):
                entry[0] = "B" + entry[0]
            tmp_code = entry[0]
            tmp_status = entry[1].lower()

            # clean bill code
            tmp = analyze_bill_code(tmp_code)
            if not tmp:
                logger.error("Error analyzing bill code " + tmp_code)
                continue
            code = tmp["code"]

            logger.info(code + " " + str(entry))
            bill = Bill.query.filter(Bill.code == code).first()
            if bill is None:
                logger.error("Error finding bill " + code)
                continue

            # Membership test rather than truthiness, so that the "" -> None
            # mapping is honoured as well.
            if tmp_status in available_status:
                tmp_status = available_status[tmp_status]
            bill.status = tmp_status
            db.session.add(bill)

    db.session.commit()
    return
def version_state(self, fragment):
    """
    Extract available versions from second row.

    `fragment` is a parsed table row (it must support ``find`` and
    ``findAll``). When the row holds a link to a bill version, a version
    record (url, title, date, entry_type) is appended to
    ``self.current_bill["versions"]``, ``self.state_fn`` is pointed at this
    method again and True is returned. Otherwise ``self.state_fn`` is set to
    ``self.header_state`` and False is returned.

    If the current bill has no "code" yet, the link text is passed to
    ``scrapertools.analyze_bill_code`` and any resulting info is merged into
    ``self.current_bill``.

    Any exception raised while building the version record (e.g. a missing
    date cell or an unparseable date) is re-raised after the offending
    fragment has been logged at debug level.
    """
    link = fragment.find("a")

    # test whether the row contains a link to a bill version
    is_version_row = link and not (
        "bills.pmg.org.za" in link["href"] or "Bill Tracker" in link.text
    )
    if not is_version_row:
        self.state_fn = self.header_state
        return False

    versions = self.current_bill.setdefault("versions", [])

    if not self.current_bill.get("code"):
        link_text = link.text
        info = scrapertools.analyze_bill_code(link_text)
        if info:
            # Build a fresh dict; keys from `info` win on conflict.
            merged = dict(self.current_bill)
            merged.update(info)
            self.current_bill = merged
        else:
            logger.error("No bill found in string: " + link_text)

    try:
        version = {
            "url": link["href"],
            "title": link.text,
            "date": date_parser.parse(fragment.findAll("td")[1].text).date(),
            "entry_type": "bill-version",
        }
    except Exception:
        # keep the offending markup around for debugging, then propagate
        logger.debug(str(fragment))
        raise

    # set entry_type appropriately if this bill has already been enacted
    if "as enacted" in link.text:
        version['entry_type'] = "act"

    versions.append(version)
    self.state_fn = self.version_state
    return True
def handle_assent():
    """
    Add entries relating to a bill's assent from http://pmg.org.za/billsstatus/proceedings, via the csv at
    /data/bill_assent_dates.csv

    The file "../data/bill_assent_dates.csv" is read in full; its first row
    is treated as the column titles and ignored. The remaining rows are read
    as: column 0 bill code, column 1 act number, column 2 assent date,
    column 3 gazette reference.

    For every row that resolves to a stored Bill, the bill is marked as
    "enacted" (and its gazette set when one is given), and an "assent" Entry
    is created or updated for it. All changes are committed once, after the
    last row.

    Rows that cannot be processed (too few columns, unparseable bill code,
    unknown bill, unparseable date, undecodable text) are logged and skipped.
    """

    with open("../data/bill_assent_dates.csv", 'Ur') as f:
        rows = [list(rec) for rec in csv.reader(f, delimiter=',')]

    president = Agent.query.filter(Agent.name == "The President").first()

    # ignore column title row
    for entry in rows[1:]:
        if len(entry) < 4:
            logger.error("Skipping malformed row: " + str(entry))
            continue

        # fix bill types
        if entry[0].startswith("PM"):
            entry[0] = "PMB" + entry[0][2:]
        elif not entry[0].startswith("B"):
            entry[0] = "B" + entry[0]
        tmp_code = entry[0]

        # clean bill code
        tmp = analyze_bill_code(tmp_code)
        if not tmp:
            logger.error("Error analyzing bill code " + tmp_code)
            continue
        code = tmp["code"]

        logger.info(code + " " + str(entry))
        bill = Bill.query.filter(Bill.code == code).first()
        if bill is None:
            logger.error("Error finding bill " + code)
            continue

        # Reset for every row: otherwise a row without a gazette would either
        # raise (first row) or silently reuse the previous bill's gazette.
        gazette = None
        try:
            act_no = unicode(entry[1])
            assent_date = unicode(entry[2])
            # convert date to python date object
            try:
                assent_date = date_parser.parse(assent_date).date()
            except Exception:
                logger.error("Error parsing date " + entry[2])
                continue
            # values of two characters or fewer are not treated as a gazette
            if entry[3] and len(entry[3]) > 2:
                gazette = unicode(entry[3])
        except UnicodeDecodeError:
            logger.error("Unicode error: " + str(entry))
            continue

        # update bill record
        bill.status = "enacted"
        if gazette:
            bill.gazette = gazette
        db.session.add(bill)

        # add relevant entry in bill history
        tmp_entry = (
            Entry.query.join(Entry.bills)
            .filter(Bill.code == code)
            .filter(Entry.type == "assent")
            .first()
        )
        if not tmp_entry:
            tmp_entry = Entry()
            tmp_entry.bills.append(bill)
        tmp_entry.date = assent_date
        tmp_entry.type = "assent"
        # NOTE(review): 3 presumably identifies the President's office as the
        # location -- confirm against the location definitions.
        tmp_entry.location = 3
        tmp_entry.title = "Signed into law by the President."
        tmp_entry.agent = president
        if act_no and gazette:
            tmp_entry.description = "Enacted as Act " + act_no + ". Refer to Government Gazette " + gazette + "."
        db.session.add(tmp_entry)

    db.session.commit()
    return