def scrape_meeting_notice(self, chamber, item, url):
    """Build and yield one Event for a committee meeting notice.

    Args:
        chamber: Not read by this method; kept so existing callers keep
            working.
        item: Mapping describing the meeting. The keys read here are
            "CommitteeName", "MeetingDateTime", "AddressAliasNickname",
            "CommitteeMeetingStatusName", "CommitteeId" and
            "CommitteeMeetingId".
        url: Recorded as the first source of the event.

    Yields:
        A single Event carrying the committee, both source URLs, one
        agenda item per entry returned by the agenda endpoint, and one
        "Sponsor" participant per entry that names a primary sponsor.

    Raises:
        ValueError: If "MeetingDateTime" does not match the expected
            format (raised by ``strptime``).
    """
    # An event name is not provided for every meeting, so the committee
    # name doubles as the event name.
    committee_name = str(item["CommitteeName"])

    # NOTE(review): an earlier comment gave "04/25/2012 03:00:00 PM" as a
    # sample value, which this format (two-digit year, no seconds) would
    # not parse -- confirm against live data before changing either.
    fmt = "%m/%d/%y %I:%M %p"
    start_time = dt.datetime.strptime(str(item["MeetingDateTime"]), fmt)

    event = Event(
        location_name=str(item["AddressAliasNickname"]),
        start_date=self._tz.localize(start_time),
        name=committee_name,
        description="Committee Meeting Status: {}".format(
            item["CommitteeMeetingStatusName"]
        ),
    )
    event.add_source(url)
    event.add_committee(name=committee_name, id=item["CommitteeId"])

    page_url = (
        "http://legis.delaware.gov/json/MeetingNotice/"
        "GetCommitteeMeetingItems?committeeMeetingId={}".format(
            item["CommitteeMeetingId"]
        )
    )
    event.add_source(page_url)

    try:
        agenda_entries = self.post(page_url).json()["Data"]
    except ValueError:
        # JSON decode errors are ValueError subclasses. Presumably the
        # endpoint answers with an empty body when a meeting has no agenda
        # items; keep the event rather than aborting the scrape.
        self.info("POST returned nothing on {}".format(page_url))
        agenda_entries = []

    for entry in agenda_entries:
        event.add_agenda_item(description=str(entry["ItemDescription"]))

        sponsor_name = entry["PrimarySponsorShortName"]
        if sponsor_name is None:
            # str(None) would register a participant literally named "None".
            continue
        event.add_person(
            name=str(sponsor_name),
            id=str(entry["PrimarySponsorPersonId"]),
            note="Sponsor",
        )

    yield event
def scrape(self):
    """Scrape the agenda table at ``calurl`` and yield one Event per row.

    ``calurl`` is a module-level name defined outside this block.

    A row is skipped when it does not contain exactly one "Committee
    Details" link, or when its items cell contains "(None)".

    The date text in the table is combined with the current calendar
    year before parsing, so the year is always "this year".

    Yields:
        Event objects with the hosting committee, the notice PDF, one
        agenda item per ";"-separated entry (entries starting with "AB"
        or "SB" are also attached as bills), and the participants
        returned by ``self.scrape_participants``.
    """
    page = self.lxmlize(calurl)
    # The first row of the table is dropped (presumably the header row).
    rows = page.xpath("//table[@class='agenda-body']//tr")[1:]

    # Loop-invariant: the year appended to every parsed date.
    current_year = str(dt.datetime.now().year)

    for row in rows:
        committee_links = row.xpath(".//a[contains(@title,'Committee Details')]")
        if len(committee_links) != 1:
            continue
        committee_link = committee_links[0]

        cells = row.xpath("./*")
        date_text = cells[0].text_content().strip()
        committee = cells[1].text_content().strip()
        # Cell text has the form "<chamber> - <committee>".
        chamber, committee = [part.strip() for part in committee.split(" - ", 1)]

        info = cells[2]
        notice_link = info.xpath("./a[contains(@href, 'raw')]")[0]
        notice_url = notice_link.attrib["href"]
        name = notice_link.text
        time_text, where = info.xpath("./i/text()")

        agenda_text = cells[3].text_content().replace("Items: ", "")
        if "(None)" in agenda_text:
            continue
        agenda_entries = [part.strip() for part in agenda_text.split(";")]

        when = dt.datetime.strptime(
            ", ".join([date_text, current_year, time_text]),
            "%a %b %d, %Y, %I:%M %p",
        )

        if committee:
            committee = committee.replace("Committee on", "").strip()
            committee = f"{chamber} {committee}"
            # The committee name, when present, replaces the link text.
            name = committee

        meeting = Event(
            name=name, location_name=where, start_date=self._tz.localize(when)
        )
        meeting.add_source(calurl)
        meeting.add_committee(committee, note="host")
        meeting.add_document("notice", notice_url, media_type="application/pdf")

        for entry in agenda_entries:
            agenda_item = meeting.add_agenda_item(entry)
            if entry.startswith(("AB", "SB")):
                agenda_item.add_bill(entry)

        # Fetched only now, so rows skipped above cost no extra request.
        participants = self.scrape_participants(committee_link.attrib["href"])
        for participant in participants:
            meeting.add_person(participant["name"])

        yield meeting
def scrape_chamber(self, chamber):
    """Yield one Event per (location, date) group of committee hearings.

    Hearings are read from ``self.session``, grouped by location
    description and localized hearing date, and filtered to ``chamber``
    using the first three characters of the location description
    ("Asm" -> "lower", "Sen" -> "upper").

    Args:
        chamber: Chamber to keep; compared against "lower" / "upper".

    Yields:
        Event objects with one "consideration" agenda item per bill in the
        group and the committee added as a "host" participant.

    Raises:
        KeyError: If a location description does not start with "Asm" or
            "Sen", or a committee number is missing from ``_committee_nr``.
        AssertionError: If hearings in one (location, date) group refer to
            more than one committee.
        AttributeError: If a bill id does not match the expected
            ``<digits><letters><digits>`` shape.
    """
    chamber_by_prefix = {"Asm": "lower", "Sen": "upper"}
    bill_id_pattern = re.compile(r"\d+([^\d]+)(\d+)")

    # Cache descriptions so each location code is queried only once
    # instead of once per hearing.
    descriptions = {}
    grouped_hearings = defaultdict(list)
    for hearing in self.session.query(CACommitteeHearing):
        code = hearing.location_code
        if code not in descriptions:
            descriptions[code] = (
                self.session.query(CALocation)
                .filter_by(location_code=code)[0]
                .description
            )
        location = descriptions[code]

        if chamber_by_prefix[location[0:3]] != chamber:
            continue

        date = self._tz.localize(hearing.hearing_date)
        grouped_hearings[(location, date)].append(hearing)

    for (location, date), hearings in grouped_hearings.items():
        # Turn database bill ids into "<letters> <number>" display ids.
        bills = [
            "%s %s" % bill_id_pattern.match(hearing.bill_id).groups()
            for hearing in hearings
        ]

        # Explicit raise (rather than `assert`) so the check survives -O.
        committee_nrs = {hearing.committee_nr for hearing in hearings}
        if len(committee_nrs) != 1:
            raise AssertionError(
                "More than one committee meeting at (location, date) %r"
                % ((location, date),)
            )

        # Dereference the committee_nr number and get the display name.
        committee_name = _committee_nr[committee_nrs.pop()]
        desc = "Committee Meeting: " + committee_name

        # NOTE(review): location_name receives the committee name, not
        # `location` -- kept as-is; confirm this is intended.
        event = Event(name=desc, start_date=date, location_name=committee_name)

        for bill_id in bills:
            type_ = "bill" if "B" in bill_id else "resolution"
            agenda_item = event.add_agenda_item("consideration")
            agenda_item.add_bill(bill_id, note=type_)

        event.add_person(committee_name + " Committee", note="host")
        event.add_source("https://downloads.leginfo.legislature.ca.gov/")
        yield event
def scrape_meeting_notice(self, item, url):
    """Build and yield one Event for a committee meeting notice.

    Args:
        item: Mapping describing the meeting. The keys read here are
            "CommitteeName", "CommitteeTypeName", "MeetingDateTime",
            "AddressAliasNickname", "CommitteeMeetingStatusName",
            "CommitteeId" and "CommitteeMeetingId".
        url: Not read by this method; kept so existing callers keep
            working. The HTML meeting-notice page is used as the source.

    Yields:
        A single Event carrying the committee, the notice page as source,
        one agenda item per entry returned by the agenda endpoint (with
        its bill when "LegislationDisplayText" is set), and one "Sponsor"
        participant per entry that names a primary sponsor.

    Raises:
        ValueError: If "MeetingDateTime" does not match the expected
            format (raised by ``strptime``).
    """
    # An event name is not provided for every meeting: joint committees
    # use their name as-is, others are prefixed with the committee type.
    committee_name = str(item["CommitteeName"])
    if "Joint" in committee_name:
        event_name = committee_name
    else:
        event_name = "{} {}".format(str(item["CommitteeTypeName"]), committee_name)

    # NOTE(review): an earlier comment gave "04/25/2012 03:00:00 PM" as a
    # sample value, which this format (two-digit year, no seconds) would
    # not parse -- confirm against live data before changing either.
    fmt = "%m/%d/%y %I:%M %p"
    start_time = dt.datetime.strptime(str(item["MeetingDateTime"]), fmt)

    event = Event(
        location_name=str(item["AddressAliasNickname"]),
        start_date=self._tz.localize(start_time),
        name=event_name,
        description="Committee Meeting Status: {}".format(
            item["CommitteeMeetingStatusName"]
        ),
    )
    event.add_committee(name=committee_name, id=item["CommitteeId"])

    meeting_id = item["CommitteeMeetingId"]
    html_url = f"https://legis.delaware.gov/MeetingNotice?committeeMeetingId={meeting_id}"
    event.add_source(html_url)

    page_url = (
        "https://legis.delaware.gov/json/MeetingNotice/"
        f"GetCommitteeMeetingItems?committeeMeetingId={meeting_id}"
    )
    try:
        response_json = self.post(page_url).json()
    except json.decoder.JSONDecodeError:
        # No agenda items
        self.info(f"POST returned nothing on {page_url}")
        agenda_entries = []
    else:
        agenda_entries = response_json["Data"]

    for entry in agenda_entries:
        agenda_item = event.add_agenda_item(description=str(entry["ItemDescription"]))
        if entry["LegislationDisplayText"] is not None:
            agenda_item.add_bill(entry["LegislationDisplayText"])

        sponsor_name = entry["PrimarySponsorShortName"]
        if sponsor_name is None:
            # str(None) would register a participant literally named "None".
            continue
        event.add_person(
            name=str(sponsor_name),
            id=str(entry["PrimarySponsorPersonId"]),
            note="Sponsor",
        )

    yield event