class AtriumePlanningParser:
    """Scrape planning applications from the search form at ``base_url``.

    Applications found are accumulated in a single
    ``PlanningAuthorityResults`` object owned by the parser instance, so
    repeated searches on the same instance add to the same result set.
    """

    def __init__(self, authority_name, authority_short_name, base_url, debug=False):
        """Store the authority details and derive the info/comment URLs.

        Args:
            authority_name: Full name of the planning authority.
            authority_short_name: Short name of the planning authority.
            base_url: URL that the search form is posted to.
            debug: Stored on the instance; not otherwise used in this class.
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        # info_path and comment_path are module-level values, joined onto
        # base_url to give the pages that per-application links point at.
        self.info_url = urlparse.urljoin(base_url, info_path)
        self.comment_url = urlparse.urljoin(base_url, comment_path)

        self.debug = debug

        # No application is being assembled until a "Date Registered" cell
        # has been seen.
        self._current_application = None

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    @staticmethod
    def _parse_registered_date(text):
        """Convert a "dd-mm-yyyy" string to a ``datetime.date``.

        Returns None when ``text`` is empty or is not a valid date.

        FIXME - when python on haggis is a bit newer, this can become
        datetime.datetime.strptime(text, "%d-%m-%Y"), which is neater
        (and get rid of the import of time).
        """
        if not text:
            return None

        try:
            reg_day, reg_month, reg_year = [int(part) for part in text.strip().split("-")]
            return datetime.date(reg_year, reg_month, reg_day)
        except ValueError:
            # Non-numeric parts, the wrong number of parts, or an
            # impossible calendar date.
            return None

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications registered on the given date.

        Args:
            day: Day of the month, as an int.
            month: Month number, as an int.
            year: Four digit year, as an int.

        Returns:
            The parser's ``PlanningAuthorityResults`` object, with any ready
            applications from this search added to it.
        """
        # The end date for the search needs to be one day after the start
        # date - presumably the date is used as a timestamp at midnight
        search_start_date = datetime.date(year, month, day)
        search_end_date = search_start_date + datetime.timedelta(1)

        # NOTE(review): "%b" is locale dependent; this presumably relies on
        # the process running with an English (or "C") locale - confirm.
        search_data = urllib.urlencode({
            "dayRegStart": search_start_date.strftime("%d"),
            "monthRegStart": search_start_date.strftime("%b"),
            "yearRegStart": search_start_date.strftime("%Y"),
            "dayRegEnd": search_end_date.strftime("%d"),
            "monthRegEnd": search_end_date.strftime("%b"),
            "yearRegEnd": search_end_date.strftime("%Y"),
            "searchType": "current",
            "dispatch": "Search",
        })

        response = urllib2.urlopen(self.base_url, search_data)
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(html)

        # Do not carry a half-built application over from an earlier search.
        self._current_application = None

        # Without a "Results" heading there is nothing to parse.
        heading = soup.find(text="Results")
        if not heading:
            return self._results

        # Get a list of the tds in the results table. Label cells and value
        # cells are both in this list; only label cells are acted upon.
        tds = heading.parent.findNext("table").findAll("td")

        for td in tds:
            if td.string:
                label = td.string.strip()

                if label == "Date Registered":
                    # We are starting a new App
                    self._current_application = PlanningApplication()

                    registered = self._parse_registered_date(td.findNext("td").string)
                    if registered is None:
                        # Unreadable cell: fall back to the date searched
                        # for rather than abandoning the whole scrape.
                        registered = search_start_date
                    self._current_application.date_received = registered
                elif self._current_application is None:
                    # A cell seen before any application was started.
                    continue
                elif label == "Application Number":
                    self._current_application.council_reference = td.findNext("td").string
                elif label == "Location":
                    location = td.findNext("td").string
                    self._current_application.address = location

                    postcode = getPostcodeFromText(location)
                    if postcode:
                        self._current_application.postcode = postcode
                elif label == "Proposal":
                    self._current_application.description = td.findNext("td").string
            elif td.a and td.a.string and td.a.string.strip() == "View Full Details":
                if self._current_application is None:
                    continue

                # The info url is td.a
                messy_info_url = td.a["href"]

                # We need to get an id out of this url; only its query
                # string is reused.
                query_str = urlparse.urlsplit(messy_info_url)[3]

                self._current_application.info_url = self.info_url + "?" + query_str
                self._current_application.comment_url = self.comment_url + "?" + query_str

                # The details link is the last field handled for an
                # application, so this is where it gets recorded.
                if self._current_application.is_ready():
                    self._results.addApplication(self._current_application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return ``displayXML()`` of the results.

        ``day``, ``month`` and ``year`` may be anything ``int()`` accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
class BrentParser:
    """Scrape planning applications from the London Borough of Brent search form.

    Applications found are accumulated in a single
    ``PlanningAuthorityResults`` object owned by the parser instance, so
    repeated searches on the same instance add to the same result set.
    """

    def __init__(self, *args):
        """Set up the fixed authority details; any positional arguments are ignored."""
        self.authority_name = "London Borough of Brent"
        self.authority_short_name = "Brent"

        # Previously a GET-style url was used:
        # "http://www.brent.gov.uk/servlet/ep.ext?extId=101149&byPeriod=Y&st=PL&periodUnits=day&periodMultiples=14"
        self.base_url = "http://www.brent.gov.uk/servlet/ep.ext"

        self._current_application = None
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications dated on the given day.

        Args:
            day: Day of the month, as an int.
            month: Month number, as an int.
            year: Four digit year, as an int.

        Returns:
            The parser's ``PlanningAuthorityResults`` object, with any ready
            applications from this search added to it. It is returned
            unchanged when the page has no "Search Results" table.
        """
        search_day = datetime.date(year, month, day)

        # date_format is a module-level value; the same day is used for both
        # ends of the range.
        formatted_day = search_day.strftime(date_format)

        # NOTE(review): several values below look already url-encoded ("+",
        # "%0D%0A", ...) and urlencode will encode them a second time.
        # Left untouched because the server's expectations are not visible
        # from here - confirm before changing.
        post_data = [
            ("from", formatted_day),
            ("until", formatted_day),
            ("EXECUTEQUERY", "Query"),
#            ("auth", "402"),
            ("st", "PL"),
            ("periodUnits", "day"),
            ("periodMultiples", "14"),
            ("title", "Search+by+Application+Date"),
            ("instructions", "Enter+a+date+range+to+search+for+existing+applications+by+the+date+of+application.%0D%0A%3Cbr%3E%3Cbr%3E%0D%0A%3Cstrong%3ENote%3A%3C%2Fstrong%3E+Where+%27%28Applicant%27s+Description%29%27+appears+in+the+proposal%2C+the+text+may+subsequently+be+amended+when+the+application+is+checked."),
            ("byFormat", "N"),
            ("byOther1", "N"),
            ("byOther2", "N"),
            ("byOther3", "N"),
            ("byOther4", "N"),
            ("byOther5", "N"),
            ("byPostcode", "N"),
            ("byStreet", "N"),
            ("byHouseNumber", "N"),
            ("byAddress", "N"),
            ("byPeriod", "Y"),
            ("extId", "101149"),  # I wonder what this is...
            ("queried", "Y"),
            ("other1Label", "Other1"),
            ("other2Label", "Other2"),
            ("other3Label", "Other3"),
            ("other4Label", "Other4"),
            ("other5Label", "Other5"),
            ("other1List", ""),
            ("other2List", ""),
            ("other3List", ""),
            ("other4List", ""),
            ("other5List", ""),
            ("periodLabel", "From"),
            ("addressLabel", "Select+Address"),
            ("print", ""),
        ]

        # Now get the search page
        response = urllib2.urlopen(self.base_url, urllib.urlencode(post_data))
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(html)

        # Do not carry a half-built application over from an earlier search.
        self._current_application = None

        # A page without the heading or its table has nothing to parse.
        heading = soup.find(text="Search Results")
        if heading is None:
            return self._results

        results_table = heading.findNext("table")
        if results_table is None:
            return self._results

        # The last row of the table is not part of any application.
        trs = results_table.findAll("tr")[:-1]

        # There are six trs per application, ish
        # The first contains the case no and the application date.
        # The second contains the address
        # The third contains the description
        # The fourth contains the info page link
        # The fifth contains the comment link (or a note that comments are
        # currently not being accepted)
        # The sixth is a spacer.
        case_no_pattern = re.compile("Case No:")

        count = 0
        for tr in trs:
            count += 1

            ref = tr.find(text=case_no_pattern)
            if ref:
                # A case number always starts a new application, so the
                # row counter is re-synchronised here.
                self._current_application = PlanningApplication()
                count = 1

                self._current_application.council_reference = ref.split(":")[1].strip()
                self._current_application.date_received = search_day

            if self._current_application is None:
                # Rows seen before the first case number belong to nothing.
                continue

            position = count % 6

            if position == 2:
                self._current_application.address = tr.td.string.strip()
                self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            elif position == 3:
                self._current_application.description = tr.td.string.strip()
            elif position == 4:
                self._current_application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
            elif position == 5:
                try:
                    self._current_application.comment_url = urlparse.urljoin(self.base_url, tr.a['href'])
                except (TypeError, KeyError):
                    # No link (tr.a is None) or no href: comments are not
                    # currently being accepted. We'll leave this app for the
                    # moment - we'll pick it up later if they start
                    # accepting comments.
                    pass
            elif position == 0 and self._current_application.is_ready():
                self._results.addApplication(self._current_application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return ``displayXML()`` of the results.

        ``day``, ``month`` and ``year`` may be anything ``int()`` accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()