class AtriumePlanningParser:
    """Scrape planning applications from a search page at ``base_url``.

    A date-range search form is POSTed to ``base_url``, the HTML that comes
    back is parsed with BeautifulSoup, and each application found is added
    to a PlanningAuthorityResults object held on the instance.
    """

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Store the authority details and derive the info/comment URLs.

        authority_name       -- full name passed to PlanningAuthorityResults
        authority_short_name -- short name passed to PlanningAuthorityResults
        base_url             -- URL the search form is POSTed to
        debug                -- stored on the instance; not otherwise used
                                in this class
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        # info_path and comment_path are expected to be module-level names
        # defined outside this class.
        self.info_url = urlparse.urljoin(base_url, info_path)
        self.comment_url = urlparse.urljoin(base_url, comment_path)

        self.debug = debug

        # The application currently being assembled while walking the
        # results table. None until the first "Date Registered" cell is seen.
        self._current_application = None

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications registered on the given date.

        day, month and year are integers. Returns the instance's
        PlanningAuthorityResults with any applications found added to it.
        If the page has no "Results" text (or no table after it), the
        results object is returned without adding anything.
        """
        # The end date for the search needs to be one day after the start
        # date - presumably the date is used as a timestamp at midnight
        search_start_date = datetime.date(year, month, day)
        search_end_date = search_start_date + datetime.timedelta(1)

        search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
                    "monthRegStart": search_start_date.strftime("%b"),
                    "yearRegStart": search_start_date.strftime("%Y"),
                    "dayRegEnd": search_end_date.strftime("%d"),
                    "monthRegEnd": search_end_date.strftime("%b"),
                    "yearRegEnd": search_end_date.strftime("%Y"),
                    "searchType": "current",
                    "dispatch": "Search"
                    })

        response = urllib2.urlopen(self.base_url, search_data)
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(html)

        # Look the heading up once; it is absent when there is nothing
        # to report.
        results_heading = soup.find(text="Results")
        if not results_heading:
            return self._results

        results_table = results_heading.parent.findNext("table")
        if results_table is None:
            return self._results

        # The table is a flat run of label/value cells: a label cell is
        # followed by the cell holding its value.
        for td in results_table.findAll("td"):
            if td.string:
                label = td.string.strip()

                if label == "Date Registered":
                    # We are starting a new App
                    self._current_application = PlanningApplication()

                    # The value is split on "-" and read as day, month, year.
                    reg_day, reg_month, reg_year = [int(x) for x in td.findNext("td").string.split("-")]
                    self._current_application.date_received = datetime.date(reg_year, reg_month, reg_day)
                    # FIXME - when python on haggis is a bit newer,
                    # we can do the following, which is neater:
                    #self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
                elif self._current_application is None:
                    # A field cell before any "Date Registered" cell:
                    # there is no application to attach it to.
                    continue
                elif label == "Application Number":
                    self._current_application.council_reference = td.findNext("td").string
                elif label == "Location":
                    location = td.findNext("td").string
                    self._current_application.address = location

                    postcode = getPostcodeFromText(location)
                    if postcode:
                        self._current_application.postcode = postcode
                elif label == "Proposal":
                    self._current_application.description = td.findNext("td").string
            elif td.a and td.a.string and td.a.string.strip() == "View Full Details":
                if self._current_application is None:
                    continue

                # The info url is td.a
                messy_info_url = td.a["href"]

                # We need to get an id out of this url; index 3 of the
                # urlsplit result is the query string.
                query_str = urlparse.urlsplit(messy_info_url)[3]

                self._current_application.info_url = self.info_url + "?" + query_str
                self._current_application.comment_url = self.comment_url + "?" + query_str

                if self._current_application.is_ready():
                    self._results.addApplication(self._current_application)

        return self._results

    def getResults(self, day, month, year):
        """Convert the arguments to int, run the search and return displayXML()."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
# Example #2
# 0
class BrentParser:
    """Scrape planning applications from the London Borough of Brent site.

    A search form is POSTed to ``base_url`` for a single day, the returned
    HTML is parsed with BeautifulSoup, and each application found is added
    to a PlanningAuthorityResults object held on the instance.
    """

    def __init__(self, *args):
        """Set up the fixed authority details. Positional arguments are ignored."""

        self.authority_name = "London Borough of Brent"
        self.authority_short_name = "Brent"
#        self.base_url = "http://www.brent.gov.uk/servlet/ep.ext?extId=101149&byPeriod=Y&st=PL&periodUnits=day&periodMultiples=14"
        self.base_url = "http://www.brent.gov.uk/servlet/ep.ext"

        # The application currently being assembled while walking the
        # result rows. None until the first "Case No:" row is seen.
        self._current_application = None

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications dated on the given day.

        day, month and year are integers. Returns the instance's
        PlanningAuthorityResults with any applications found added to it.
        If the page has no "Search Results" text (or no table after it),
        the results object is returned without adding anything.
        """
        search_day = datetime.date(year, month, day)

        # date_format is expected to be a module-level name defined
        # outside this class.
        formatted_day = search_day.strftime(date_format)

        # NOTE(review): several values below ("Search+by+Application+Date",
        # the "instructions" text, "Select+Address") look already
        # URL-encoded, and urllib.urlencode will encode them again.
        # Left unchanged because the server's expectations cannot be
        # checked from here - confirm before altering.
        post_data = [
            ("from", formatted_day),
            ("until", formatted_day),
            ("EXECUTEQUERY", "Query"),
#            ("auth", "402"),
            ("st", "PL"),
            ("periodUnits", "day"),
            ("periodMultiples", "14"),
            ("title", "Search+by+Application+Date"),
            ("instructions", "Enter+a+date+range+to+search+for+existing+applications+by+the+date+of+application.%0D%0A%3Cbr%3E%3Cbr%3E%0D%0A%3Cstrong%3ENote%3A%3C%2Fstrong%3E+Where+%27%28Applicant%27s+Description%29%27+appears+in+the+proposal%2C+the+text+may+subsequently+be+amended+when+the+application+is+checked."),
            ("byFormat", "N"),
            ("byOther1", "N"),
            ("byOther2", "N"),
            ("byOther3", "N"),
            ("byOther4", "N"),
            ("byOther5", "N"),
            ("byPostcode", "N"),
            ("byStreet", "N"),
            ("byHouseNumber", "N"),
            ("byAddress", "N"),
            ("byPeriod", "Y"),
            ("extId", "101149"), # I wonder what this is...
            ("queried", "Y"),
            ("other1Label", "Other1"),
            ("other2Label", "Other2"),
            ("other3Label", "Other3"),
            ("other4Label", "Other4"),
            ("other5Label", "Other5"),
            ("other1List", ""),
            ("other2List", ""),
            ("other3List", ""),
            ("other4List", ""),
            ("other5List", ""),
            ("periodLabel", "From"),
            ("addressLabel", "Select+Address"),
            ("print", "")
            ]

        # Now get the search page
        response = urllib2.urlopen(self.base_url, urllib.urlencode(post_data))
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(html)

        # Without the heading or its table there is nothing to parse;
        # return what we have rather than failing on None.
        results_heading = soup.find(text="Search Results")
        if results_heading is None:
            return self._results

        results_table = results_heading.findNext("table")
        if results_table is None:
            return self._results

        # The last row of the table is dropped.
        trs = results_table.findAll("tr")[:-1]

        # There are six trs per application, ish

        # The first contains the case no and the application date.
        # The second contains the address
        # The third contains the description
        # The fourth contains the info page link
        # The fifth contains the comment link (or a note that comments are currently not being accepted
        # The sixth is a spacer.

        # Compiled once here rather than once per row.
        case_no_re = re.compile("Case No:")

        count = 0
        for tr in trs:
            count += 1

            ref = tr.find(text=case_no_re)

            if ref:
                # A "Case No:" row starts a new application and restarts
                # the row counter.
                self._current_application = PlanningApplication()
                count = 1

                self._current_application.council_reference = ref.split(":")[1].strip()
                self._current_application.date_received = search_day

            if self._current_application is None:
                # Rows before the first "Case No:" row belong to no
                # application.
                continue

            position = count % 6

            if position == 2:
                self._current_application.address = tr.td.string.strip()
                self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            elif position == 3:
                self._current_application.description = tr.td.string.strip()
            elif position == 4:
                self._current_application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
            elif position == 5:
                try:
                    comment_href = tr.a['href']
                except (TypeError, KeyError):
                    # No link in this row (tr.a is None) or the link has
                    # no href: comments are not currently being accepted.
                    # We'll leave this app for the moment - we'll pick it
                    # up later if they start accepting comments
                    continue
                self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_href)
            elif position == 0 and self._current_application.is_ready():
                self._results.addApplication(self._current_application)

        return self._results

    def getResults(self, day, month, year):
        """Convert the arguments to int, run the search and return displayXML()."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()