Example #1
    def getResultsByDateRange(self, date_from, date_to):
        # first we fetch the search page to get ourselves some session info...
        search_form_response = self._getSearchResponse()
        
        search_form_contents = search_form_response.read()

        # This sometimes causes a problem in HTMLParser, so let's just get the link
        # out with a regex...
        groups = self.action_regex.search(search_form_contents).groups()

        action = groups[0] 
        #print action

        # This is to handle the amp; which seems to have appeared in this
        # url on the Oldham site
        action = ''.join(action.split('amp;'))

        action_url = urlparse.urljoin(self.base_url, action)
        #print action_url

        search_data = {"regdate1": date_from.strftime(date_format),
                       "regdate2": date_to.strftime(date_format),
                       }
        
        opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)
        response = opener.open(action_url, search_data)
        results_html = response.read()

        # This is for doing site specific html cleanup
        results_html = self._cleanupHTML(results_html)

        # Some javascript garbage in the header upsets HTMLParser,
        # so we'll just have the body
        just_body = "<html>" + end_head_regex.split(results_html)[-1]

        #self.feed(just_body)
        
        soup = BeautifulSoup(just_body, convertEntities=BeautifulSoup.ALL_ENTITIES)

        # Each app is in a table of its own.
        results_tables = self._getResultsSections(soup)


        for app_table in results_tables:
            self._current_application = PlanningApplication()
            self._current_application.council_reference = self._getCouncilReference(app_table)
            self._current_application.address = self._getAddress(app_table)
            
            # Get the postcode from the address
            self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            
            self._current_application.description = self._getDescription(app_table)
            self._current_application.info_url = self._getInfoUrl(app_table)
            self._current_application.comment_url = self._getCommentUrl(app_table)
            self._current_application.date_received = self._getDateReceived(app_table)
            self._current_application.ward_name = self._getWard(app_table)

            self._results.addApplication(self._current_application)

        return self._results
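
Example #1 depends on a few names defined elsewhere in its scraper module (self.action_regex, end_head_regex, date_format). Plausible definitions, offered purely as assumptions about the code that is not shown:

    import re

    # Assumed helpers for Example #1; the real module is not shown here.
    action_regex = re.compile(r'action="([^"]*)"')          # pulls the form's action URL
    end_head_regex = re.compile(r"</head>", re.IGNORECASE)  # used to drop the <head>
    date_format = "%d/%m/%Y"                                # UK-style dates for strftime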
Example #2
    def getResultsByDayMonthYear(self, day, month, year):
        # Now get the search page
        response = urllib2.urlopen(self.base_url)
        soup = BeautifulSoup(response.read())

        trs = soup.table.table.findAll("tr", {"class": re.compile("(?:odd)|(?:even)")})


        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.address = tds[2].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[3].string.strip()

            # Fetch the info url in order to get the date received and the comment url

            info_response = urllib2.urlopen(application.info_url)

            info_soup = BeautifulSoup(info_response.read())

            application.date_received = datetime.datetime.strptime(
                info_soup.find(text=re.compile(r"\s*Received:\s*")).findNext("td").string.strip(),
                date_format).date()

            application.comment_url = urlparse.urljoin(self.base_url, info_soup.find("input", value="Comment on this application").parent['action'])


            self._results.addApplication(application)

        return self._results
Example #3
    def getResultsByDayMonthYear(self, day, month, year):
        # Now get the search page
        response = urllib2.urlopen(self.base_url)

        soup = BeautifulSoup(response.read())

        # Each application contains the nav string "Application: "
        nav_strings = soup.findAll(text="Application: ")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findPrevious("tr").findAll("td", limit=2)[1].string.strip()

            application.address = nav_string.findNext(text=location_re).split(":")[1].strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = nav_string.findNext(text="Proposal: ").findNext("td").string.strip()

            application.comment_url = urlparse.urljoin(self.base_url, nav_string.findNext(text="Proposal: ").findNext("a")['href'])

            application.date_received = datetime.datetime.strptime(nav_string.findNext(text=date_received_re).split(": ")[1], date_format).date()

            # FIXME: There is no appropriate info_url for the Harrow apps. 
            # I'll put the base url for the moment, but as that is
            # a list of apps from the last 7 days that will quickly be out of date.

            application.info_url = self.base_url
            
            self._results.addApplication(application)

        return self._results
Example #4
    def getResultsByDayMonthYear(self, day, month, year):
        response = urllib2.urlopen(self.base_url)
        soup = BeautifulSoup.BeautifulSoup(response.read())

        trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a["href"])
            application.address = " ".join(tds[1].contents[1].strip().split()[1:])
            application.postcode = getPostcodeFromText(application.address)

            # We're going to need to download the info page in order to get
            # the comment link, the date received, and the description.

            info_response = urllib2.urlopen(application.info_url)
            info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

            application.description = info_soup.find(text="Proposal").findNext(text=True).strip()
            date_received_str = info_soup.find(text="Date Application Valid").findNext(text=True).split(",")[1].strip()

            # This is a nasty botch, but the easiest way I can see to get a date
            # out of this is to make another string and use strptime.
            better_date_str = "%s %s %s" % date_received_re.match(date_received_str).groups()
            application.date_received = datetime.datetime.strptime(better_date_str, "%d %B %Y").date()
            application.comment_url = info_soup.find("a", title="Comment on this planning application.")["href"]

            self._results.addApplication(application)

        return self._results
Example #5
    def handle_endtag(self, tag):
        if self._state == IN_INNER_TABLE and tag == "table":
            # The next if should never be false, but it pays to be careful :-)
            if self._current_application.council_reference is not None:
                self.results.addApplication(self._current_application)
            self._state = IN_RESULTS_TABLE_TD

        elif self._state == IN_RESULTS_TABLE_TD and tag == "td":
            self._state = FINISHED
            
        elif self._state == IN_INNER_TABLE and tag == "td":
            if self._td_count == 2:
                # This data is the App No.
                council_reference = self.get_data().strip()
                self._current_application.council_reference = council_reference

                # This also gives us everything we need for the info and comment urls
                self._current_application.info_url = urlparse.urljoin(self.base_url, info_url_end %(council_reference))
                self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_url_end %(council_reference))
                
            elif self._td_count == 4:
                # This data is the address
                self._current_application.address = self.get_data().strip()
                self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            elif self._td_count == 7:
                # This data is the description
                self._current_application.description = self.get_data().strip()
Example #6
    def handle_endtag(self, tag):
        # There is no need to do anything unless we are in table 3.
        if self._table_count == 3:

            # The end <tr> indicates that the current application is finished.
            # Now we can fetch the info_page to get the address, postcode,
            # and description.
            # If we don't have a reference, then we are in the header row,
            # which we don't want.
            # There is no point in doing this if the date is not the requested one.

            if tag == 'tr' and \
                   self._current_application.council_reference is not None and \
                   self._current_application.date_received == self._requested_date:
                
                info_page_parser = SouthOxfordshireInfoURLParser()
                info_page_parser.feed(urllib2.urlopen(self._current_application.info_url).read())

                self._current_application.address = info_page_parser.address
                self._current_application.postcode = getPostcodeFromText(info_page_parser.address)
                self._current_application.description = info_page_parser.description

                # Add the current application to the results set
                self._results.addApplication(self._current_application)

            # At the end of the 5th <td>, self._data should contain
            # the received date of the application.
            if tag == 'td' and self._td_count == 5:
                app_year, app_month, app_day = tuple(time.strptime(self._data, "%d %B %Y")[:3])
                self._current_application.date_received = datetime.date(app_year, app_month, app_day)
                self._data = ''
                self._td_count = 0
Example #7
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each app is stored in a table of its own.
        # These tables don't have any nice distinguishing features,
        # but they do all contain a NavigableString "Application",
        # and nothing else in the page does.
        nav_strings = soup.findAll(text="Application")
        
        for nav_string in nav_strings:
            results_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            application.council_reference = results_table.a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, results_table.a['href'])
            application.address = results_table.findAll("td")[7].a.string.strip()

            application.postcode = getPostcodeFromText(application.address)
            application.description = results_table.findAll("td")[-1].contents[0].strip()

            # A few applications have comment urls, but most don't.
            # When they do, they have a case officer - I don't think we can
            # work out the other urls - even if they exist.
            # Best to use the email address.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results
Example #8
    def handle_endtag(self, tag):
        if self.debug:
            print "ending: ", tag

        if tag == "table" and self._in_results_table == 2:
            self._in_results_table = 3
        elif tag == "tr" and self._in_results_table == 2:
            if self._current_application.council_reference is not None:

                # get the received date
                # info_response = urllib2.urlopen(self._current_application.info_url)
                # info_page_parser = InfoPageParser()
                # info_page_parser.feed(info_response.read())
                self._current_application.date_received = self._requested_date  # info_page_parser.date_received
                self._results.addApplication(self._current_application)
        elif tag == "td" and self._in_results_table == 2:
            if self._td_count == self._location_col_no:
                data = " ".join(self._data_list).strip()
                self._current_application.address = data
                postcode = getPostcodeFromText(data)
                if postcode is not None:
                    self._current_application.postcode = postcode
                self._data_list = []
            elif self._td_count == self._description_col_no:
                data = " ".join(self._data_list).strip()
                self._current_application.description = data
                self._data_list = []
        elif tag == "a" and self._in_results_table == 2 and self._td_count == self._reference_col_no:
            data = "".join(self._data_list).strip()
            self._current_application.council_reference = data
            self._current_application.comment_url = self._comment_url % {"council_reference": data}
            self._data_list = []
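
Examples #5 through #8 are handle_endtag methods from HTMLParser-based scrapers; the state constants and data buffers they reference live elsewhere in each class. A minimal sketch of the kind of scaffolding they assume (the names and values here are guesses, not the originals):

    from HTMLParser import HTMLParser

    # Assumed module-level state constants, in the style of the parsers above:
    IN_RESULTS_TABLE_TD, IN_INNER_TABLE, FINISHED = range(3)

    class ResultsParser(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self._state = None
            self._td_count = 0
            self._data_chunks = []

        def handle_data(self, data):
            # Buffer text until an end tag decides what it belongs to.
            self._data_chunks.append(data)

        def get_data(self):
            # Return and clear the text collected since the last call.
            data = ''.join(self._data_chunks)
            self._data_chunks = []
            return data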
Example #9
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # What we actually need is the monday before the date searched for:
        monday_before = search_day - datetime.timedelta(search_day.weekday())

        # Now get the search page
        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
        soup = BeautifulSoup(response.read())

        result_tables = soup.findAll("table", align="Center", cellpadding="3")

        for table in result_tables:
            application = PlanningApplication()

            # We can set the date received and the comment url straight away.
            application.comment_url = self.comments_email_address

            trs = table.findAll("tr")

            application.council_reference = trs[0].a.string.strip()
            relative_info_url = trs[0].a['href']

            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)

            application.date_received = monday_before

            application.address = trs[1].findAll("td")[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = trs[2].findAll("td")[1].string.strip()

            self._results.addApplication(application)

        return self._results
Example #10
    def _getPostCode(self, info_soup):
        """In most cases, the postcode can be got from the address in
        the results table. Some councils put the address there without the
        postcode. In this case we will have to go to the info page to get
        the postcode. This should be done by overriding this method with
        one that parses the info page."""

        return getPostcodeFromText(self._current_application.address)
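
A minimal sketch of the override the docstring describes, assuming a hypothetical info page where a "Postcode" label precedes the value (the layout is invented for illustration):

    def _getPostCode(self, info_soup):
        # Hypothetical override: the results table lacks a postcode, so read
        # it from the info page instead, falling back to the default.
        postcode_label = info_soup.find(text="Postcode")
        if postcode_label is not None:
            return postcode_label.findNext("td").string.strip()
        return getPostcodeFromText(self._current_application.address)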
Example #11
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        get_response = urllib2.urlopen(self.base_url)
        
        get_soup = BeautifulSoup(get_response.read())

        post_data = (
            ("__VIEWSTATE", get_soup.find("input", id="__VIEWSTATE")["value"]),
#            ("QuickSearchApplicationNumber$TextBox_ApplicationNumber", ""),
#            ("QuickSearchThisWeek$DropDownList_PastWeek", ""),
#            ("DetailedSearch$TextBox_PropertyNameNumber", ""),
#            ("DetailedSearch$Textbox_StreetName", ""),
#            ("DetailedSearch$Textbox_TownVillage", ""),
#            ("DetailedSearch$Textbox_Postcode", ""),
#            ("DetailedSearch$Textbox_Parish", ""),
#            ("DetailedSearch$Textbox_ApplicantSurname", ""),
#            ("DetailedSearch$TextBox_AgentName", ""),
            ("DetailedSearch$TextBox_DateRaisedFrom", search_date.strftime(date_format)),
            ("DetailedSearch$TextBox_DateRaisedTo", search_date.strftime(date_format)),
#            ("DetailedSearch$TextBox_DecisionFrom", "dd%2Fmm%2Fyyyy"),
#            ("DetailedSearch$TextBox_DecisionTo", "dd%2Fmm%2Fyyyy"),
            ("DetailedSearch$Button_DetailedSearch", "Search"),
            ("__EVENTVALIDATION", get_soup.find("input", id="__EVENTVALIDATION")["value"]),
            )

        # The response to the GET is a redirect. We'll need to post to the new url.
        post_response = urllib2.urlopen(get_response.url, urllib.urlencode(post_data))
        post_soup = BeautifulSoup(post_response.read())

        if not post_soup.find(text=re.compile("No matching record")):
            # The first row contains headers.
            trs = post_soup.find("table", {"class": "searchresults"}).findAll("tr")[1:]

            for tr in trs:
                application = PlanningApplication()

                # We can fill the date received in straight away from the date we searched for.
                application.date_received = search_date

                tds = tr.findAll("td")

                application.council_reference = tds[0].font.string.strip()
                application.address = tds[2].font.string.strip()
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[3].font.string.strip()

                # Set the info url and the comment url to be the same - can't get to the comment
                # one directly without javascript.
                application.info_url = self.info_url %(application.council_reference)
                application.comment_url = application.info_url

                self._results.addApplication(application)

        return self._results
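
Example #11 re-posts an ASP.NET WebForms page, which means echoing back the hidden __VIEWSTATE and __EVENTVALIDATION fields from the GET response. A small helper in the same style (a sketch, not part of the original scraper):

    def get_aspnet_hidden_fields(soup):
        # Collect the hidden WebForms state fields needed to re-POST the page.
        fields = []
        for field_id in ("__VIEWSTATE", "__EVENTVALIDATION"):
            tag = soup.find("input", id=field_id)
            if tag is not None:
                fields.append((field_id, tag["value"]))
        return fields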
Example #12
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        next = self.base_url %{"day": day,
                               "month": month,
                               "year": year,
                               }

        while next:
            
            # Now get the search page
            response = urllib2.urlopen(next)

            soup = BeautifulSoup.BeautifulSoup(response.read())

            trs = soup.table.findAll("tr")[1:] # First one is just headers

            for tr in trs:
                application = PlanningApplication()

                application.date_received = search_day
                application.council_reference = tr.a.string
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                tds = tr.findAll("td")

                application.address = ' '.join([x.replace("&nbsp;", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[4].string.replace("&nbsp;", " ").strip()

                # Get the info page in order to find the comment url
                # we could do this without a download if it wasn't for the
                # sector parameter - I wonder what that is?
                info_response = urllib2.urlopen(application.info_url)
                info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

                comment_navstring = info_soup.find(text=comment_re)
                
                if comment_navstring:
                    application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
                else:
                    application.comment_url = "No Comments"

                # While we're at it, let's get the OSGB
                application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]

                self._results.addApplication(application)
                
            next_element = soup.find(text="next").parent

            if next_element.name == 'a':
                next = urlparse.urljoin(self.base_url, next_element['href'])
            else:
                next = None

        return self._results
Example #13
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        search_date_string = search_date.strftime(date_format)

        "appstat=&decision=&appdec=&ward=&parish=&dadfrom=&dadto=&davfrom=01%2F06%2F2008&davto=02%2F06%2F2008&searchbut=Search"
        search_data = urllib.urlencode(
            [("searchtype", "1"),
             ("appstat", ""),
             ("decision", ""),
             ("appdec", ""),
             ("ward", ""),
             ("parish", ""),
             ("dadfrom", ""),
             ("dadto", ""),
             ("davfrom", search_date_string),
             ("davto", search_date_string),
             ("searchbut", "Search"),
                ]
            )

        split_search_url = self._split_base_url[:3] + (search_data, '')
        search_url = urlparse.urlunsplit(split_search_url)

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find(text="Application No").parent.parent.parent
        trs = results_table.findAll("tr")[1:]

        tr_counter = 0
        
        while tr_counter < len(trs):
            tr = trs[tr_counter]

            if tr_counter % 2 == 0:
                application = PlanningApplication()
                application.date_received = search_date
                application.comment_url = self.comment_email_address

                tds = tr.findAll("td")

                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                application.council_reference = tr.a.string.strip()

                application.address = tds[1].string.strip()
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()

                self._results.addApplication(application)

            tr_counter += 1

        return self._results
Example #14
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            [("fuseaction", "planapps.list"),
             ("SUBJECTID", "104CC166-3ED1-4D22-B9F1E2FB8438478A"),
             ("src_fromdayRec", day),
             ("src_frommonthRec", month),
             ("src_fromyearRec", year),
             ("src_todayRec", day),
             ("src_tomonthRec", month),
             ("src_toyearRec", year),
             ("findroadworks", "GO"),
             ]
            )
        
        search_url = self.base_url + "?" + search_data

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find("table", id="results")

        # Apart from the first tr, which contains headers, the trs come in pairs for each application

        trs = results_table.findAll("tr")[1:]

        tr_count = 0
        while tr_count < len(trs):
            tr = trs[tr_count]

            if tr_count % 2 == 0:
                application = PlanningApplication()
                application.date_received = search_date
                
                tds = tr.findAll("td")

                application.council_reference = tds[0].a.string.strip()
                application.comment_url = self.comment_url %(application.council_reference)

                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.address = ', '.join([x.strip() for x in tds[1].findAll(text=True)])
                application.postcode = getPostcodeFromText(application.address)
            else:
                # Get rid of the "Details: " at the beginning.
                application.description = tr.td.string.strip()[9:]

                self._results.addApplication(application)

            tr_count += 1

        return self._results
Example #15
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        get_request = urllib2.Request(self.base_url)
        get_response = urllib2.urlopen(get_request)
        cookie_jar.extract_cookies(get_response, get_request)
        
        get_soup = BeautifulSoup(get_response.read())

        post_data = (
            ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
            ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
            ("action", "Search"),
#            ("ApplicationSearch21%3AtbDevAddress", ""),
#            ("ApplicationSearch21%3AtbApplicantName", ""),
#            ("ApplicationSearch21%3AtbAgentName", ""),
            ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
            ("ApplicationSearch21:btnDateSubmitted", "Search"),
#            ("ApplicationSearch21%3AtbDateDetermined", ""),
            )

        
        post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        post_response = cookie_handling_opener.open(post_request)

        post_soup = BeautifulSoup(post_response.read())

        # Discard the first <tr>, which contains headers
        trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()
            
            tds = tr.findAll("td")

            application.council_reference = tds[0].string.strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[2].string.strip()

            application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
            application.info_url = self.info_url %(application.council_reference)

            # The comment url must be accessed by a POST, so we'll just use the info url for that as well

            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results
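
Example #15 relies on cookie_jar and cookie_handling_opener objects defined elsewhere. A plausible setup using the Python 2 standard library (assumed, since the original module is not shown):

    import cookielib
    import urllib2

    cookie_jar = cookielib.CookieJar()
    cookie_handling_opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookie_jar))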
Example #16
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        # We'll set the start date to be one day earlier in order to catch the first result on every day at some point - see TODO list
        response = urllib2.urlopen(self.base_url %{"end_date": search_date.strftime(date_format),
                                                   "start_date": (search_date - datetime.timedelta(1)).strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each app is stored in its own table
        result_tables = soup.findAll("table", border="1")

        # For the moment, we'll have to ignore the first result (see TODO list).
        for table in result_tables[1:]:
            application = PlanningApplication()

            # It's not clear to me why this next one isn't the string of the next sibling. This works though!
            application.council_reference = table.find(text=re.compile("Reference")).parent.findNextSibling().contents[0]

            application.address = table.find(text="Location").parent.findNextSibling().string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = urlparse.urljoin(self.base_url, table.a['href'])

            # Let's go to the info_page and get the OSGB and the date_received
            info_request = urllib2.Request(application.info_url)

            # We need to add the language header in order to get UK style dates
            info_request.add_header("Accept-Language", "en-gb,en")
            info_response = urllib2.urlopen(info_request)
            info_soup = BeautifulSoup(info_response.read())
            
            grid_reference_td = info_soup.find(text="Grid Reference").findNext("td")
            x_element = grid_reference_td.font
            
            application.osgb_x = x_element.string.strip()
            application.osgb_y = x_element.nextSibling.nextSibling.string.strip()
            
            date_string = info_soup.find(text="Date Valid").findNext("td").string.strip()

            application.date_received = datetime.datetime(*(time.strptime(date_string, date_format)[0:6]))

            application.description = table.find(text=re.compile("Description of Proposal")).parent.nextSibling.string.strip()


            # There is a link to comment from the info page, though I can't click it.
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results
Example #17
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        date_string = search_date.strftime(date_format)
        
        search_data = urllib.urlencode({"reference": "",
                                        "undecided": "yes",
                                        "dateFrom": date_string,
                                        "dateTo": date_string,
                                        "Address": "",
                                        "validate": "true",
                                        })


        request = urllib2.Request(self.base_url, search_data)
        response = urllib2.urlopen(request)

        html = response.read()

        soup = BeautifulSoup(html)

        tables = soup.findAll("table", {"style": "width:auto;"})

        if not tables:
            return self._results

        # We don't want the first or last tr
        trs = tables[0].findAll("tr")[1:-1]

        for tr in trs:
            app = PlanningApplication()

            tds = tr.findAll("td")

            if len(tds) == 4:
                local_info_url = tds[0].a['href']
                app.info_url = urlparse.urljoin(self.base_url, local_info_url)
                app.council_reference = tds[0].a.string

                app.address = tds[1].string
                app.postcode = getPostcodeFromText(app.address)

                app.description = tds[2].string

                app.comment_url = urlparse.urljoin(self.base_url, comment_url_end %app.council_reference)
                app.date_received = search_date

                self._results.addApplication(app)

        return self._results
Example #18
    def getResultsByDayMonthYear(self):
        # Note that we don't take the day, month and year parameters here.

        # First get the search page
        request = urllib2.Request(self.base_url)
        response = urllib2.urlopen(request)

        soup = BeautifulSoup(response.read())

        trs = soup.findAll("tr", {"class": "dbResults"})

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.contents[0].strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[2].string.strip()
            application.info_url= urlparse.urljoin(self.base_url, tds[0].a['href'])

            # These bits have been commented out for performance reasons. We
            # can't afford to go to every application's details page ten times
            # a day while it is open. Instead, we'll just set the date_received
            # to be the scrape date. The comment url can be got by using the id
            # in the info url.
            application.date_received = datetime.datetime.today()
            
            relative_comment_url_template = "PlanAppComment.aspx?appId=%d"

            # Get the appId from the info_url

            app_id = int(cgi.parse_qs(urlparse.urlsplit(application.info_url)[3])['frmId'][0])

            application.comment_url = urlparse.urljoin(self.base_url, relative_comment_url_template %(app_id))


#             # I'm afraid we're going to have to get each info url...
#             this_app_response = urllib2.urlopen(application.info_url)
#             this_app_soup = BeautifulSoup(this_app_response.read())

#             # If there is no received date for some reason, we'll use the publicity date instead.
#             date_string = (this_app_soup.find("span", id="lblTrackRecievedDate") or this_app_soup.find("span", id="lblPubDate")).string
#             application.date_received = datetime.datetime(*(time.strptime(date_string, date_format)[0:6]))

#             application.comment_url = urlparse.urljoin(self.base_url, this_app_soup.find("a", id="lnkMakeComment")['href'])

            self._results.addApplication(application)

        return self._results
Example #19
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        pagenum = 1

        while pagenum:
            response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format), 
                                                       "pagenum": pagenum}
                                       )
            soup = BeautifulSoup.BeautifulSoup(response.read())

            # This is not a nice way to find the results table, but I can't 
            # see anything good to use, and it works...

            # There are two trs with style attributes per app. This will find all the first ones of the pairs.
            trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2]

            for tr in trs:
                tds = tr.findAll("td")
                date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()

                # Stop looking through the list if we have found one which is earlier than the date searched for.
                if date_received < search_date:
                    # If we break out, then we won't want the next page
                    pagenum = None
                    break

                application = PlanningApplication()
                application.date_received = date_received

                application.council_reference = tds[0].small.string.strip()

                # The second <td> contains the address, split up with <br/>s
                application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()

                application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)
            else:
                # If we got through the whole list without breaking out,
                # then we'll want to get the next page.
                pagenum += 1

        return self._results
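
The loop above leans on Python's for/else: the else clause runs only when the for completes without hitting break, which is what lets the scraper decide whether to fetch another page. A minimal illustration of the semantics:

    for pagenum in [1, 2, 3]:
        if pagenum == 99:
            break
    else:
        # Reached only because no break fired.
        print "finished every page, no early exit"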
Example #20
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"day": day,
                                                   "month": month,
                                                   "year": year,
                                                   })
        soup = BeautifulSoup(response.read())

        trs = soup.findAll("tr", valign="middle")

        count = 0
        for tr in trs:
            # The odd trs are just spacers
            if count % 2 == 0:
                application = PlanningApplication()

                tds = tr.findAll("td")
                
                application.date_received = search_day
                application.council_reference = tds[1].a.string
                application.address = tds[3].a.string
                application.postcode = getPostcodeFromText(application.address)
                
                # All the links in this <tr> go to the same place...
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

                # Still looking for description and comment url
                
                # For the description, we'll need the info page
                info_soup = BeautifulSoup(urllib2.urlopen(application.info_url).read())

                application.description = info_soup.find(text="Description").findNext("td").findNext("td").font.string

                # While we're here, lets get the OSGB grid ref
                application.osgb_x, application.osgb_y = info_soup.find(text="Grid Reference").findNext("td").font.string.split("-")

                # We'll have to use an email address for comments
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)

            count += 1

        return self._results
Example #21
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        monday_before = search_day - datetime.timedelta(search_day.weekday())

        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3: # i.e. it is Friday, Saturday, or Sunday
            # We need to add a week
            thursday = thursday + datetime.timedelta(7)

        this_url = self.base_url %(thursday.strftime(search_date_format))
        # Now get the search page
        response = urllib2.urlopen(this_url)
        soup = BeautifulSoup(response.read())

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.

        nav_strings = soup.findAll(text=" Application Number:")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findNext("p").string.strip()

            result_table = nav_string.findPrevious("table")

            application.date_received = datetime.datetime.strptime(result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip(), reg_date_format)

            application.osgb_x = result_table.find(text=" Easting:").findNext("p").string.strip()
            application.osgb_y = result_table.find(text=" Northing:").findNext("p").string.strip()

            application.description = result_table.find(text=" Proposed Development:").findNext("p").string.strip()
            application.address = result_table.find(text=" Location:").findNext("p").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = this_url

            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results
Example #22
    def handle_endtag(self, tag):
        if tag == "table" and self._in_result_table:
            self._current_application.description = self._current_application.description.strip()
            self._current_application.address = ' '.join(self._current_application.address.strip().split())
            self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            self._current_application.info_url = self.base_url # Can't link to the info page, due to javascript idiocy.
            self._current_application.comment_url = self.comment_url_template %{"reference": urllib.quote_plus(self._current_application.council_reference),
                                                                                "address": urllib.quote_plus(self._current_application.address),
                                                                                "description": urllib.quote_plus(self._current_application.description),
                                                                                }
            
            self._results.addApplication(self._current_application)

            self._in_result_table = False
            self._td_count = None

        if tag == "a":
            self._get_ref = False
Example #23
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        post_data = urllib.urlencode((
                ("type", "app"),
                ("time", "0"),
                ))
                                     
        # Now get the search page
        response = urllib2.urlopen(self.base_url, post_data)
        soup = BeautifulSoup(response.read())

        caseno_strings = soup.findAll(text="Case No:")

        for caseno_string in caseno_strings:
            application = PlanningApplication()

            application.council_reference = caseno_string.findNext("a").string.strip()
            info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])

            # See above for why we can't use the proper info url.
            application.info_url = self.info_url

            # In order to avoid doing a download to find the comment page, we'll
            # get the system key from this url

            syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]

            application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()

            application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()

#http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
            application.comment_url = self.comment_url_template %(application.council_reference, syskey)

            self._results.addApplication(application)

        return self._results
Example #24
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            [
                ("parent_directory_id", "200"),
                ("nav", "679"),
                ("id", "13266"),
                ("RecStart", "1"),
                ("RecCount", "100"),
                ("SDate", search_date.strftime(date_format)),
                ("EDate", search_date.strftime(date_format)),
                ]
            )

        search_url = self.base_url + "?" + search_data

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find("table", summary="List of planning applications that match your query")

        for tr in results_table.findAll("tr")[1:]:
            application = PlanningApplication()
            
            application.date_received = search_date
            
            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.comment_url = application.info_url

            application.address = ' '.join(tds[1].string.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[2].string.strip()

            self._results.addApplication(application)

        return self._results
Example #25
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # What we actually need is the monday before the date searched for:
        monday_before = search_day - datetime.timedelta(search_day.weekday())

        # Now get the search page
        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
        soup = BeautifulSoup(response.read())

        result_tables = soup.findAll("table", width="98%", cellpadding="2")

        for table in result_tables:
            application = PlanningApplication()

            trs = table.findAll("tr")
            application.council_reference = trs[0].strong.string.strip()
            relative_info_url = trs[0].a['href']
            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)

            application.address = trs[1].findAll("td")[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = trs[2].findAll("td")[1].string.strip()

            # There's probably a prettier way to get the date, but with Python,
            # it's easier for me to reinvent the wheel than to find an existing wheel!
            raw_date_recv = trs[3].findAll("td")[3].string.strip().split("/")
            # Check whether the application is on the target day. If not, discard it and move on.
            if int(raw_date_recv[0]) != day:
                continue
            application.date_received = datetime.date(int(raw_date_recv[2]), int(raw_date_recv[1]), int(raw_date_recv[0]))

            try:
                relative_comment_url = trs[5].findAll("td")[1].a['href']
                application.comment_url = urlparse.urljoin(self.base_url, relative_comment_url)
            except:
                application.comment_url = "No Comment URL."

            self._results.addApplication(application)

        return self._results
Example #26
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        next_page_url = self.base_url %{"date": search_date.strftime(date_format)}

        while next_page_url:
            try:
                response = urllib2.urlopen(next_page_url)
            except urllib2.HTTPError:
                # This is what seems to happen if there are no apps
                break

            soup = BeautifulSoup(response.read())

            next = soup.find(text="Next")
            if next:
                next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
            else:
                next_page_url = None

            # There is an <h3> for each app that we can use 
            for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
                application = PlanningApplication()

                application.date_received = search_date
                application.council_reference = h3.string.split(": ")[1]
                application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()

                application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])

                application.info_url = self.info_url %(urllib.quote(application.council_reference))

                application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()

                self._results.addApplication(application)

        return self._results
Example #27
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url % {"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Results are shown in a table each. The tables don't have any nice
        # attributes, but they do all contain a NavigableString "Application",
        # and nothing else does...
        nav_strings = soup.findAll(text="Application")

        for nav_string in nav_strings:
            result_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            links = result_table.findAll("a")

            # We can get OSGB coordinates from the link to streetmap
            map_qs_dict = cgi.parse_qs(urlparse.urlsplit(links[0]["href"])[3])

            application.osgb_x = map_qs_dict.get("x")[0]
            application.osgb_y = map_qs_dict.get("y")[0]

            application.council_reference = links[1].string.strip()
            application.info_url = urlparse.urljoin(self.base_url, links[1]["href"])
            application.comment_url = urlparse.urljoin(self.base_url, links[2]["href"])

            application.address = " ".join(links[0].previous.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            application.description = links[2].previous.strip()

            self._results.addApplication(application)

        return self._results
Example #28
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format)})
        soup = BeautifulSoup.BeautifulSoup(response.read())

        if not soup.find(text=re.compile("No Results Found")):
            
            trs = soup.findAll("table", {"class": "dataTable"})[1].findAll("tr")[1:]

            for tr in trs:
                tds = tr.findAll("td")

                application = PlanningApplication()

                # We can fill in the date received without actually looking at the data
                application.date_received = search_date

                application.council_reference = tds[0].a.string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.address = ', '.join([x for x in tds[1].contents 
                                                 if isinstance(x, BeautifulSoup.NavigableString)])
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[2].string.strip()

                # To get the comment link we need to fetch the info page

                info_response = urllib2.urlopen(application.info_url)
                info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

                base = info_soup.base['href']

                application.comment_url = urlparse.urljoin(base,
                                                           info_soup.find("a", target="Planning Application Consultation Form")['href'])

                self._results.addApplication(application)

        return self._results
Example #29
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # We want the Friday of the week being searched for - the search
        # form takes a week-ending date.
        friday = search_day - datetime.timedelta(search_day.weekday()) + datetime.timedelta(4)

        # Not using urllib.urlencode as it insists on turning the "+" into "%2B"
        post_data = "WeekEndDate=%d%%2F%d%%2F%d&order=Received+Date&submit=search" %(friday.day, friday.month, friday.year)


        # Now get the search page
        response = urllib2.urlopen(self.base_url, post_data)
        soup = BeautifulSoup(response.read())

        trs = soup.find("table", summary="Planning Application search results table").findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            # Not sure why these are entities. We'll convert them back.
            application.council_reference = tds[0].a.contents[1].strip().replace("&#47;", "/")
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.comment_url = application.info_url

            application.date_received = datetime.datetime(*(time.strptime(tds[1].string.strip(), date_format)[0:6]))

            application.address = tds[2].string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[3].string.strip()

            self._results.addApplication(application)
        
        return self._results
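
This example, like #15 and #16, builds a datetime via datetime.datetime(*(time.strptime(...)[0:6])). On Python 2.5 or later the same parse can be done in one step; a sketch of the equivalence:

    import datetime

    # Equivalent one-step parse, assuming Python >= 2.5:
    date_received = datetime.datetime.strptime("01/06/2008", "%d/%m/%Y").date()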
Example #30
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        response = urllib2.urlopen(self.base_url %(search_day.strftime(search_date_format)))
        soup = BeautifulSoup(response.read())

        # The first <tr> contains headers
        trs = soup.table.findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.date_received = datetime.datetime.strptime(tds[0].string, received_date_format).date()

            application.info_url = urllib.unquote(urllib.quote_plus(urlparse.urljoin(self.base_url, tds[1].a['href'])))
            application.council_reference = tds[1].a.string.strip()
            application.address = tds[2].a.string.strip()
            application.postcode = getPostcodeFromText(application.address)

            # Now fetch the info url

            info_response = urllib.urlopen(application.info_url)
            info_soup = BeautifulSoup(info_response.read())

            application.description = info_soup.find(text="Proposal:").findNext("td").string.strip()

            try:
                application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text="Comment").parent['href'])
            except:
                application.comment_url = "No Comments"

            self._results.addApplication(application)

        return self._results