Example #1
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        next = self.base_url %{"day": day,
                               "month": month,
                               "year": year,
                               }

        while next:
            
            # Now get the search page
            response = urllib2.urlopen(next)

            soup = BeautifulSoup.BeautifulSoup(response.read())

            trs = soup.table.findAll("tr")[1:] # First one is just headers

            for tr in trs:
                application = PlanningApplication()

                application.date_received = search_day
                application.council_reference = tr.a.string
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                tds = tr.findAll("td")

                # The site pads these cells with "&nbsp;" entities; swap them for real spaces.
                application.address = ' '.join([x.replace("&nbsp;", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[4].string.replace("&nbsp;", " ").strip()

                # Get the info page in order to find the comment url
                # we could do this without a download if it wasn't for the
                # sector parameter - I wonder what that is?
                info_response = urllib2.urlopen(application.info_url)
                info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

                comment_navstring = info_soup.find(text=comment_re)
                
                if comment_navstring:
                    application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
                else:
                    application.comment_url = "No Comments"

                # While we're at it, let's get the OSGB
                application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]

                self._results.addApplication(application)
                
            next_element = soup.find(text="next").parent

            if next_element.name == 'a':
                next = urlparse.urljoin(self.base_url, next_element['href'])
            else:
                next = None

        return self._results
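
This first scraper leans on two module-level regexes, comment_re and mapref_re, which the snippet doesn't show. A plausible sketch of their shape, assuming the info pages label the relevant links in the obvious way (the exact patterns are an assumption):

    import re

    # Hypothetical reconstructions - the real patterns live elsewhere in the module.
    comment_re = re.compile("Comment on this [Aa]pplication")  # text of the comment link
    mapref_re = re.compile("Map Reference")                    # label next to the OSGB link
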
Example #2
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        # We'll set the start date to be one day earlier in order to catch the first result on every day at some point - see TODO list
        response = urllib2.urlopen(self.base_url %{"end_date": search_date.strftime(date_format),
                                                   "start_date": (search_date - datetime.timedelta(1)).strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each app is stored in its own table
        result_tables = soup.findAll("table", border="1")

        # For the moment, we'll have to ignore the first result (see TODO list).
        for table in result_tables[1:]:
            application = PlanningApplication()

            # It's not clear to me why this next one isn't the string of the next sibling. This works though!
            application.council_reference = table.find(text=re.compile("Reference")).parent.findNextSibling().contents[0]

            application.address = table.find(text="Location").parent.findNextSibling().string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = urlparse.urljoin(self.base_url, table.a['href'])

            # Let's go to the info_page and get the OSGB and the date_received
            info_request = urllib2.Request(application.info_url)

            # We need to add the language header in order to get UK style dates
            info_request.add_header("Accept-Language", "en-gb,en")
            info_response = urllib2.urlopen(info_request)
            info_soup = BeautifulSoup(info_response.read())
            
            grid_reference_td = info_soup.find(text="Grid Reference").findNext("td")
            x_element = grid_reference_td.font
            
            application.osgb_x = x_element.string.strip()
            application.osgb_y = x_element.nextSibling.nextSibling.string.strip()
            
            date_string = info_soup.find(text="Date Valid").findNext("td").string.strip()

            application.date_received = datetime.datetime(*(time.strptime(date_string, date_format)[0:6]))

            application.description = table.find(text=re.compile("Description of Proposal")).parent.nextSibling.string.strip()


            # There is a link to comment from the info page, though I can't click it.
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results
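
The datetime.datetime(*(time.strptime(...)[0:6])) construction above recurs throughout these scrapers: strptime returns a struct_time, and its first six fields are exactly the positional arguments datetime.datetime wants. A minimal demonstration, with an assumed date_format:

    import datetime
    import time

    date_format = "%d/%m/%Y"  # assumed - the real format string is defined elsewhere

    fields = time.strptime("21/08/2008", date_format)[0:6]  # (2008, 8, 21, 0, 0, 0)
    print datetime.datetime(*fields)                        # 2008-08-21 00:00:00

datetime.datetime.strptime(date_string, date_format) does the same in one call; the longer idiom simply predates its common use.
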
Example #3
    def getResultsByDayMonthYear(self, day, month, year):
        response = urllib2.urlopen(self.base_url)
        soup = BeautifulSoup.BeautifulSoup(response.read())

        trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a["href"])
            application.address = " ".join(tds[1].contents[1].strip().split()[1:])
            application.postcode = getPostcodeFromText(application.address)

            # We're going to need to download the info page in order to get
            # the comment link, the date received, and the description.

            info_response = urllib2.urlopen(application.info_url)
            info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

            application.description = info_soup.find(text="Proposal").findNext(text=True).strip()
            date_received_str = info_soup.find(text="Date Application Valid").findNext(text=True).split(",")[1].strip()

            # This is a nasty botch, but the easiest way I can see to get a date out of this is to make another string and use strptime
            better_date_str = "%s %s %s" % date_received_re.match(date_received_str).groups()
            application.date_received = datetime.datetime.strptime(better_date_str, "%d %B %Y").date()
            application.comment_url = info_soup.find("a", title="Comment on this planning application.")["href"]

            self._results.addApplication(application)

        return self._results
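
date_received_re isn't shown in this snippet either. Since its groups are reassembled as "day month year" and parsed with "%d %B %Y", it presumably strips the ordinal suffix from dates like "21st August 2008". A hedged reconstruction:

    import re

    # Hypothetical - the real pattern is defined elsewhere in the scraper.
    date_received_re = re.compile(r"(\d+)(?:st|nd|rd|th) (\w+) (\d{4})")

    print "%s %s %s" % date_received_re.match("21st August 2008").groups()  # 21 August 2008
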
Example #4
    def getResultsByDayMonthYear(self, day, month, year):
        # Now get the search page
        response = urllib2.urlopen(self.base_url)

        soup = BeautifulSoup(response.read())

        # Each application contains the nav string "Application: "
        nav_strings = soup.findAll(text="Application: ")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findPrevious("tr").findAll("td", limit=2)[1].string.strip()

            application.address = nav_string.findNext(text=location_re).split(":")[1].strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = nav_string.findNext(text="Proposal: ").findNext("td").string.strip()

            application.comment_url = urlparse.urljoin(self.base_url, nav_string.findNext(text="Proposal: ").findNext("a")['href'])

            application.date_received = datetime.datetime.strptime(nav_string.findNext(text=date_received_re).split(": ")[1], date_format).date()

            # FIXME: There is no appropriate info_url for the Harrow apps. 
            # I'll put the base url for the moment, but as that is
            # a list of apps from the last 7 days that will quickly be out of date.

            application.info_url = self.base_url
            
            self._results.addApplication(application)

        return self._results
Example #5
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each app is stored in a table on its own.
        # These tables don't have any nice distinguishing features,
        # but they do all contain a NavigableString "Application",
        # and nothing else in the page does.
        nav_strings = soup.findAll(text="Application")
        
        for nav_string in nav_strings:
            results_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            application.council_reference = results_table.a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, results_table.a['href'])
            application.address = results_table.findAll("td")[7].a.string.strip()

            application.postcode = getPostcodeFromText(application.address)
            application.description = results_table.findAll("td")[-1].contents[0].strip()

            # A few applications have comment urls, but most don't.
            # When they do, they have a case officer - I don't think we can
            # work out the other urls - even if they exist.
            # Best to use the email address.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results
Example #6
    def getResultsByDayMonthYear(self, day, month, year):
        # Now get the search page
        response = urllib2.urlopen(self.base_url)
        soup = BeautifulSoup(response.read())

        trs = soup.table.table.findAll("tr", {"class": re.compile("(?:odd)|(?:even)")})


        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.address = tds[2].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[3].string.strip()

            # Fetch the info url in order to get the date received and the comment url

            info_response = urllib2.urlopen(application.info_url)

            info_soup = BeautifulSoup(info_response.read())

            application.date_received = datetime.datetime.strptime(info_soup.find(text=re.compile(r"\s*Received:\s*")).findNext("td").string.strip(), date_format).date()

            application.comment_url = urlparse.urljoin(self.base_url, info_soup.find("input", value="Comment on this application").parent['action'])


            self._results.addApplication(application)

        return self._results
Example #7
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # What we actually need is the monday before the date searched for:
        monday_before = search_day - datetime.timedelta(search_day.weekday())

        # Now get the search page
        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
        soup = BeautifulSoup(response.read())

        result_tables = soup.findAll("table", align="Center", cellpadding="3")

        for table in result_tables:
            application = PlanningApplication()

            # We can set the date received and the comment url straight away.
            application.comment_url = self.comments_email_address

            trs = table.findAll("tr")

            application.council_reference = trs[0].a.string.strip()
            relative_info_url = trs[0].a['href']

            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)

            application.date_received = monday_before

            application.address = trs[1].findAll("td")[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = trs[2].findAll("td")[1].string.strip()

            self._results.addApplication(application)

        return self._results
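
The monday_before arithmetic works because date.weekday() numbers Monday as 0, so subtracting weekday() days always lands on the Monday of the same week:

    import datetime

    search_day = datetime.date(2008, 6, 5)  # a Thursday; weekday() == 3
    print search_day - datetime.timedelta(search_day.weekday())  # 2008-06-02, a Monday
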
Example #8
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        get_response = urllib2.urlopen(self.base_url)
        
        get_soup = BeautifulSoup(get_response.read())

        post_data = (
            ("__VIEWSTATE", get_soup.find("input", id="__VIEWSTATE")["value"]),
#            ("QuickSearchApplicationNumber$TextBox_ApplicationNumber", ""),
#            ("QuickSearchThisWeek$DropDownList_PastWeek", ""),
#            ("DetailedSearch$TextBox_PropertyNameNumber", ""),
#            ("DetailedSearch$Textbox_StreetName", ""),
#            ("DetailedSearch$Textbox_TownVillage", ""),
#            ("DetailedSearch$Textbox_Postcode", ""),
#            ("DetailedSearch$Textbox_Parish", ""),
#            ("DetailedSearch$Textbox_ApplicantSurname", ""),
#            ("DetailedSearch$TextBox_AgentName", ""),
            ("DetailedSearch$TextBox_DateRaisedFrom", search_date.strftime(date_format)),
            ("DetailedSearch$TextBox_DateRaisedTo", search_date.strftime(date_format)),
#            ("DetailedSearch$TextBox_DecisionFrom", "dd%2Fmm%2Fyyyy"),
#            ("DetailedSearch$TextBox_DecisionTo", "dd%2Fmm%2Fyyyy"),
            ("DetailedSearch$Button_DetailedSearch", "Search"),
            ("__EVENTVALIDATION", get_soup.find("input", id="__EVENTVALIDATION")["value"]),
            )

        # The response to the GET is a redirect. We'll need to post to the new url.
        post_response = urllib2.urlopen(get_response.url, urllib.urlencode(post_data))
        post_soup = BeautifulSoup(post_response.read())

        if not post_soup.find(text = re.compile("No matching record")):
            # The first row contains headers.
            trs = post_soup.find("table", {"class": "searchresults"}).findAll("tr")[1:]

            for tr in trs:
                application = PlanningApplication()

                # We can fill the date received in straight away from the date we searched for.
                application.date_received = search_date

                tds = tr.findAll("td")

                application.council_reference = tds[0].font.string.strip()
                application.address = tds[2].font.string.strip()
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[3].font.string.strip()

                # Set the info url and the comment url to be the same - can't get to the comment
                # one directly without javascript.
                application.info_url = self.info_url %(application.council_reference)
                application.comment_url = application.info_url

                self._results.addApplication(application)

        return self._results
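
This scraper (like Examples 12 and 29) drives an ASP.NET form, which only accepts a POST that echoes back hidden state fields such as __VIEWSTATE and __EVENTVALIDATION. A small helper in the same spirit - a sketch, not part of the original code - that scoops up every hidden input at once:

    from BeautifulSoup import BeautifulSoup

    def get_hidden_fields(soup):
        # Collect the hidden inputs the server expects to see posted back.
        return [(inp["name"], inp.get("value", ""))
                for inp in soup.findAll("input", type="hidden")
                if inp.get("name")]

    soup = BeautifulSoup('<form><input type="hidden" name="__VIEWSTATE" value="abc"/></form>')
    print get_hidden_fields(soup)  # [(u'__VIEWSTATE', u'abc')]
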
Example #9
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        monday_before = search_day - datetime.timedelta(search_day.weekday())

        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3: # i.e. it is Friday, Saturday, or Sunday
            # We need to add a week
            thursday = thursday + datetime.timedelta(7)

        this_url = self.base_url %(thursday.strftime(search_date_format))
        # Now get the search page
        response = urllib2.urlopen(this_url)
        soup = BeautifulSoup(response.read())

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.

        nav_strings = soup.findAll(text=" Application Number:")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findNext("p").string.strip()

            result_table = nav_string.findPrevious("table")

            application.date_received = datetime.datetime.strptime(result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip(), reg_date_format)

            application.osgb_x = result_table.find(text=" Easting:").findNext("p").string.strip()
            application.osgb_y = result_table.find(text=" Northing:").findNext("p").string.strip()

            application.description = result_table.find(text=" Proposed Development:").findNext("p").string.strip()
            application.address = result_table.find(text=" Location:").findNext("p").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = this_url

            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results
Example #10
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        search_date_string = search_date.strftime(date_format)

        "appstat=&decision=&appdec=&ward=&parish=&dadfrom=&dadto=&davfrom=01%2F06%2F2008&davto=02%2F06%2F2008&searchbut=Search"
        search_data = urllib.urlencode(
            [("searchtype", "1"),
             ("appstat", ""),
             ("decision", ""),
             ("appdec", ""),
             ("ward", ""),
             ("parish", ""),
             ("dadfrom", ""),
             ("dadto", ""),
             ("davfrom", search_date_string),
             ("davto", search_date_string),
             ("searchbut", "Search"),
                ]
            )

        split_search_url = self._split_base_url[:3] + (search_data, '')
        search_url = urlparse.urlunsplit(split_search_url)

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find(text="Application No").parent.parent.parent
        trs = results_table.findAll("tr")[1:]

        tr_counter = 0
        
        while tr_counter < len(trs):
            tr = trs[tr_counter]

            if tr_counter % 2 == 0:
                application = PlanningApplication()
                application.date_received = search_date
                application.comment_url = self.comment_email_address

                tds = tr.findAll("td")

                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                application.council_reference = tr.a.string.strip()

                application.address = tds[1].string.strip()
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()

                self._results.addApplication(application)

            tr_counter += 1

        return self._results
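
self._split_base_url is presumably urlparse.urlsplit(self.base_url) computed once in the constructor; the slice-and-urlunsplit trick then swaps a fresh query string into an otherwise unchanged url. A standalone illustration with a made-up url:

    import urlparse

    base_url = "http://example.gov.uk/planning/search.cgi?old=query"  # hypothetical
    split_base_url = urlparse.urlsplit(base_url)  # (scheme, netloc, path, query, fragment)

    print urlparse.urlunsplit(split_base_url[:3] + ("davfrom=01%2F06%2F2008", ""))
    # http://example.gov.uk/planning/search.cgi?davfrom=01%2F06%2F2008
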
Example #11
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            [("fuseaction", "planapps.list"),
             ("SUBJECTID", "104CC166-3ED1-4D22-B9F1E2FB8438478A"),
             ("src_fromdayRec", day),
             ("src_frommonthRec", month),
             ("src_fromyearRec", year),
             ("src_todayRec", day),
             ("src_tomonthRec", month),
             ("src_toyearRec", year),
             ("findroadworks", "GO"),
             ]
            )
        
        search_url = self.base_url + "?" + search_data

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find("table", id="results")

        # Apart from the first tr, which contains headers, the trs come in pairs for each application

        trs = results_table.findAll("tr")[1:]

        tr_count = 0
        while tr_count < len(trs):
            tr = trs[tr_count]

            if tr_count % 2 == 0:
                application = PlanningApplication()
                application.date_received = search_date
                
                tds = tr.findAll("td")

                application.council_reference = tds[0].a.string.strip()
                application.comment_url = self.comment_url %(application.council_reference)

                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.address = ', '.join([x.strip() for x in tds[1].findAll(text=True)])
                application.postcode = getPostcodeFromText(application.address)
            else:
                # Get rid of the "Details: " at the beginning.
                application.description = tr.td.string.strip()[9:]

                self._results.addApplication(application)

            tr_count += 1

        return self._results
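
The counter-and-modulus loop pairs the rows up: even rows open a new application, odd rows finish it off with the description. The same shape in miniature, using enumerate:

    rows = ["ref A", "details A", "ref B", "details B"]
    for i, row in enumerate(rows):
        if i % 2 == 0:
            current = row               # first row of the pair starts a record
        else:
            print current, "->", row    # second row completes and stores it
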
Example #12
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        get_request = urllib2.Request(self.base_url)
        get_response = urllib2.urlopen(get_request)
        cookie_jar.extract_cookies(get_response, get_request)
        
        get_soup = BeautifulSoup(get_response.read())

        post_data = (
            ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
            ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
            ("action", "Search"),
#            ("ApplicationSearch21%3AtbDevAddress", ""),
#            ("ApplicationSearch21%3AtbApplicantName", ""),
#            ("ApplicationSearch21%3AtbAgentName", ""),
            ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
            ("ApplicationSearch21:btnDateSubmitted", "Search"),
#            ("ApplicationSearch21%3AtbDateDetermined", ""),
            )

        
        post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        post_response = cookie_handling_opener.open(post_request)

        post_soup = BeautifulSoup(post_response.read())

        # Discard the first <tr>, which contains headers
        trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()
            
            tds = tr.findAll("td")

            application.council_reference = tds[0].string.strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[2].string.strip()

            application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
            application.info_url = self.info_url %(application.council_reference)

            # The comment url must be accessed by a POST, so we'll just use the info url for that as well

            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results
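
cookie_jar and cookie_handling_opener are module-level objects this snippet takes as given. A plausible setup, assuming nothing beyond standard cookie handling:

    import cookielib
    import urllib2

    cookie_jar = cookielib.CookieJar()
    cookie_handling_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
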
Example #13
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"day": day,
                                                   "month": month,
                                                   "year": year,
                                                   })
        soup = BeautifulSoup(response.read())

        trs = soup.findAll("tr", valign="middle")

        count = 0
        for tr in trs:
            # The odd trs are just spacers
            if count % 2 == 0:
                application = PlanningApplication()

                tds = tr.findAll("td")
                
                application.date_received = search_day
                application.council_reference = tds[1].a.string
                application.address = tds[3].a.string
                application.postcode = getPostcodeFromText(application.address)
                
                # All the links in this <tr> go to the same place...
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

                # Still looking for description and comment url
                
                # For the description, we'll need the info page
                info_soup = BeautifulSoup(urllib2.urlopen(application.info_url).read())

                application.description = info_soup.find(text="Description").findNext("td").findNext("td").font.string

                # While we're here, lets get the OSGB grid ref
                application.osgb_x, application.osgb_y = info_soup.find(text="Grid Reference").findNext("td").font.string.split("-")

                # We'll have to use an email address for comments
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)

            count += 1

        return self._results
Example #14
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        date_string = search_date.strftime(date_format)
        
        search_data = urllib.urlencode({"reference": "",
                                        "undecided": "yes",
                                        "dateFrom": date_string,
                                        "dateTo": date_string,
                                        "Address": "",
                                        "validate": "true",
                                        })


        request = urllib2.Request(self.base_url, search_data)
        response = urllib2.urlopen(request)

        html = response.read()

        soup = BeautifulSoup(html)

        tables = soup.findAll("table", {"style": "width:auto;"})

        if not tables:
            return self._results

        # We don't want the first or last tr
        trs = tables[0].findAll("tr")[1:-1]

        for tr in trs:
            app = PlanningApplication()

            tds = tr.findAll("td")

            if len(tds) == 4:
                local_info_url = tds[0].a['href']
                app.info_url = urlparse.urljoin(self.base_url, local_info_url)
                app.council_reference = tds[0].a.string

                app.address = tds[1].string
                app.postcode = getPostcodeFromText(app.address)

                app.description = tds[2].string

                app.comment_url = urlparse.urljoin(self.base_url, comment_url_end %app.council_reference)
                app.date_received = search_date

                self._results.addApplication(app)

        return self._results
Example #15
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        pagenum = 1

        while pagenum:
            response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format), 
                                                       "pagenum": pagenum}
                                       )
            soup = BeautifulSoup.BeautifulSoup(response.read())

            # This is not a nice way to find the results table, but I can't 
            # see anything good to use, and it works...

            # There are two trs with style attributes per app. This will find all the first ones of the pairs.
            trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2]

            for tr in trs:
                tds = tr.findAll("td")
                date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()

                # Stop looking through the list if we have found one which is earlier than the date searched for.
                if date_received < search_date:
                    # If we break out, then we won't want the next page
                    pagenum = None
                    break

                application = PlanningApplication()
                application.date_received = date_received

                application.council_reference = tds[0].small.string.strip()

                # The second <td> contains the address, split up with <br/>s
                application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()

                application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)
            else:
                # If we got through the whole list without breaking out,
                # then we'll want to get the next page.
                pagenum += 1

        return self._results
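
Note the else clause hanging off the for loop above: it runs only when the loop finishes without hitting break, which is exactly what advances pagenum to the next page. The idiom in miniature:

    for n in [1, 2, 3]:
        if n > 10:
            break
    else:
        print "no break occurred - fetch the next page"
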
Example #16
    def getResultsByDayMonthYear(self):
        # Note that we don't take the day, month and year parameters here.

        # First get the search page
        request = urllib2.Request(self.base_url)
        response = urllib2.urlopen(request)

        soup = BeautifulSoup(response.read())

        trs = soup.findAll("tr", {"class": "dbResults"})

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].a.contents[0].strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[2].string.strip()
            application.info_url= urlparse.urljoin(self.base_url, tds[0].a['href'])

            # These bits have been commented out for performance reasons. We can't
            # afford to go to every application's details page ten times a day while
            # it is open. Instead, we'll just set the date_received to be the scrape
            # date. The comment url can be got by using the id in the info url.
            application.date_received = datetime.datetime.today()
            
            relative_comment_url_template = "PlanAppComment.aspx?appId=%d"

            # Get the appId from the info_url

            app_id = int(cgi.parse_qs(urlparse.urlsplit(application.info_url)[3])['frmId'][0])

            application.comment_url = urlparse.urljoin(self.base_url, relative_comment_url_template %(app_id))


#             # I'm afraid we're going to have to get each info url...
#             this_app_response = urllib2.urlopen(application.info_url)
#             this_app_soup = BeautifulSoup(this_app_response.read())

#             # If there is no received date, for some reason, we'll use the publicity date instead.
#             date_string = (this_app_soup.find("span", id="lblTrackRecievedDate") or this_app_soup.find("span", id="lblPubDate")).string
#             application.date_received = datetime.datetime(*(time.strptime(date_string, date_format)[0:6]))

#             application.comment_url = urlparse.urljoin(self.base_url, this_app_soup.find("a", id="lnkMakeComment")['href'])

            self._results.addApplication(application)

        return self._results
Example #17
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url % {"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Results are shown in a table each. The tables don't have any nice
        # attributes, but they do all contain a NavString "Application",
        # and nothing else does...
        nav_strings = soup.findAll(text="Application")

        for nav_string in nav_strings:
            result_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            links = result_table.findAll("a")

            # We can get OSGB coordinates from the link to streetmap
            map_qs_dict = cgi.parse_qs(urlparse.urlsplit(links[0]["href"])[3])

            application.osgb_x = map_qs_dict.get("x")[0]
            application.osgb_y = map_qs_dict.get("y")[0]

            application.council_reference = links[1].string.strip()
            application.info_url = urlparse.urljoin(self.base_url, links[1]["href"])
            application.comment_url = urlparse.urljoin(self.base_url, links[2]["href"])

            application.address = " ".join(links[0].previous.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            application.description = links[2].previous.strip()

            self._results.addApplication(application)

        return self._results
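
The OSGB coordinates here come straight out of the x and y parameters of a streetmap link's query string. The extraction, shown standalone with made-up values:

    import cgi
    import urlparse

    href = "http://www.streetmap.co.uk/newmap.srf?x=512345&y=187654&z=1"  # hypothetical
    map_qs_dict = cgi.parse_qs(urlparse.urlsplit(href)[3])
    print map_qs_dict["x"][0], map_qs_dict["y"][0]  # 512345 187654
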
Example #18
    def getResultsByDayMonthYear(self, day, month, year):
        response = urllib2.urlopen(self.base_url)
        soup = BeautifulSoup(response.read())
        
        for details_input in soup.find("table", summary="Planning Applications Received in the last 7 days").findAll("input", alt="Planning Details"):
            application = PlanningApplication()

            first_tr = details_input.findPrevious("tr")

            other_trs = first_tr.findNextSiblings("tr", limit=8)

            application.council_reference = first_tr.find("input", {"name": "refval"})['value']
            application.address = other_trs[0].findAll("td")[1].string.strip()
            application.description = other_trs[1].findAll("td")[1].string.strip()
            application.date_received = datetime.datetime.strptime(other_trs[3].findAll("td")[1].string.strip(), date_format).date()

            # Both the info page and the comment page can only be got to
            # by a POST. The best we can do is give the url of the search page
            application.info_url = application.comment_url = self.search_url

            self._results.addApplication(application)

        return self._results
Example #19
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # What we actually need is the monday before the date searched for:
        monday_before = search_day - datetime.timedelta(search_day.weekday())

        # Now get the search page
        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
        soup = BeautifulSoup(response.read())

        result_tables = soup.findAll("table", width="98%", cellpadding="2")

        for table in result_tables:
            application = PlanningApplication()

            trs = table.findAll("tr")
            application.council_reference = trs[0].strong.string.strip()
            relative_info_url = trs[0].a['href']
            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)

            application.address = trs[1].findAll("td")[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = trs[2].findAll("td")[1].string.strip()

            # There's probably a prettier way to get the date, but with Python, it's
            # easier for me to reinvent the wheel than to find an existing wheel!
            raw_date_recv = trs[3].findAll("td")[3].string.strip().split("/")

            # Check whether the application is on the target day. If not, discard it and move on.
            if int(raw_date_recv[0]) != day:
                continue
            application.date_received = datetime.date(int(raw_date_recv[2]), int(raw_date_recv[1]), int(raw_date_recv[0]))

            try:
                relative_comment_url = trs[5].findAll("td")[1].a['href']
                application.comment_url = urlparse.urljoin(self.base_url, relative_comment_url)
            except:
                application.comment_url = "No Comment URL."

            self._results.addApplication(application)

        return self._results
Example #20
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        next_page_url = self.base_url %{"date": search_date.strftime(date_format)}

        while next_page_url:
            try:
                response = urllib2.urlopen(next_page_url)
            except urllib2.HTTPError:
                # This is what seems to happen if there are no apps
                break

            soup = BeautifulSoup(response.read())

            next = soup.find(text="Next")
            if next:
                next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
            else:
                next_page_url = None

            # There is an <h3> for each app that we can use 
            for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
                application = PlanningApplication()

                application.date_received = search_date
                application.council_reference = h3.string.split(": ")[1]
                application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()

                application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])

                application.info_url = self.info_url %(urllib.quote(application.council_reference))

                application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()

                self._results.addApplication(application)

        return self._results
Example #21
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        response = urllib2.urlopen(self.base_url %(search_day.strftime(search_date_format)))
        soup = BeautifulSoup(response.read())

        # The first <tr> contains headers
        trs = soup.table.findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.date_received = datetime.datetime.strptime(tds[0].string, received_date_format).date()

            application.info_url = urllib.unquote(urllib.quote_plus(urlparse.urljoin(self.base_url, tds[1].a['href'])))
            application.council_reference = tds[1].a.string.strip()
            application.address = tds[2].a.string.strip()
            application.postcode = getPostcodeFromText(application.address)

            # Now fetch the info url

            info_response = urllib.urlopen(application.info_url)
            info_soup = BeautifulSoup(info_response.read())

            application.description = info_soup.find(text="Proposal:").findNext("td").string.strip()

            try:
                application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text="Comment").parent['href'])
            except:
                application.comment_url = "No Comments"

            self._results.addApplication(application)

        return self._results
Example #22
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        post_data = urllib.urlencode((
                ("type", "app"),
                ("time", "0"),
                ))
                                     
        # Now get the search page
        response = urllib2.urlopen(self.base_url, post_data)
        soup = BeautifulSoup(response.read())

        caseno_strings = soup.findAll(text="Case No:")

        for caseno_string in caseno_strings:
            application = PlanningApplication()

            application.council_reference = caseno_string.findNext("a").string.strip()
            info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])

            # See above for why we can't use the proper info url.
            application.info_url = self.info_url

            # In order to avoid doing a download to find the comment page, we'll
            # get the system key from this url

            syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]

            application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()

            application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()

            # e.g. http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
            application.comment_url = self.comment_url_template %(application.council_reference, syskey)

            self._results.addApplication(application)

        return self._results
Example #23
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            [
                ("parent_directory_id", "200"),
                ("nav", "679"),
                ("id", "13266"),
                ("RecStart", "1"),
                ("RecCount", "100"),
                ("SDate", search_date.strftime(date_format)),
                ("EDate", search_date.strftime(date_format)),
                ]
            )

        search_url = self.base_url + "?" + search_data

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find("table", summary="List of planning applications that match your query")

        for tr in results_table.findAll("tr")[1:]:
            application = PlanningApplication()
            
            application.date_received = search_date
            
            tds = tr.findAll("td")

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.comment_url = application.info_url

            application.address = ' '.join(tds[1].string.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[2].string.strip()

            self._results.addApplication(application)

        return self._results
Example #24
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        contents = self.get_contents(day, month, year)

        soup = BeautifulSoup.BeautifulSoup(contents)

        results_table = soup.find("table", {"class": "cResultsForm"})

        # First, we work out what column each thing of interest is in from the headings
        headings = [x.string for x in results_table.findAll("th")]

        ref_col = index_or_none(headings, "Application Ref.") or \
            index_or_none(headings, "Case Number") or \
            index_or_none(headings, "Application Number")

        address_col = headings.index("Address")
        description_col = headings.index("Proposal")

        comments_url = urlparse.urljoin(self.base_url, self.comments_url_end)


        for tr in results_table.findAll("tr")[1:]:
            application = PlanningApplication()

            application.date_received = search_date

            tds = tr.findAll(re.compile("t[dh]"))

            application.council_reference = tds[ref_col].string.strip()
            application.address = tds[address_col].string.strip()
            application.description = tds[description_col].string.strip()

            application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

            # We need the query string from this url to make the comments_url
            query_string = urlparse.urlsplit(application.info_url)[3]

            # This is probably slightly naughty, but I'm just going to add the querystring
            # on to the end manually
            application.comment_url = "%s?%s" %(comments_url, query_string)

            self._results.addApplication(application)

        return self._results
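
index_or_none is a helper this snippet assumes; given how it is chained with or, it must return a heading's index when present and None otherwise. A sketch:

    def index_or_none(seq, value):
        # Like list.index, but returns None instead of raising ValueError.
        try:
            return seq.index(value)
        except ValueError:
            return None

One wrinkle worth knowing: an index of 0 is falsy, so the or-chain above only behaves if the reference heading is never the first column.
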
Example #25
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)
        #- Crawley only allows searches from-to, so:

        next = self.base_url %{
            "dayFrom": day,
            "monthFrom": month,
            "yearFrom": year,
            "dayTo": day,
            "monthTo": month,
            "yearTo": year,
            }
        # Now get the search page
        response = urllib2.urlopen(next)
        soup = BeautifulSoup.BeautifulSoup(response.read())
        
        if soup.table: #- Empty result set has no table
            trs = soup.table.findAll("tr")[1:] # First one is just headers    
            for tr in trs:    
                tds = tr.findAll("td")
                application = PlanningApplication()         
                application.council_reference = tds[0].a.contents[0].strip().replace("&#47;", "/")
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])

                info_qs = cgi.parse_qs(urlparse.urlsplit(application.info_url)[3])

                comment_qs = {
                  "pAppNo": application.council_reference,
                  "pAppDocName": info_qs["ssDocName"][0],
                  }
                application.comment_url = self.comment_url_template %comment_qs

                application.address = tds[1].string.strip()
                if tds[2].string: #- if postcode present, append it to the address too
                    application.postcode = tds[2].string.replace("&nbsp;", " ").strip()
                    application.address += ", " + application.postcode
                application.description = tds[3].string.strip()
                application.date_received = datetime.datetime(*(time.strptime(tds[4].string.strip(), date_format)[0:6]))
                self._results.addApplication(application)
        return self._results
Example #26
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format)})
        soup = BeautifulSoup.BeautifulSoup(response.read())

        if not soup.find(text=re.compile("No Results Found")):
            
            trs = soup.findAll("table", {"class": "dataTable"})[1].findAll("tr")[1:]

            for tr in trs:
                tds = tr.findAll("td")

                application = PlanningApplication()

                # We can fill in the date received without actually looking at the data
                application.date_received = search_date

                application.council_reference = tds[0].a.string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.address = ', '.join([x for x in tds[1].contents 
                                                 if isinstance(x, BeautifulSoup.NavigableString)])
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[2].string.strip()

                # To get the comment link we need to fetch the info page

                info_response = urllib2.urlopen(application.info_url)
                info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

                base = info_soup.base['href']

                application.comment_url = urlparse.urljoin(base,
                                                           info_soup.find("a", target="Planning Application Consultation Form")['href'])

                self._results.addApplication(application)

        return self._results
Example #27
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # We want the Friday of the week being searched for:
        # subtracting weekday() days lands on Monday, and adding 4 gets to Friday.
        friday = search_day - datetime.timedelta(search_day.weekday()) + datetime.timedelta(4)

        # Not using urllib.urlencode as it insists on turning the "+" into "%2B"
        post_data = "WeekEndDate=%d%%2F%d%%2F%d&order=Received+Date&submit=search" %(friday.day, friday.month, friday.year)


        # Now get the search page
        response = urllib2.urlopen(self.base_url, post_data)
        soup = BeautifulSoup(response.read())

        trs = soup.find("table", summary="Planning Application search results table").findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            # Not sure why these are entities. We'll convert them back.
            application.council_reference = tds[0].a.contents[1].strip().replace("&#47;", "/")
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            application.comment_url = application.info_url

            application.date_received = datetime.datetime(*(time.strptime(tds[1].string.strip(), date_format)[0:6]))

            application.address = tds[2].string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[3].string.strip()

            self._results.addApplication(application)
        
        return self._results
Example #28
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

#         post_data = [
#             ("EFNO", ""),
#             ("STName", ""),
#             ("STNUMB", ""),
#             ("ADRSNO", ""),
#             ("WARD", "AllWards"),
#             ("AGT", ""),
#             ("ATCDE", "AllApps"),
#             ("DECDE", "AllDecs"),
#             ("DTErec", search_day.strftime(date_format)),
#             ("DTErecTo", search_day.strftime(date_format)),
#             ("DTEvalid", ""),
#             ("DTEvalidTo", ""),
#             ("APDECDE", "AllAppDecs"),
#             ("submit", "Start+Search"),
#             ]
        post_data = "REFNO=&STName=&STNUMB=&ADRSNO=&WARD=AllWards&AGT=&ATCDE=AllApps&DECDE=AllDecs&DTErec=%(date)s&DTErecTo=%(date)s&DTEvalid=&DTEvalidTo=&APDECDE=AllAppDecs&submit=Start+Search" %{"date": search_day.strftime(date_format)}

        while post_data:
            

            # Now get the search page

#            sys.stderr.write("Fetching: %s\n" %self.base_url)
#            sys.stderr.write("post data: %s\n" %post_data) 
            
            response = urllib2.urlopen(self.base_url, post_data)

#            sys.stderr.write("Got it\n")
            soup = BeautifulSoup(response.read())

#            sys.stderr.write("Created soup\n")

            results_form = soup.find("form", {"name": "currentsearchresultsNext"})

            # Sort out the post_data for the next page, if there is one
            # If there is no next page then there will be no inputs in the form.
            # In this case, post_data will be '', which is false.

#            sys.stderr.write("Found form containing results\n")

            post_data = urllib.urlencode([(x['name'], x['value']) for x in results_form.findAll("input")])

#            sys.stderr.write("Got post data\n")

            # Each result has one link, and they are the only links in the form

            links = results_form.findAll("a")

#            sys.stderr.write("Got list of links\n")

            for link in links:

#                sys.stderr.write("Working on link: %s\n" %link['href'])

                application = PlanningApplication()

                application.date_received = search_day
                application.info_url = urlparse.urljoin(self.base_url, link['href'])
                application.council_reference = link.string.strip()

                application.address = link.findNext("td").string.strip()
                application.postcode = getPostcodeFromText(application.address)

                application.description = link.findNext("tr").findAll("td")[-1].string.strip()

                # To get the comment url, we're going to have to go to each info url :-(

#                sys.stderr.write("Fetching: %s\n" %application.info_url)
                info_response = urllib2.urlopen(application.info_url)
#                sys.stderr.write("Got it\n")

                info_soup = BeautifulSoup(info_response.read())

                comment_nav_string = info_soup.find(text="Comment on this case")
                if comment_nav_string:
                    application.comment_url = comment_nav_string.parent['href']
                else:
                    application.comment_url = "No Comments"

                # e.g. http://publicaccess.westminster.gov.uk/publicaccess/tdc/dcapplication/application_comments_entryform.aspx?caseno=K586GHRP03500

                self._results.addApplication(application)

#                sys.stderr.write("Finished that link\n")


#        sys.stderr.write("Finished while loop, returning stuff.\n")

        return self._results
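
The while loop's exit condition is easy to miss: on the last page the results form contains no inputs, urlencode of an empty list is the empty string, and the empty string is falsy. Concretely:

    import urllib

    print repr(urllib.urlencode([]))      # ''  - falsy, so the loop stops
    print urllib.urlencode([("a", "1")])  # 'a=1'
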
Example #29
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        get_response = urllib2.urlopen(self.base_url)
        get_soup = BeautifulSoup(get_response.read())

        # These are the inputs with a default value
        inputs_needed = [(x['id'], x['value']) for x in get_soup.form.findAll("input", value=True, type=lambda x: x != "submit")]

        # Add the submit button
        inputs_needed.append(('cmdWeeklyList', 'Search Database'))

        # We also need to add the date we want to search for.
        # This is the friday after the date searched for.
        # At weekends this will get you the friday before, but that isn't
        # a problem as there are no apps then.
        friday = search_day + datetime.timedelta(4 - search_day.weekday())

        inputs_needed.append(("ddlWeeklyList", friday.strftime(date_format)))

        # We'd like as many results as we can get away with on one page.
        # 50 is the largest option offered.
        inputs_needed.append(("ddlResultsPerPageWeeklyList", "50"))

        post_data = dict(inputs_needed)
        post_url = get_response.url

        # In case something goes wrong here, let's break out of the loop after at most 10 passes
        passes = 0

        while True:
            passes += 1

            post_response = urllib2.urlopen(post_url, urllib.urlencode(post_data))
            post_soup = BeautifulSoup(post_response.read())

            result_tables = post_soup.table.findAll("table")

            for result_table in result_tables:
                application = PlanningApplication()

                application.address = ', '.join(result_table.findPrevious("b").string.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                trs = result_table.findAll("tr")

                application.council_reference = trs[0].findAll("td")[1].string.strip()
                application.date_received = datetime.datetime.strptime(trs[1].findAll("td")[1].string.strip(), date_format).date()
                application.description = trs[3].findAll("td")[1].string.strip()

                application.info_url = self.info_url %(urllib.quote(application.council_reference))

                # In order to avoid having to do a download for every app,
                # I'm setting the comment url to be the same as the info_url.
                # There is a comment page which can be got to by pressing the button
                application.comment_url = application.info_url

                self._results.addApplication(application)

            # Which page are we on?
            page_no = int(post_soup.find("span", id="lblPageNo").b.string)
            total_pages = int(post_soup.find("span", id="lblTotalPages").b.string)

            if passes > 10 or not page_no < total_pages:
                break

            post_data = [
                ("__EVENTTARGET", "hlbNext"),
                ("__EVENTARGUMENT", ""),
                ("__VIEWSTATE", post_soup.find("input", id="__VIEWSTATE")['value']),
                ("__EVENTVALIDATION", post_soup.find("input", id="__EVENTVALIDATION")['value']),
                 ]

            post_url = urlparse.urljoin(post_response.url, post_soup.find("form")['action'])

        return self._results
Example #30
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # There's going to be some faffing around here. We need a cookie to say we have agreed to some T&Cs.

        # First get the search page - we'll be redirected somewhere else for not having the cookie

        first_request = urllib2.Request(self.first_url)
        first_response = urllib2.urlopen(first_request)
        cookie_jar.extract_cookies(first_response, first_request)

        first_page_soup = BeautifulSoup.BeautifulSoup(first_response.read())

        first_page_action = urlparse.urljoin(self.first_url, first_page_soup.form['action'])
        
        the_input = first_page_soup.form.input

        second_page_post_data = urllib.urlencode(
            (
                (the_input['name'], the_input['value']),
                )
            )
        
        second_request = urllib2.Request(first_page_action, second_page_post_data)
        cookie_jar.add_cookie_header(second_request)
        second_response = urllib2.urlopen(second_request)
        cookie_jar.extract_cookies(second_response, second_request)

        # Now (finally) get the search page

        # Sample POST data, for reference:
        # ApplicationNumber=&AddressPrefix=&Postcode=&CaseOfficer=&WardMember=&DateReceivedStart=31%2F08%2F2008&DateReceivedEnd=31%2F08%2F2008&DateDecidedStart=&DateDecidedEnd=&Locality=&AgentName=&ApplicantName=&ShowDecided=&DecisionLevel=&Sort1=FullAddressPrefix&Sort2=DateReceived+DESC&Submit=Search

        post_data = urllib.urlencode(
            (
                ("ApplicationNumber", ""),
                ("AddressPrefix", ""),
                ("Postcode", ""),
                ("CaseOfficer", ""),
                ("WardMember", ""),
                ("DateReceivedStart", search_day.strftime(date_format)),
                ("DateReceivedEnd", search_day.strftime(date_format)),
                ("DateDecidedStart", ""),
                ("DateDecidedEnd", ""),
                ("Locality", ""),
                ("AgentName", ""),
                ("ApplicantName", ""),
                ("ShowDecided", ""),
                ("DecisionLevel", ""),
                ("Sort1", "FullAddressPrefix"),
                ("Sort2", "DateReceived DESC"),
                ("Submit", "Search"),
                )
            )

        search_request = urllib2.Request(self.base_url)
        cookie_jar.add_cookie_header(search_request)
        search_response = urllib2.urlopen(search_request, post_data)

        soup = BeautifulSoup.BeautifulSoup(search_response.read())

        app_no_strings = soup.findAll(text="App. No.:")

        for app_no_string in app_no_strings:
            application = PlanningApplication()
            application.date_received = search_day

            application.council_reference = app_no_string.findNext("a").string.strip()
            application.info_url = urlparse.urljoin(self.base_url, app_no_string.findNext("a")['href'])

            application.address = ' '.join([x.strip() for x in app_no_string.findNext(text="Site Address:").findNext("td").contents if type(x) == BeautifulSoup.NavigableString])
            application.postcode = getPostcodeFromText(application.address)

            application.comment_url = urlparse.urljoin(self.base_url, app_no_string.findNext(text="Comment on application").parent['href'])

            application.description = app_no_string.findNext(text="Description:").findNext("td").string.strip()

            self._results.addApplication(application)
        
        return self._results