Code Example #1
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        next = self.base_url %{"day": day,
                               "month": month,
                               "year": year,
                               }

        while next:
            
            # Now get the search page
            response = urllib2.urlopen(next)

            soup = BeautifulSoup.BeautifulSoup(response.read())

            trs = soup.table.findAll("tr")[1:] # First one is just headers

            for tr in trs:
                application = PlanningApplication()

                application.date_received = search_day
                application.council_reference = tr.a.string
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                tds = tr.findAll("td")

                application.address = ' '.join([x.replace("&nbsp;", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[4].string.replace("&nbsp;", " ").strip()

                # Get the info page in order to find the comment url
                # we could do this without a download if it wasn't for the
                # sector parameter - I wonder what that is?
                info_response = urllib2.urlopen(application.info_url)
                info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

                comment_navstring = info_soup.find(text=comment_re)
                
                if comment_navstring:
                    application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
                else:
                    application.comment_url = "No Comments"

                # While we're at it, let's get the OSGB
                application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]

                self._results.addApplication(application)
                
            next_element = soup.find(text="next").parent

            if next_element.name == 'a':
                next = urlparse.urljoin(self.base_url, next_element['href'])
            else:
                next = None

        return self._results
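
All seven examples lean on the same scaffolding from the planningalerts scrapers: a PlanningApplication record, a results collection exposing addApplication, and a getPostcodeFromText helper. None of that appears in the listings, so here is a minimal hypothetical sketch of those assumed pieces; the names match the usage above, but the real definitions live elsewhere in the project and will differ.

    import re

    class PlanningApplication:
        """Hypothetical stand-in for the project's record class."""
        def __init__(self):
            self.council_reference = None
            self.address = None
            self.postcode = None
            self.description = None
            self.info_url = None
            self.comment_url = None
            self.date_received = None
            self.osgb_x = None
            self.osgb_y = None

    class PlanningAuthorityResults:
        """Hypothetical container matching the self._results usage above."""
        def __init__(self):
            self.applications = []

        def addApplication(self, application):
            self.applications.append(application)

    # Rough UK postcode pattern; the real helper is presumably more careful.
    postcode_re = re.compile(r"[A-Z]{1,2}\d[A-Z\d]? ?\d[A-Z]{2}")

    def getPostcodeFromText(text):
        # Return the first postcode-shaped substring, or None.
        match = postcode_re.search(text or "")
        return match.group() if match else None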
Code Example #2
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        # We'll set the start date to be one day earlier in order to catch the first result on every day at some point - see TODO list
        response = urllib2.urlopen(self.base_url %{"end_date": search_date.strftime(date_format),
                                                   "start_date": (search_date - datetime.timedelta(1)).strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each app is stored in its own table
        result_tables = soup.findAll("table", border="1")

        # For the moment, we'll have to ignore the first result (see TODO list).
        for table in result_tables[1:]:
            application = PlanningApplication()

            # It's not clear to me why this next one isn't the string of the next sibling. This works though!
            application.council_reference = table.find(text=re.compile("Reference")).parent.findNextSibling().contents[0]

            application.address = table.find(text="Location").parent.findNextSibling().string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = urlparse.urljoin(self.base_url, table.a['href'])

            # Let's go to the info_page and get the OSGB and the date_received
            info_request = urllib2.Request(application.info_url)

            # We need to add the language header in order to get UK style dates
            info_request.add_header("Accept-Language", "en-gb,en")
            info_response = urllib2.urlopen(info_request)
            info_soup = BeautifulSoup(info_response.read())
            
            grid_reference_td = info_soup.find(text="Grid Reference").findNext("td")
            x_element = grid_reference_td.font
            
            application.osgb_x = x_element.string.strip()
            application.osgb_y = x_element.nextSibling.nextSibling.string.strip()
            
            date_string = info_soup.find(text="Date Valid").findNext("td").string.strip()

            application.date_received = datetime.datetime(*(time.strptime(date_string, date_format)[0:6]))

            application.description = table.find(text=re.compile("Description of Proposal")).parent.nextSibling.string.strip()


            # There is a link to comment from the info page, though I can't click it.
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results
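
The Accept-Language trick in example #2 is worth isolating: without that header the server renders dates in a different locale and time.strptime fails. A minimal sketch of the pattern, assuming date_format is something like "%d/%m/%Y" (each scraper defines its own format string elsewhere):

    import datetime
    import time
    import urllib2

    date_format = "%d/%m/%Y"  # assumed; the real scraper defines this

    def fetch_uk_dated_page(url):
        # Ask for UK-locale content so dates on the page match date_format.
        request = urllib2.Request(url)
        request.add_header("Accept-Language", "en-gb,en")
        return urllib2.urlopen(request).read()

    def parse_uk_date(date_string):
        # time.strptime yields a struct_time; its first six fields build a datetime.
        return datetime.datetime(*time.strptime(date_string, date_format)[0:6])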
Code Example #3
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"day": day,
                                                   "month": month,
                                                   "year": year,
                                                   })
        soup = BeautifulSoup(response.read())

        trs = soup.findAll("tr", valign="middle")

        count = 0
        for tr in trs:
            # The odd trs are just spacers
            if count % 2 == 0:
                application = PlanningApplication()

                tds = tr.findAll("td")
                
                application.date_received = search_day
                application.council_reference = tds[1].a.string
                application.address = tds[3].a.string
                application.postcode = getPostcodeFromText(application.address)
                
                # All the links in this <tr> go to the same place...
                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

                # Still looking for description and comment url
                
                # For the description, we'll need the info page
                info_soup = BeautifulSoup(urllib2.urlopen(application.info_url).read())

                application.description = info_soup.find(text="Description").findNext("td").findNext("td").font.string

                # While we're here, let's get the OSGB grid ref
                application.osgb_x, application.osgb_y = info_soup.find(text="Grid Reference").findNext("td").font.string.split("-")

                # We'll have to use an email address for comments
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)

            count += 1

        return self._results
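
The count % 2 filter in example #3 skips the spacer rows; the same effect falls out of list slicing, which drops the counter entirely. A sketch of the equivalent selection, assuming BeautifulSoup 3 as in the examples:

    from BeautifulSoup import BeautifulSoup

    def data_rows(soup):
        # Every second <tr valign="middle"> is a spacer; keep the even-indexed ones.
        return soup.findAll("tr", valign="middle")[::2]

With that helper the loop body runs once per application and the count bookkeeping disappears.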
Code Example #4
File: Berwick.py  Project: adrianshort/planningalerts
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        monday_before = search_day - datetime.timedelta(search_day.weekday())

        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3: # i.e. it is Friday, Saturday, or Sunday
            # We need to add a week
            thursday = thursday + datetime.timedelta(7)

        this_url = self.base_url %(thursday.strftime(search_date_format))
        # Now get the search page
        response = urllib2.urlopen(this_url)
        soup = BeautifulSoup(response.read())

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.

        nav_strings = soup.findAll(text=" Application Number:")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findNext("p").string.strip()

            result_table = nav_string.findPrevious("table")

            application.date_received = datetime.datetime.strptime(result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip(), reg_date_format)

            application.osgb_x = result_table.find(text=" Easting:").findNext("p").string.strip()
            application.osgb_y = result_table.find(text=" Northing:").findNext("p").string.strip()

            application.description = result_table.find(text=" Proposed Development:").findNext("p").string.strip()
            application.address = result_table.find(text=" Location:").findNext("p").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.info_url = this_url

            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results
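
The weekday arithmetic in example #4 maps any search day to the Thursday of its publication week: Monday through Thursday land on that week's Thursday, Friday through Sunday roll over to the next week's. A quick self-contained check of the logic (stdlib only, verifiable by running it):

    import datetime

    def thursday_for(search_day):
        # Monday of the week containing search_day (weekday() is 0 for Monday).
        monday_before = search_day - datetime.timedelta(search_day.weekday())
        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3:
            # Friday, Saturday, or Sunday roll over to the following week.
            thursday += datetime.timedelta(7)
        return thursday

    # Wednesday 2008-07-02 -> Thursday 2008-07-03 (same week)
    assert thursday_for(datetime.date(2008, 7, 2)) == datetime.date(2008, 7, 3)
    # Friday 2008-07-04 -> Thursday 2008-07-10 (next week)
    assert thursday_for(datetime.date(2008, 7, 4)) == datetime.date(2008, 7, 10)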
Code Example #5
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        next_page_url = self.base_url %{"date": search_date.strftime(date_format)}

        while next_page_url:
            try:
                response = urllib2.urlopen(next_page_url)
            except urllib2.HTTPError:
                # This is what seems to happen if there are no apps
                break

            soup = BeautifulSoup(response.read())

            next = soup.find(text="Next")
            if next:
                next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
            else:
                next_page_url = None

            # There is an <h3> for each app that we can use 
            for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
                application = PlanningApplication()

                application.date_received = search_date
                application.council_reference = h3.string.split(": ")[1]
                application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()

                application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])

                application.info_url = self.info_url %(urllib.quote(application.council_reference))

                application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()

                self._results.addApplication(application)

        return self._results
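
Example #5's loop structure, separated from the scraping detail: fetch a page, treat an HTTPError as the empty-result case, and follow the "Next" link until there isn't one. A minimal sketch of that pattern as a generator, assuming urllib2 and BeautifulSoup 3 as in the examples:

    import urllib2
    import urlparse
    from BeautifulSoup import BeautifulSoup

    def result_pages(first_url, base_url):
        # Yield one parsed page per iteration, following "Next" links.
        next_page_url = first_url
        while next_page_url:
            try:
                response = urllib2.urlopen(next_page_url)
            except urllib2.HTTPError:
                break  # this is what seems to happen when there are no apps
            soup = BeautifulSoup(response.read())
            yield soup
            next_link = soup.find(text="Next")
            if next_link:
                next_page_url = urlparse.urljoin(base_url, next_link.parent['href'])
            else:
                next_page_url = None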
Code Example #6
    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url % {"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Each result is shown in a table of its own. The tables don't have any
        # nice attributes, but they do all contain a NavigableString "Application",
        # and nothing else does...
        nav_strings = soup.findAll(text="Application")

        for nav_string in nav_strings:
            result_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            links = result_table.findAll("a")

            # We can get OSGB coordinates from the link to streetmap
            map_qs_dict = cgi.parse_qs(urlparse.urlsplit(links[0]["href"])[3])

            application.osgb_x = map_qs_dict.get("x")[0]
            application.osgb_y = map_qs_dict.get("y")[0]

            application.council_reference = links[1].string.strip()
            application.info_url = urlparse.urljoin(self.base_url, links[1]["href"])
            application.comment_url = urlparse.urljoin(self.base_url, links[2]["href"])

            application.address = " ".join(links[0].previous.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            application.description = links[2].previous.strip()

            self._results.addApplication(application)

        return self._results
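
Pulling OSGB coordinates out of a streetmap link, as example #6 does, is just query-string parsing. A standalone sketch of the same urlsplit + parse_qs steps, run against a made-up URL of the shape the scraper expects:

    import cgi
    import urlparse

    def osgb_from_map_link(href):
        # The query string is element 3 of the urlsplit tuple.
        query = urlparse.urlsplit(href)[3]
        qs_dict = cgi.parse_qs(query)
        return qs_dict["x"][0], qs_dict["y"][0]

    # Hypothetical link in the shape the scraper expects:
    x, y = osgb_from_map_link("http://www.streetmap.co.uk/map.srf?x=526500&y=181500")
    assert (x, y) == ("526500", "181500")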
Code Example #7
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        post_data = [
            ("CNPA_ref", ""),
            ("application_number", ""),
            ("LA_id", "%"),
            ("applicant_type", "%"),
            ("applicant_name", ""),
            ("development_address", ""),
            ("agent_name", ""),
            ("status", "%"),
            ("startDay", "%02d" %day),
            ("startMonth", "%02d" %month),
            ("startYear", "%d" %year),
            ("endDay", "%02d" %day),
            ("endMonth", "%02d" %month),
            ("endYear", "%d" %year),
            ]

        first_post_data = "CNPA_ref=&application_number=&applicant_name=&development_address=&agent_name=&applicant_type=%%&LA_id=%%&status=%%&startYear=%(year)d&startMonth=%(month)02d&startDay=%(day)02d&endYear=%(year)d&endMonth=%(month)02d&endDay=%(day)02d" %{"day": day, "month": month, "year": year}

        curlobj = pycurl.Curl()
        curlobj.setopt(pycurl.FOLLOWLOCATION, True)
        curlobj.setopt(pycurl.MAXREDIRS, 10)


        # First we do a normal post, this would happen as an AJAX query 
        # from the browser and just returns the number of applications found.
        fakefile = StringIO.StringIO() 

        curlobj.setopt(pycurl.URL, self.first_post_url %(int(time.time()*1000)))
        curlobj.setopt(pycurl.POST, True)
        curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
        curlobj.setopt(pycurl.POSTFIELDS, first_post_data)

        curlobj.perform()

        app_count = int(fakefile.getvalue())
        fakefile.close()

        if app_count:
            # Now we do another multipart form post
            # This gives us something to use as the callback
            fakefile = StringIO.StringIO() 

            curlobj.setopt(pycurl.URL, self.base_url)
            curlobj.setopt(pycurl.HTTPPOST, post_data)
            curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
            curlobj.setopt(pycurl.REFERER, self.referer)
            curlobj.perform()

            soup = BeautifulSoup(fakefile.getvalue())
            # We may as well free up the memory used by fakefile
            fakefile.close()

            for tr in soup.table.findAll("tr")[1:]:
                application = PlanningApplication()
                application.date_received = search_date
                application.comment_url = self.comments_email_address

                tds = tr.findAll("td")

                application.council_reference = tds[1].string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])

                application.address = tds[2].string.strip()
                application.postcode = getPostcodeFromText(application.address)

                # We're going to need to get the info page in order to get the description
                # We can't pass a unicode string to pycurl, so we'll have to encode it.
                curlobj.setopt(pycurl.URL, application.info_url.encode())
                curlobj.setopt(pycurl.HTTPGET, True)

                # This gives us something to use as the callback
                fakefile = StringIO.StringIO() 
                curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)

                curlobj.perform()
                info_soup = BeautifulSoup(fakefile.getvalue())
                fakefile.close()

                application.description = info_soup.find(text="Development Details").findNext("td").string.strip()
                application.osgb_x = info_soup.find(text="Grid Ref East").findNext("td").string.strip()
                application.osgb_y = info_soup.find(text="Grid Ref North").findNext("td").string.strip()

                self._results.addApplication(application)

        return self._results
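
The pycurl boilerplate in example #7 repeats one pattern three times: point WRITEFUNCTION at a StringIO, perform, read the buffer back. A sketch of that pattern as a helper, under the same Python 2 pycurl API the example uses (the helper name is mine, not the project's):

    import StringIO
    import pycurl

    def curl_fetch(curlobj, url, post_fields=None):
        # Capture the response body in memory via WRITEFUNCTION.
        buf = StringIO.StringIO()
        curlobj.setopt(pycurl.URL, url)
        if post_fields is not None:
            curlobj.setopt(pycurl.POST, True)
            curlobj.setopt(pycurl.POSTFIELDS, post_fields)
        else:
            curlobj.setopt(pycurl.HTTPGET, True)
        curlobj.setopt(pycurl.WRITEFUNCTION, buf.write)
        curlobj.perform()
        body = buf.getvalue()
        buf.close()
        return body

Note that, as in the example, any unicode URL would need to be encoded to a byte string before being handed to pycurl.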