Example #1
0
    def geocode_url(self, url, attempted=None):
        if attempted is None:
            attempted = set()

        util.logger.debug("Fetching %s..." % url)
        page = urlopen(url)
        soup = BeautifulSoup(page)

        rdf_url = self.parse_rdf_link(soup)
        util.logger.debug("Fetching %s..." % rdf_url)
        page = urlopen(rdf_url)

        things, thing = self.parse_rdf(page)
        name = self.get_label(thing)

        attributes = self.get_attributes(thing)
        for attribute, value in attributes:
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            relations = self.get_relations(thing)
            for relation, resource in relations:
                url = things.get(resource, resource)
                if url in tried: # Avoid cyclic relationships.
                    continue
                tried.add(url)
                name, (latitude, longitude) = self.geocode_url(url, tried)
                if None not in (name, latitude, longitude):
                    break

        return (name, (latitude, longitude))
Example #2
0
    def parse_xhtml(self, page):
        soup = isinstance(page, BeautifulSoup) and page or BeautifulSoup(page)

        meta = soup.head.find('meta', {'name': 'geo.placename'})
        name = meta and meta['content'] or None

        meta = soup.head.find('meta', {'name': 'geo.position'})
        if meta:
            position = meta['content']
            latitude, longitude = util.parse_geo(position)
            if latitude == 0 or longitude == 0:
                latitude = longitude = None
        else:
            latitude = longitude = None

        return (name, (latitude, longitude))