Ejemplo n.º 1
0
    def geocode_url(self, url, attempted=None):
        """Resolve the page at ``url`` to ``(name, (latitude, longitude))``.

        The page is fetched and parsed with ``BeautifulSoup``; the RDF link
        returned by ``self.parse_rdf_link`` is then fetched and handed to
        ``self.parse_rdf``. Each attribute value of the resulting thing is
        passed to ``util.parse_geo`` until one yields both coordinates. If
        none does, the thing's relations are followed recursively until one
        of them yields a name and both coordinates.

        Args:
            url: Address of the page to geocode.
            attempted: Set of related URLs already followed during this
                lookup. It is mutated in place and shared with recursive
                calls so that cyclic relationships are not followed twice.
                A fresh set is created when omitted.

        Returns:
            A ``(name, (latitude, longitude))`` tuple. The coordinates are
            ``None`` when neither the attributes nor any relation produced
            a location. ``name`` is the label of the last thing examined,
            which may be a related resource rather than ``url`` itself.
        """
        if attempted is None:
            attempted = set()

        util.logger("Fetching %s..." % url)
        page = urlopen(url)
        soup = BeautifulSoup(page)

        rdf_url = self.parse_rdf_link(soup)
        util.logger("Fetching %s..." % rdf_url)
        page = urlopen(rdf_url)

        things, thing = self.parse_rdf(page)
        name = self.get_label(thing)

        # Start from "no location" so that a thing without any attributes
        # falls through to the relation lookup instead of hitting unbound
        # local variables.
        latitude = longitude = None
        for _attribute, value in self.get_attributes(thing):
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            for _relation, resource in self.get_relations(thing):
                # Use the URL recorded for the resource when there is one,
                # otherwise treat the resource itself as the URL.
                related_url = things.get(resource, resource)
                if related_url in attempted:  # Avoid cyclic relationships.
                    continue
                attempted.add(related_url)
                name, (latitude, longitude) = self.geocode_url(
                    related_url, attempted)
                if None not in (name, latitude, longitude):
                    break

        return (name, (latitude, longitude))
Ejemplo n.º 2
0
    def geocode_url(self, url):
        """Fetch ``url`` and parse the response for the configured format.

        The parser is the method on this object named ``parse_`` followed by
        ``self.output_format``; it is called with the opened response and
        its return value is returned unchanged.
        """
        util.logger("Fetching %s..." % url)
        response = urlopen(url)

        # Look the parser up only after the fetch, by its format-derived name.
        parser_name = 'parse_' + self.output_format
        parser = getattr(self, parser_name)
        return parser(response)
Ejemplo n.º 3
0
 def geocode_url(self, url, exactly_one=True):
     """Fetch ``url`` and return what ``self.parse_javascript`` makes of it.

     ``exactly_one`` is not interpreted here; it is forwarded unchanged as
     the second positional argument to ``self.parse_javascript``.
     """
     util.logger("Fetching %s..." % url)
     response = urlopen(url)
     result = self.parse_javascript(response, exactly_one)
     return result