def geocode_url(self, url, attempted=None):
    """Resolve *url* to a ``(name, (latitude, longitude))`` tuple.

    The page at *url* is fetched and parsed with BeautifulSoup, the URL
    returned by ``self.parse_rdf_link`` is fetched in turn, and the result
    is handed to ``self.parse_rdf``.  Coordinates are taken from the first
    attribute value for which ``util.parse_geo`` returns a complete pair.
    If no attribute yields coordinates, each related resource is geocoded
    recursively until one produces a name and both coordinates.

    :param url: address of the page to geocode.
    :param attempted: set of related URLs already followed; shared across
        recursive calls so cyclic relationships are not followed forever.
        Callers normally leave this as ``None``.
    :returns: ``(name, (latitude, longitude))``; the coordinates are
        ``None`` when nothing could be resolved.
    """
    if attempted is None:
        attempted = set()

    util.logger("Fetching %s..." % url)
    page = urlopen(url)
    soup = BeautifulSoup(page)

    rdf_url = self.parse_rdf_link(soup)
    util.logger("Fetching %s..." % rdf_url)
    page = urlopen(rdf_url)

    things, thing = self.parse_rdf(page)
    name = self.get_label(thing)

    # Bind the coordinates up front: when there are no attributes the loop
    # body never runs and the checks below would otherwise hit unbound
    # locals.
    latitude = longitude = None
    for _, value in self.get_attributes(thing):
        latitude, longitude = util.parse_geo(value)
        if None not in (latitude, longitude):
            break

    if None in (latitude, longitude):
        for _, resource in self.get_relations(thing):
            related_url = things.get(resource, resource)
            if related_url in attempted:
                # Avoid cyclic relationships.
                continue
            attempted.add(related_url)
            name, (latitude, longitude) = self.geocode_url(
                related_url, attempted
            )
            if None not in (name, latitude, longitude):
                break

    return (name, (latitude, longitude))
def geocode_url(self, url):
    """Fetch *url* and parse the response according to the output format.

    The parser is the method on ``self`` named ``'parse_'`` followed by
    ``self.output_format``; whatever it returns is returned unchanged.

    :param url: address of the geocoding request to fetch.
    :returns: the result of the selected ``parse_*`` method.
    """
    util.logger("Fetching %s..." % url)
    response = urlopen(url)
    # The parser is looked up only after the fetch, as in the original.
    parser_name = 'parse_' + self.output_format
    return getattr(self, parser_name)(response)
def geocode_url(self, url, exactly_one=True):
    """Fetch *url* and parse the response with ``self.parse_javascript``.

    :param url: address of the geocoding request to fetch.
    :param exactly_one: forwarded unchanged to ``parse_javascript``.
    :returns: whatever ``parse_javascript`` returns for the fetched page.
    """
    util.logger("Fetching %s..." % url)
    response = urlopen(url)
    parsed = self.parse_javascript(response, exactly_one)
    return parsed