def __init__(self, a, b):
    """Initialize a Distance whose length is the distance between the two
    geodesic points ``a`` and ``b``, using the ``calculate`` method to
    determine this distance.

    Either point may be given as a string, in which case it is first
    converted with ``util.parse_geo``. The (possibly converted) points are
    stored as ``self.a`` and ``self.b``. ``calculate`` is invoked only when
    both points are truthy.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string check does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(a, string_types):
        a = util.parse_geo(a)
    if isinstance(b, string_types):
        b = util.parse_geo(b)

    self.a = a
    self.b = b

    # Skip the calculation when either endpoint is missing/empty.
    if a and b:
        self.calculate()
def geocode_url(self, url, tried=None):
    """Fetch ``url`` and resolve it to ``(name, (latitude, longitude))``.

    The page's XHTML metadata is read first with ``parse_xhtml``. When any
    of the three values is missing, or ``self.prefer_semantic`` is true,
    the RDF document linked from the page is fetched as well: the name is
    replaced by the result of ``get_label`` and each attribute value is
    passed to ``util.parse_geo`` until one produces a complete coordinate
    pair. If the coordinates are still incomplete, the relations of the
    parsed thing are followed by calling this method recursively until one
    of them yields a complete result.

    ``tried`` is the set of URLs already visited during this lookup. It is
    created when omitted and updated in place, so that pages linking back
    to an already visited page are not fetched again.

    Any element of the returned tuple may be ``None`` when nothing usable
    was found.
    """
    if tried is None:
        tried = set()
    # Mark the page being fetched as visited as well, not only the related
    # pages reached from it; otherwise a relation that links back to the
    # starting URL would make it be downloaded and parsed a second time.
    tried.add(url)

    util.logger.debug("Fetching %s..." % url)
    page = urlopen(url)
    soup = BeautifulSoup(page)

    name, (latitude, longitude) = self.parse_xhtml(soup)
    if None in (name, latitude, longitude) or self.prefer_semantic:
        # NOTE(review): the value returned by parse_rdf_link is used
        # unchecked -- confirm it cannot be None for pages without an RDF
        # link.
        rdf_url = self.parse_rdf_link(soup)
        util.logger.debug("Fetching %s..." % rdf_url)
        page = urlopen(rdf_url)

        things, thing = self.parse_rdf(page)
        name = self.get_label(thing)

        # Take the first attribute value that parses to full coordinates.
        attributes = self.get_attributes(thing)
        for _, value in attributes:
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            # No usable attribute: fall back to the related resources.
            relations = self.get_relations(thing)
            for _, resource in relations:
                # Use the mapped URL when ``things`` has one, else the
                # resource identifier itself.
                url = things.get(resource, resource)
                if url in tried:  # Avoid cyclic relationships.
                    continue
                tried.add(url)
                name, (latitude, longitude) = self.geocode_url(url, tried)
                if None not in (name, latitude, longitude):
                    break

    return (name, (latitude, longitude))
def geocode_url(self, url, tried=None):
    """Fetch ``url`` and resolve it to ``(name, (latitude, longitude))``.

    The page's XHTML metadata is read first with ``parse_xhtml``. When any
    of the three values is missing, or ``self.prefer_semantic`` is true,
    the RDF document linked from the page is fetched as well: the name is
    replaced by the result of ``get_label`` and each attribute value is
    passed to ``util.parse_geo`` until one produces a complete coordinate
    pair. If the coordinates are still incomplete, the relations of the
    parsed thing are followed by calling this method recursively until one
    of them yields a complete result.

    ``tried`` is the set of URLs already visited during this lookup. It is
    created when omitted and updated in place, so that pages linking back
    to an already visited page are not fetched again.

    Any element of the returned tuple may be ``None`` when nothing usable
    was found.
    """
    if tried is None:
        tried = set()
    # Mark the page being fetched as visited as well, not only the related
    # pages reached from it; otherwise a relation that links back to the
    # starting URL would make it be downloaded and parsed a second time.
    tried.add(url)

    page = urlopen(url)
    soup = BeautifulSoup(page)

    name, (latitude, longitude) = self.parse_xhtml(soup)
    if None in (name, latitude, longitude) or self.prefer_semantic:
        # NOTE(review): the value returned by parse_rdf_link is used
        # unchecked -- confirm it cannot be None for pages without an RDF
        # link.
        rdf_url = self.parse_rdf_link(soup)
        page = urlopen(rdf_url)

        things, thing = self.parse_rdf(page)
        name = self.get_label(thing)

        # Take the first attribute value that parses to full coordinates.
        attributes = self.get_attributes(thing)
        for _, value in attributes:
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            # No usable attribute: fall back to the related resources.
            relations = self.get_relations(thing)
            for _, resource in relations:
                # Use the mapped URL when ``things`` has one, else the
                # resource identifier itself.
                url = things.get(resource, resource)
                if url in tried:  # Avoid cyclic relationships.
                    continue
                tried.add(url)
                name, (latitude, longitude) = self.geocode_url(url, tried)
                if None not in (name, latitude, longitude):
                    break

    return (name, (latitude, longitude))
def parse_xhtml(self, page):
    """Read the place name and position from a page's ``geo.*`` meta tags.

    ``page`` is used directly when it is already a (truthy)
    ``BeautifulSoup`` instance; anything else is handed to the
    ``BeautifulSoup`` constructor first.

    Returns ``(name, (latitude, longitude))``. ``name`` is ``None`` when no
    ``geo.placename`` meta tag is found or its content is empty. Both
    coordinates are ``None`` when no ``geo.position`` meta tag is found, or
    when either parsed coordinate equals zero.
    """
    if isinstance(page, BeautifulSoup) and page:
        soup = page
    else:
        soup = BeautifulSoup(page)
    head = soup.head

    name = None
    placename_tag = head.find('meta', {'name': 'geo.placename'})
    if placename_tag:
        name = placename_tag['content'] or None

    latitude = longitude = None
    position_tag = head.find('meta', {'name': 'geo.position'})
    if position_tag:
        latitude, longitude = util.parse_geo(position_tag['content'])
        # A zero in either coordinate is treated as "no position".
        if latitude == 0 or longitude == 0:
            latitude = longitude = None

    return (name, (latitude, longitude))
def parse_xhtml(self, page):
    """Read the place name and position from a page's ``geo.*`` meta tags.

    ``page`` is used directly when it is already a (truthy)
    ``BeautifulSoup`` instance; anything else is handed to the
    ``BeautifulSoup`` constructor first.

    Returns ``(name, (latitude, longitude))``. ``name`` is ``None`` when no
    ``geo.placename`` meta tag is found or its content is empty. Both
    coordinates are ``None`` when no ``geo.position`` meta tag is found, or
    when either parsed coordinate equals zero.
    """
    if isinstance(page, BeautifulSoup) and page:
        soup = page
    else:
        soup = BeautifulSoup(page)
    head = soup.head

    name = None
    placename_tag = head.find("meta", {"name": "geo.placename"})
    if placename_tag:
        name = placename_tag["content"] or None

    latitude = longitude = None
    position_tag = head.find("meta", {"name": "geo.position"})
    if position_tag:
        latitude, longitude = util.parse_geo(position_tag["content"])
        # A zero in either coordinate is treated as "no position".
        if latitude == 0 or longitude == 0:
            latitude = longitude = None

    return (name, (latitude, longitude))