def find(self, document):
    """Yield a ``Location`` for each geo element found in *document*.

    *document* is either an already-parsed ``BeautifulSoup`` object, which
    is searched with ``findAll``, or anything else, which is handed to the
    ``BeautifulSoup`` constructor restricted to elements whose ``class``
    attribute matches ``self.GEO_CLASS``.

    For every matching element the coordinates come from child elements
    whose classes match ``self.LATITUDE_CLASS`` and
    ``self.LONGITUDE_CLASS``. When those children are not both present and
    ``self.shorthand`` is true, the element's own value is split once on
    ``self.SEP`` instead. Elements whose coordinates are missing, or do
    not match ``FLOAT_RE``, are skipped without raising.

    Each yielded ``Location`` is built from the element's text (unescaped,
    stripped, runs of whitespace collapsed to one space) and a
    ``(latitude, longitude)`` tuple of floats.
    """
    strainer = SoupStrainer(attrs={'class': self.GEO_CLASS})
    if isinstance(document, BeautifulSoup):
        elements = document.findAll(strainer)
    else:
        elements = BeautifulSoup(document, parseOnlyThese=strainer)

    for element in elements:
        preformatted = element.name == 'pre'
        lat_element = element.find(attrs={'class': self.LATITUDE_CLASS})
        long_element = element.find(attrs={'class': self.LONGITUDE_CLASS})
        latitude = None
        longitude = None
        if lat_element and long_element:
            latitude = self._get_value(lat_element, preformatted)
            longitude = self._get_value(long_element, preformatted)
        elif self.shorthand:
            # Shorthand form: both coordinates live in the element's own
            # value, separated by SEP. maxsplit is passed by keyword
            # because the positional form is deprecated in newer Pythons.
            lat_long = re.split(self.SEP, self._get_value(element), maxsplit=1)
            if len(lat_long) == 2:
                latitude, longitude = lat_long

        if not (latitude and longitude):
            continue

        lat_match = FLOAT_RE.match(unescape(latitude))
        long_match = FLOAT_RE.match(unescape(longitude))
        if not (lat_match and long_match):
            continue

        latitude = float(lat_match.group(1))
        longitude = float(long_match.group(1))
        text = unescape(self._get_text(element).strip())
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        name = re.sub(r'\s+', ' ', text)
        yield Location(name, (latitude, longitude))
def find(self, document):
    """Yield a ``Location`` for each matching ``<meta>`` tag in *document*.

    *document* is either an already-parsed ``BeautifulSoup`` object, which
    is searched with ``findAll``, or anything else, which is handed to the
    ``BeautifulSoup`` constructor restricted to ``meta`` tags whose
    ``name`` attribute matches ``self.META_NAME``.

    The tag's ``content`` attribute is unescaped and passed to ``Point``.
    Each yielded ``Location`` has ``None`` as its name.

    When ``self.ignore_invalid`` is true, tags with empty or missing
    content, and tags whose content makes ``Point`` raise ``TypeError`` or
    ``ValueError``, are skipped. Otherwise that exception propagates to
    the caller.
    """
    meta_filter = SoupStrainer('meta', attrs={'name': self.META_NAME})
    if isinstance(document, BeautifulSoup):
        tags = document.findAll(meta_filter)
    else:
        tags = BeautifulSoup(document, parseOnlyThese=meta_filter)

    for tag in tags:
        content = tag.get('content')
        # Empty content is only attempted when invalid input must raise.
        if not content and self.ignore_invalid:
            continue

        try:
            point = Point(unescape(content))
        except (TypeError, ValueError):
            if self.ignore_invalid:
                continue
            raise

        yield Location(None, point)
def find(self, document):
    """Yield locations assembled from groups of ``<meta>`` tags.

    *document* is either an already-parsed ``BeautifulSoup`` object, which
    is searched with ``findAll``, or anything else, which is handed to the
    ``BeautifulSoup`` constructor restricted to ``meta`` tags whose
    ``name`` attribute matches ``self.META_NAME``.

    Tags are consumed in order. The first group of ``self.META_NAME``
    matched against each tag's name is the attribute key, and the tag's
    (unescaped) ``content`` is stored under it. Seeing a key that is
    already stored marks the start of a new group: the stored attributes
    are passed to ``self._get_location``, any non-``None`` result is
    yielded, and the store is emptied. Attributes still stored after the
    last tag are flushed the same way.
    """
    meta_filter = SoupStrainer('meta', attrs={'name': self.META_NAME})
    if isinstance(document, BeautifulSoup):
        tags = document.findAll(meta_filter)
    else:
        tags = BeautifulSoup(document, parseOnlyThese=meta_filter)

    pending = {}
    for tag in tags:
        tag_name = tag['name']
        key = re.match(self.META_NAME, tag_name).group(1)
        content = tag.get('content')

        if key in pending:
            # A repeated key means the previous group is complete.
            found = self._get_location(pending)
            if found is not None:
                yield found
            pending.clear()

        # Falsy content (missing or empty) is stored untouched.
        pending[key] = unescape(content) if content else content

    # Flush whatever was collected after the last group boundary.
    found = self._get_location(pending)
    if found is not None:
        yield found