Ejemplo n.º 1
0
    def find(self, document):
        """Yield a ``Location`` for each geo-classed element in *document*.

        *document* is either an already-parsed ``BeautifulSoup`` tree, which
        is searched with ``findAll``, or raw markup, which is parsed with a
        strainer so only elements whose ``class`` matches ``self.GEO_CLASS``
        are kept.

        For each element, coordinates are read from the child elements
        classed ``self.LATITUDE_CLASS`` and ``self.LONGITUDE_CLASS``. When
        either child is missing and ``self.shorthand`` is true, the
        element's own value is split once on ``self.SEP`` instead. Elements
        whose coordinates are missing, empty, or not matched by ``FLOAT_RE``
        are skipped.

        Yields:
            ``Location(name, (latitude, longitude))`` where *name* is the
            element's unescaped, stripped text with every whitespace run
            collapsed to one space, and both coordinates are floats.
        """
        strainer = SoupStrainer(attrs={'class': self.GEO_CLASS})
        if isinstance(document, BeautifulSoup):
            elements = document.findAll(strainer)
        else:
            elements = BeautifulSoup(document, parseOnlyThese=strainer)

        for element in elements:
            preformatted = element.name == 'pre'
            lat_element = element.find(attrs={'class': self.LATITUDE_CLASS})
            long_element = element.find(attrs={'class': self.LONGITUDE_CLASS})

            latitude = longitude = None
            if lat_element and long_element:
                latitude = self._get_value(lat_element, preformatted)
                longitude = self._get_value(long_element, preformatted)
            elif self.shorthand:
                # Shorthand form: both coordinates live in the element's own
                # value, separated by SEP. Split at most once.
                lat_long = re.split(
                    self.SEP, self._get_value(element), maxsplit=1
                )
                if len(lat_long) == 2:
                    latitude, longitude = lat_long

            if not (latitude and longitude):
                continue

            lat_match = FLOAT_RE.match(unescape(latitude))
            long_match = FLOAT_RE.match(unescape(longitude))
            if not (lat_match and long_match):
                continue

            latitude = float(lat_match.group(1))
            longitude = float(long_match.group(1))
            text = unescape(self._get_text(element).strip())
            # Raw string: '\s' in a plain literal is an invalid escape.
            name = re.sub(r'\s+', ' ', text)
            yield Location(name, (latitude, longitude))
Ejemplo n.º 2
0
 def find(self, document):
     """Yield a ``Location`` for every geo-classed element of *document*.

     A ``BeautifulSoup`` instance is searched with ``findAll``; anything
     else is parsed as markup through a strainer that keeps only elements
     whose ``class`` matches ``self.GEO_CLASS``.

     Latitude and longitude come from the children classed
     ``self.LATITUDE_CLASS`` and ``self.LONGITUDE_CLASS``. If one of them
     is absent and ``self.shorthand`` is true, the element's own value is
     split once on ``self.SEP``. An element is skipped when a coordinate
     is missing, empty, or not matched by ``FLOAT_RE``.

     Yields:
         ``Location(name, (latitude, longitude))``; *name* is the
         element's unescaped, stripped text with whitespace runs collapsed
         to single spaces, and the coordinates are floats.
     """
     strainer = SoupStrainer(attrs={'class': self.GEO_CLASS})
     if isinstance(document, BeautifulSoup):
         elements = document.findAll(strainer)
     else:
         elements = BeautifulSoup(document, parseOnlyThese=strainer)

     for element in elements:
         preformatted = element.name == 'pre'
         lat_element = element.find(attrs={'class': self.LATITUDE_CLASS})
         long_element = element.find(attrs={'class': self.LONGITUDE_CLASS})

         latitude = longitude = None
         if lat_element and long_element:
             latitude = self._get_value(lat_element, preformatted)
             longitude = self._get_value(long_element, preformatted)
         elif self.shorthand:
             # Shorthand: one value holding both coordinates, split at
             # most once on the separator.
             lat_long = re.split(
                 self.SEP, self._get_value(element), maxsplit=1
             )
             if len(lat_long) == 2:
                 latitude, longitude = lat_long

         if not (latitude and longitude):
             continue

         lat_match = FLOAT_RE.match(unescape(latitude))
         long_match = FLOAT_RE.match(unescape(longitude))
         if not (lat_match and long_match):
             continue

         latitude = float(lat_match.group(1))
         longitude = float(long_match.group(1))
         text = unescape(self._get_text(element).strip())
         # Raw string avoids the invalid '\s' escape in a plain literal.
         name = re.sub(r'\s+', ' ', text)
         yield Location(name, (latitude, longitude))
Ejemplo n.º 3
0
    def find(self, document):
        """Yield a ``Location`` for each matching ``<meta>`` tag.

        *document* may be a ``BeautifulSoup`` tree (searched with
        ``findAll``) or markup to be parsed; in both cases only ``meta``
        tags whose ``name`` matches ``self.META_NAME`` are considered.

        The tag's ``content`` attribute is unescaped and passed to
        ``Point``. With ``self.ignore_invalid`` true, tags with empty or
        missing content, or content that makes ``Point`` raise
        ``TypeError``/``ValueError``, are skipped; otherwise that error
        propagates.

        Yields:
            ``Location(None, point)`` for each tag that produced a point.
        """
        strainer = SoupStrainer('meta', attrs={'name': self.META_NAME})
        if isinstance(document, BeautifulSoup):
            elements = document.findAll(strainer)
        else:
            elements = BeautifulSoup(document, parseOnlyThese=strainer)

        for meta in elements:
            content = meta.get('content')
            # Empty content is only attempted when invalid input must raise.
            if not content and self.ignore_invalid:
                continue
            try:
                point = Point(unescape(content))
            except (TypeError, ValueError):
                if not self.ignore_invalid:
                    raise
                continue
            yield Location(None, point)
Ejemplo n.º 4
0
 def find(self, document):
     """Yield one ``Location`` per usable ``<meta>`` tag in *document*.

     Only ``meta`` tags whose ``name`` matches ``self.META_NAME`` are
     examined. A ``BeautifulSoup`` instance is searched with ``findAll``;
     any other input is parsed through the same strainer.

     Each tag's unescaped ``content`` is handed to ``Point``. When
     ``self.ignore_invalid`` is true, empty content and content rejected
     with ``TypeError``/``ValueError`` are skipped; when it is false the
     exception is re-raised.

     Yields:
         ``Location(None, point)``.
     """
     strainer = SoupStrainer('meta', attrs={'name': self.META_NAME})
     if isinstance(document, BeautifulSoup):
         elements = document.findAll(strainer)
     else:
         elements = BeautifulSoup(document, parseOnlyThese=strainer)

     for meta in elements:
         content = meta.get('content')
         # Skip blank content unless invalid values are meant to raise.
         if not content and self.ignore_invalid:
             continue
         try:
             point = Point(unescape(content))
         except (TypeError, ValueError):
             if not self.ignore_invalid:
                 raise
             continue
         yield Location(None, point)
Ejemplo n.º 5
0
    def find(self, document):
        """Yield locations assembled from groups of matching ``<meta>`` tags.

        *document* may be a ``BeautifulSoup`` tree (searched with
        ``findAll``) or markup to be parsed; only ``meta`` tags whose
        ``name`` matches ``self.META_NAME`` are considered.

        Group 1 of ``self.META_NAME`` matched against the tag's ``name`` is
        used as the attribute key, and the tag's ``content`` (unescaped
        when truthy) as its value. When a key repeats, the attributes
        gathered so far are passed to ``self._get_location`` and the
        accumulator is cleared before the new value is stored. Whatever
        remains after the last tag is passed to ``self._get_location`` too.

        Yields:
            Every non-``None`` result of ``self._get_location``.
        """
        strainer = SoupStrainer('meta', attrs={'name': self.META_NAME})
        if isinstance(document, BeautifulSoup):
            elements = document.findAll(strainer)
        else:
            elements = BeautifulSoup(document, parseOnlyThese=strainer)

        attrs = {}
        for element in elements:
            name_match = re.match(self.META_NAME, element['name'])
            if name_match is None:
                # NOTE(review): the strainer's name test may not be anchored
                # at the start the way re.match is -- confirm. Either way,
                # skip the tag instead of failing on ``None.group``.
                continue
            attr_name = name_match.group(1)
            value = element.get('content')
            if attr_name in attrs:
                # A repeated key starts a new group: emit the finished one.
                location = self._get_location(attrs)
                if location is not None:
                    yield location
                attrs.clear()
            attrs[attr_name] = value and unescape(value)

        # Emit the trailing group that no repeated key terminated.
        location = self._get_location(attrs)
        if location is not None:
            yield location
Ejemplo n.º 6
0
 def find(self, document):
     """Yield locations built from runs of matching ``<meta>`` tags.

     Only ``meta`` tags whose ``name`` matches ``self.META_NAME`` are
     examined. A ``BeautifulSoup`` instance is searched with ``findAll``;
     any other input is parsed through the same strainer.

     For each tag, group 1 of ``self.META_NAME`` matched against its
     ``name`` is the attribute key and its ``content`` (unescaped when
     truthy) is the value. Seeing a key a second time hands the collected
     attributes to ``self._get_location`` and clears them before storing
     the new value. The attributes left after the final tag are handed to
     ``self._get_location`` as well.

     Yields:
         Every non-``None`` result of ``self._get_location``.
     """
     strainer = SoupStrainer('meta', attrs={'name': self.META_NAME})
     if isinstance(document, BeautifulSoup):
         elements = document.findAll(strainer)
     else:
         elements = BeautifulSoup(document, parseOnlyThese=strainer)

     attrs = {}
     for element in elements:
         name_match = re.match(self.META_NAME, element['name'])
         if name_match is None:
             # NOTE(review): the strainer's name test may be looser than
             # re.match (anchored at the start) -- confirm. Skip the tag
             # rather than raise AttributeError on ``None.group``.
             continue
         attr_name = name_match.group(1)
         value = element.get('content')
         if attr_name in attrs:
             # A repeated key closes the current group.
             location = self._get_location(attrs)
             if location is not None:
                 yield location
             attrs.clear()
         attrs[attr_name] = value and unescape(value)

     # Flush the final group.
     location = self._get_location(attrs)
     if location is not None:
         yield location