def query_places_at_location(self, location, radius=1, limit=10):
     """Query Wikidata for labelled places around a coordinate.

     Args:
         location: Indexable pair; item 0 is substituted as ``latitude`` and
             item 1 as ``longitude`` into ``WIKIDATA_PLACE_QUERY``.
         radius: Value substituted for ``max_radius`` in the query.
         limit: Value substituted for ``limit`` in the query.

     Returns:
         A list of ``(Place, distance)`` tuples. ``distance`` is the ``dist``
         binding converted to ``float``, or ``None`` when that binding is
         missing, empty or not numeric.
     """
     query = WIKIDATA_PLACE_QUERY.format(latitude=location[0],
                                         longitude=location[1],
                                         max_radius=radius,
                                         limit=limit)
     places = []
     for result in self.__query(query):
         wikidata_id = result.get('a', {}).get('value', '').split('/')[-1]
         name = result.get('aLabel', {}).get('value', None)
         # Only process results with a sensible label: skip empty labels
         # and labels that merely repeat the entity id.
         if not name or name == wikidata_id:
             continue
         # Use a separate name so the ``location`` argument is not rebound.
         point = result.get('location', {}).get('value', None)
         if not point or not point.startswith('Point(') or not point.endswith(')'):
             continue
         coords = point[6:-1].split(' ')
         if len(coords) != 2:
             continue
         try:
             # The text between the parentheses is read as "<lon> <lat>".
             lon = float(coords[0])
             lat = float(coords[1])
         except ValueError:
             continue
         raw_distance = result.get('dist', {}).get('value', None)
         try:
             distance = float(raw_distance) if raw_distance else None
         except (TypeError, ValueError):
             # A malformed distance should not discard an otherwise valid
             # place, nor abort the whole query; report it as unknown.
             distance = None
         places.append((Place(None, name, lat, lon, wikidata_id), distance))
     return places
 def query_places_by_name(self, name, limit=10):
     """Query Wikidata for labelled places matching a name.

     Args:
         name: Search text; it is lower-cased and substituted for ``name``
             in ``WIKIDATA_PLACE_BY_NAME_QUERY``.
         limit: Value substituted for ``limit`` in the query.

     Returns:
         A list of ``Place`` objects, one per result that has a usable
         label and a parsable ``Point(...)`` location.
     """
     # NOTE(review): ``name`` is interpolated into the query text as-is;
     # confirm that callers pass trusted input or that it is escaped.
     sparql = WIKIDATA_PLACE_BY_NAME_QUERY.format(name=name.lower(),
                                                  limit=limit)
     found = []
     for binding in self.__query(sparql):
         entity_id = binding.get('a', {}).get('value', '').split('/')[-1]
         label = binding.get('aLabel', {}).get('value', None)
         # Only keep results with a sensible label (non-empty and not just
         # the entity id repeated).
         has_label = bool(label) and len(label) > 0 and label != entity_id
         if not has_label:
             continue
         wkt = binding.get('location', {}).get('value', None)
         if not (wkt and wkt.startswith('Point(') and wkt.endswith(')')):
             continue
         parts = wkt[6:-1].split(' ')
         if len(parts) != 2:
             continue
         lon_text, lat_text = parts
         try:
             lat = float(lat_text)
             lon = float(lon_text)
         except ValueError:
             continue
         found.append(Place(None, label, lat, lon, entity_id))
     return found
Example #3
0
def obj_parse(fname):
    """Build a ``Place`` from a stored HTML object.

    The object is loaded with ``__load_obj`` and its body parsed as HTML.
    Latitude and longitude come from the ``mapLat`` / ``mapLng`` meta tags
    and the place name from the ``<title>`` element. When both coordinates
    are available, Wikidata is queried around them and the best match (as
    chosen by ``__find_best_wikidata``) supplies the Wikidata id.

    Args:
        fname: Passed through to ``__load_obj``.

    Returns:
        ``Place(None, title, lat, lon, wikidata_id)``; any of the last four
        values is ``None`` when it could not be determined.
    """
    name, _url, body = __load_obj(fname)
    tree = lxml.html.fromstring(body)

    def _first(xpath):
        # First xpath match, stripped; IndexError when nothing matches.
        return tree.xpath(xpath)[0].strip()

    try:
        lat = float(_first('//head/meta[@name="mapLat"]/@content'))
        lon = float(_first('//head/meta[@name="mapLng"]/@content'))
    except (ValueError, IndexError):
        # A missing or non-numeric coordinate invalidates the pair.
        lat = lon = None

    try:
        title = _first('//head/title/text()')
    except IndexError:
        title = None

    wikidata_id = None
    if not (lat is None or lon is None):
        wikidata = WikidataSource()
        # Presumably throttles requests to the query service; confirm.
        time.sleep(1)
        # NOTE(review): verify that WikidataSource defines ``query_name``;
        # the arguments have the shape of a (location, radius, limit) lookup.
        results = wikidata.query_name((lat, lon), radius=1, limit=30)
        best = __find_best_wikidata(name, results, threshold=0.1)
        if best is not None:
            wikidata_id, dist = best
            print(name, wikidata_id, dist)

    # Abuse title as place name; may be helpful for fuzzy matching
    return Place(None, title, lat, lon, wikidata_id)
 def patch_place(self, id_: int, place: models.place.Place) -> models.place.Place:
     """Update the stored place ``id_`` with the fields of ``place``.

     Copies name, latitude, longitude and wikidata_id from ``place`` into a
     ``Place`` carrying ``id_`` and hands it to ``self._db.update_place``.

     Returns:
         A ``models.place.Place`` whose only populated field is ``id``, set
         to the value returned by ``self._db.update_place``.
     """
     fields = dict(name=place.name,
                   latitude=place.latitude,
                   longitude=place.longitude,
                   wikidata_id=place.wikidata_id)
     record = Place(id=id_, **fields)
     updated_id = self._db.update_place(record)
     return models.place.Place(id=updated_id)
 def put_place(self, place: models.place.Place) -> models.place.Place:
     """Insert ``place`` as a new record.

     Builds a ``Place`` without an id from the name, latitude, longitude
     and wikidata_id of ``place`` and passes it to
     ``self._db.insert_place``.

     Returns:
         A ``models.place.Place`` whose only populated field is ``id``, set
         to the value returned by ``self._db.insert_place``.
     """
     new_record = Place(
         id=None,
         name=place.name,
         latitude=place.latitude,
         longitude=place.longitude,
         wikidata_id=place.wikidata_id,
     )
     inserted_id = self._db.insert_place(new_record)
     return models.place.Place(id=inserted_id)
Example #6
0
 def fetch_place(self, id_) -> Optional[Place]:
     """Load the place whose ``p_id`` equals ``id_``.

     Returns:
         A ``Place`` built from the first matching row, or ``None`` when
         the query yields no rows.
     """
     sql = '''
     SELECT p_id, p_name, p_wikidata, p_lat, p_lon
     FROM places
     WHERE p_id = %s
     '''
     with self._db.transaction() as cursor:
         cursor.execute(sql, (id_,))
         # Only the first row matters; any further rows are ignored.
         first = next(iter(cursor.fetchall()), None)
         if first is not None:
             p_id, p_name, p_wikidata, p_lat, p_lon = first
             return Place(id=p_id,
                          name=p_name,
                          latitude=p_lat,
                          longitude=p_lon,
                          wikidata_id=p_wikidata)
     return None
Example #7
0
 def fetch_places_by_name(self, name, limit=10):
     """Find stored places whose name contains ``name``.

     The search text is lower-cased and wrapped in ``%`` wildcards for a
     SQL ``LIKE`` match against ``p_name``.

     Args:
         name: Substring to look for.
         limit: Maximum number of rows requested from the database.

     Returns:
         A list of ``Place`` objects, one per returned row.
     """
     sql = '''
     SELECT p_id, p_name, p_wikidata, p_lat, p_lon
     FROM places
     WHERE p_name LIKE %s
     LIMIT %s
     '''
     matches = []
     with self._db.transaction() as cursor:
         pattern = '%' + name.lower() + '%'
         cursor.execute(sql, (pattern, limit))
         # Row columns arrive as (id, name, wikidata, lat, lon), while
         # Place takes them as (id, name, lat, lon, wikidata).
         matches.extend(
             Place(p_id, p_name, p_lat, p_lon, p_wikidata)
             for p_id, p_name, p_wikidata, p_lat, p_lon in cursor.fetchall()
         )
     return matches
Example #8
0
 def fetch_places_at_location(self, location, radius=1.0, limit=10):
     """Find stored places within ``radius`` of ``location``.

     The radius is divided by 111.0 and squared, then compared against the
     squared coordinate difference between each row and ``location``.

     Args:
         location: Indexable pair; item 0 is compared with ``p_lat`` and
             item 1 with ``p_lon``.
         radius: Search radius; presumably kilometres, converted at roughly
             111 km per degree (TODO confirm the unit with callers).
         limit: Maximum number of rows requested from the database.

     Returns:
         A list of ``(Place, dist)`` tuples, where ``dist`` is the square
         root of the squared coordinate difference, i.e. it is expressed
         in coordinate units rather than in the unit of ``radius``.
     """
     lat, lon = location[0], location[1]
     r2ll = radius / 111.0
     r2 = r2ll * r2ll
     # The squared distance is spelled out in WHERE instead of being read
     # back from a user variable assigned in the select list: MySQL leaves
     # the evaluation order of such assignments undefined, so the filter
     # could see a stale or NULL value.
     # NOTE(review): DESC is kept from the previous query; together with
     # LIMIT it keeps the farthest matches. Confirm whether nearest-first
     # (ASC) was intended.
     query = '''
     SELECT p_id, p_name, p_wikidata, p_lat, p_lon,
       ((p_lat - %s) * (p_lat - %s) + (p_lon - %s) * (p_lon - %s)) AS dist2
     FROM places
     WHERE ((p_lat - %s) * (p_lat - %s) + (p_lon - %s) * (p_lon - %s)) <= %s
     ORDER BY dist2 DESC
     LIMIT %s
     '''
     results = []
     with self._db.transaction() as c:
         c.execute(query, (
             lat, lat, lon, lon,
             lat, lat, lon, lon,
             r2,
             limit
         ))
         for row in c.fetchall():
             p_id, p_name, p_wikidata, p_lat, p_lon, dist2 = row
             dist = math.sqrt(dist2)
             place = Place(p_id, p_name, p_lat, p_lon, p_wikidata)
             results.append((place, dist))
     return results
            wikipedia_url = WIKIPEDIA_ARTICLE_API.format(title=page)
            response = self.__query_wikipedia(wikipedia_url)
            for article in response:
                article_url = f'https://de.wikipedia.org/wiki/{urllib.parse.quote(article["title"])}'
                doc = Document(id=None,
                               title=article['title'],
                               author=None,
                               year=None,
                               text=article['extract'],
                               source=article_url)
                documents.append(doc)
        return documents

    def query_images_for_place(self, place):
        """Look up the images Wikidata lists for ``place``.

        Args:
            place: Object with a ``wikidata_id`` attribute.

        Returns:
            A list with the ``img`` binding value of every query result
            (``None`` for a result lacking that binding), or an empty list
            when ``place`` has no Wikidata id.
        """
        if place.wikidata_id:
            sparql = WIKIDATA_IMAGE_QUERY.format(wikidata_id=place.wikidata_id)
            return [
                row.get('img', {}).get('value', None)
                for row in self.__query(sparql)
            ]
        return []


if __name__ == '__main__':
    # Manual smoke test: print the documents and images found for one
    # hard-coded Wikidata entity.
    demo_id = 'Q381834'
    source = WikidataSource()
    documents = source.query_documents_for_place(
        Place(None, None, None, None, demo_id))
    print(documents)
    images = source.query_images_for_place(
        Place(None, None, None, None, demo_id))
    print(images)