def query_places_at_location(self, location, radius=1, limit=10):
    """Query Wikidata for places around a coordinate.

    Args:
        location: Indexable pair; element 0 is passed to the query template
            as ``latitude`` and element 1 as ``longitude``.
        radius: Forwarded to the query template as ``max_radius``.
        limit: Forwarded to the query template as ``limit``.

    Returns:
        A list of ``(Place, distance)`` tuples. ``distance`` is the float
        value of the result's ``dist`` binding, or ``None`` when that
        binding is missing or empty. Results without a usable label or
        without a parseable ``Point(lon lat)`` location are skipped.
    """
    sparql = WIKIDATA_PLACE_QUERY.format(
        latitude=location[0],
        longitude=location[1],
        max_radius=radius,
        limit=limit,
    )

    found = []
    for row in self.__query(sparql):
        # The entity id is the last path segment of the entity URI.
        entity_uri = row.get('a', {}).get('value', '')
        wikidata_id = entity_uri.split('/')[-1]

        # Only process results with a sensible label; a label equal to
        # the id means no real label was available.
        label = row.get('aLabel', {}).get('value', None)
        if not label or label == wikidata_id:
            continue

        point = row.get('location', {}).get('value', None)
        if not (point and point.startswith('Point(') and point.endswith(')')):
            continue

        # Strip the "Point(" prefix and ")" suffix; the literal lists
        # longitude first, then latitude.
        coords = point[6:-1].split(' ')
        if len(coords) != 2:
            continue
        try:
            lat = float(coords[1])
            lon = float(coords[0])
        except ValueError:
            continue

        raw_distance = row.get('dist', {}).get('value', None)
        distance = float(raw_distance) if raw_distance else None

        found.append((Place(None, label, lat, lon, wikidata_id), distance))
    return found
def query_places_by_name(self, name, limit=10):
    """Query Wikidata for places matching a name.

    Args:
        name: Search term; it is lower-cased before being inserted into
            the query template.
        limit: Forwarded to the query template as ``limit``.

    Returns:
        A list of ``Place`` objects. Results without a usable label or
        without a parseable ``Point(lon lat)`` location are skipped.
    """
    sparql = WIKIDATA_PLACE_BY_NAME_QUERY.format(name=name.lower(), limit=limit)

    found = []
    for row in self.__query(sparql):
        # The entity id is the last path segment of the entity URI.
        entity_uri = row.get('a', {}).get('value', '')
        wikidata_id = entity_uri.split('/')[-1]

        # Only process results with a sensible label; a label equal to
        # the id means no real label was available.
        label = row.get('aLabel', {}).get('value', None)
        if not label or label == wikidata_id:
            continue

        point = row.get('location', {}).get('value', None)
        if not (point and point.startswith('Point(') and point.endswith(')')):
            continue

        # Strip the "Point(" prefix and ")" suffix; the literal lists
        # longitude first, then latitude.
        coords = point[6:-1].split(' ')
        if len(coords) != 2:
            continue
        try:
            lat = float(coords[1])
            lon = float(coords[0])
        except ValueError:
            continue

        found.append(Place(None, label, lat, lon, wikidata_id))
    return found
def obj_parse(fname):
    """Build a ``Place`` from a stored HTML object.

    The coordinates are read from the ``mapLat`` / ``mapLng`` meta tags in
    the document head and the page title is used as the place name. When
    both coordinates are available, Wikidata is queried around that point
    and the best match (as chosen by ``__find_best_wikidata``) supplies the
    Wikidata id.

    Args:
        fname: Passed to ``__load_obj``, which yields ``(name, url, body)``.

    Returns:
        ``Place(None, title, lat, lon, wikidata_id)``; ``title``, ``lat``,
        ``lon`` and ``wikidata_id`` are each ``None`` when they could not
        be determined.
    """
    name, _url, body = __load_obj(fname)
    tree = lxml.html.fromstring(body)

    # A missing meta tag raises IndexError, a non-numeric value raises
    # ValueError; either way both coordinates are treated as unknown.
    try:
        lat = float(
            tree.xpath('//head/meta[@name="mapLat"]/@content')[0].strip())
        lon = float(
            tree.xpath('//head/meta[@name="mapLng"]/@content')[0].strip())
    except (ValueError, IndexError):
        lat = lon = None

    title_nodes = tree.xpath('//head/title/text()')
    title = title_nodes[0].strip() if title_nodes else None

    wikidata_id = None
    if lat is not None and lon is not None:
        wikidata = WikidataSource()
        # Pause before each remote query (presumably to throttle requests).
        time.sleep(1)
        # NOTE(review): confirm WikidataSource exposes `query_name` with
        # this (location, radius, limit) signature.
        results = wikidata.query_name((lat, lon), radius=1, limit=30)
        best = __find_best_wikidata(name, results, threshold=0.1)
        if best is not None:
            wikidata_id, dist = best
            print(name, wikidata_id, dist)

    # Abuse title as place name; may be helpful for fuzzy matching
    return Place(None, title, lat, lon, wikidata_id)
def patch_place(self, id_: int, place: models.place.Place) -> models.place.Place:
    """Update the stored place ``id_`` with the fields of ``place``.

    Args:
        id_: Identifier of the place to update; it replaces whatever id
            ``place`` itself carries.
        place: API model supplying name, latitude, longitude and
            wikidata_id.

    Returns:
        An API model that carries only the id returned by
        ``self._db.update_place``.
    """
    record = Place(
        id=id_,
        name=place.name,
        latitude=place.latitude,
        longitude=place.longitude,
        wikidata_id=place.wikidata_id,
    )
    updated_id = self._db.update_place(record)
    return models.place.Place(id=updated_id)
def put_place(self, place: models.place.Place) -> models.place.Place:
    """Insert a new place built from the fields of ``place``.

    Args:
        place: API model supplying name, latitude, longitude and
            wikidata_id. Any id it carries is ignored; the record is
            created with ``id=None``.

    Returns:
        An API model that carries only the id returned by
        ``self._db.insert_place``.
    """
    record = Place(
        id=None,
        name=place.name,
        latitude=place.latitude,
        longitude=place.longitude,
        wikidata_id=place.wikidata_id,
    )
    new_id = self._db.insert_place(record)
    return models.place.Place(id=new_id)
def fetch_place(self, id_) -> Optional[Place]:
    """Load a single place by primary key.

    Args:
        id_: Value matched against ``places.p_id``.

    Returns:
        The ``Place`` built from the first matching row, or ``None`` when
        no row matches.
    """
    sql = (
        ' SELECT p_id, p_name, p_wikidata, p_lat, p_lon'
        ' FROM places'
        ' WHERE p_id = %s '
    )
    with self._db.transaction() as cursor:
        cursor.execute(sql, (id_,))
        first = next(iter(cursor.fetchall()), None)
        if first is not None:
            # Column order follows the SELECT list above.
            p_id, p_name, p_wikidata, p_lat, p_lon = first
            return Place(
                id=p_id,
                name=p_name,
                latitude=p_lat,
                longitude=p_lon,
                wikidata_id=p_wikidata,
            )
    return None
def fetch_places_by_name(self, name, limit=10):
    """Find stored places whose name contains ``name``.

    Args:
        name: Search term. It is lower-cased and wrapped in ``%`` wildcards
            to form the ``LIKE`` pattern; ``%`` and ``_`` inside ``name``
            are not escaped and therefore act as wildcards too.
        limit: Maximum number of rows requested from the database.

    Returns:
        A list of ``Place`` objects, one per matching row.
    """
    sql = (
        ' SELECT p_id, p_name, p_wikidata, p_lat, p_lon'
        ' FROM places'
        ' WHERE p_name LIKE %s'
        ' LIMIT %s '
    )
    pattern = '%' + name.lower() + '%'

    matches = []
    with self._db.transaction() as cursor:
        cursor.execute(sql, (pattern, limit))
        # Column order follows the SELECT list above.
        matches.extend(
            Place(p_id, p_name, p_lat, p_lon, p_wikidata)
            for p_id, p_name, p_wikidata, p_lat, p_lon in cursor.fetchall()
        )
    return matches
def fetch_places_at_location(self, location, radius=1.0, limit=10):
    """Find stored places near a coordinate, nearest first.

    The search uses a flat approximation: the squared difference in
    latitude and longitude (in degrees) is compared against the squared
    radius, also expressed in degrees.

    Args:
        location: Indexable pair ``(latitude, longitude)``.
        radius: Search radius. It is divided by 111.0 to obtain degrees,
            so it is presumably given in kilometres (about 111 km per
            degree of latitude) -- confirm against callers.
        limit: Maximum number of rows requested from the database.

    Returns:
        A list of ``(Place, distance)`` tuples ordered by increasing
        distance. ``distance`` is the square root of the squared
        coordinate difference, i.e. it is expressed in degrees, not in
        the unit of ``radius``.
    """
    radius_deg = radius / 111.0
    max_dist2 = radius_deg * radius_deg

    # The squared distance is computed in a derived table and filtered /
    # sorted by its column alias. Assigning a user variable (@dist2 := ...)
    # in the SELECT list and reading it in WHERE / ORDER BY of the same
    # statement is not reliable: MySQL leaves the evaluation order of such
    # assignments undefined, so the filter could see a stale or NULL value.
    # Ordering is ascending so that LIMIT keeps the nearest places.
    query = '''
        SELECT p_id, p_name, p_wikidata, p_lat, p_lon, dist2
        FROM (
            SELECT p_id, p_name, p_wikidata, p_lat, p_lon,
                   ((p_lat - %s) * (p_lat - %s)
                    + (p_lon - %s) * (p_lon - %s)) AS dist2
            FROM places
        ) AS candidates
        WHERE dist2 <= %s
        ORDER BY dist2 ASC
        LIMIT %s
    '''
    # Placeholder order: latitude twice, longitude twice, radius^2, limit.
    params = (
        location[0], location[0],
        location[1], location[1],
        max_dist2,
        limit,
    )

    results = []
    with self._db.transaction() as c:
        c.execute(query, params)
        for p_id, p_name, p_wikidata, p_lat, p_lon, dist2 in c.fetchall():
            place = Place(p_id, p_name, p_lat, p_lon, p_wikidata)
            results.append((place, math.sqrt(dist2)))
    return results
wikipedia_url = WIKIPEDIA_ARTICLE_API.format(title=page) response = self.__query_wikipedia(wikipedia_url) for article in response: article_url = f'https://de.wikipedia.org/wiki/{urllib.parse.quote(article["title"])}' doc = Document(id=None, title=article['title'], author=None, year=None, text=article['extract'], source=article_url) documents.append(doc) return documents def query_images_for_place(self, place): if not place.wikidata_id: return [] query = WIKIDATA_IMAGE_QUERY.format(wikidata_id=place.wikidata_id) results = self.__query(query) images = [] for result in results: url = result.get('img', {}).get('value', None) images.append(url) return images if __name__ == '__main__': s = WikidataSource() print(s.query_documents_for_place(Place(None, None, None, None, 'Q381834'))) print(s.query_images_for_place(Place(None, None, None, None, 'Q381834')))