class POI(object):
    """Scrape points of interest from a category page and geocode them.

    A page is fetched over HTTP, parsed into an element tree, and reduced to
    ``(name element, address element)`` pairs using one of two known page
    layouts ("old" and "new").  Each address is then resolved to coordinates
    through ``self._geo``.
    """

    def __init__(self):
        self._logger = logging.getLogger('POI')
        # One shared session for every request made by this instance.
        self._session = requests.session()
        self._geo = Geo()

    def _fetch_and_parse(self, url):
        """Download *url* and return it parsed by ``html.fromstring``.

        :param url: address of the page to fetch
        :returns: the parsed document tree
        :raises Exception: when the response status code is not 200
        """
        resp = self._session.get(url)

        if resp.status_code != 200:
            # Interpolate here: Exception() does not apply %-formatting to
            # its arguments, so passing them separately yields a tuple
            # instead of a readable message.
            raise Exception(
                "HTTP request <%s> returned status code %d"
                % (url, resp.status_code))

        return html.fromstring(resp.text.encode("utf8"))

    def get_points(self, category_name, url):
        """Return the geocoded points listed on a category page.

        :param category_name: label of the category, used for logging only
        :param url: address of the category page
        :returns: list of dicts with the keys ``name``, ``address``, ``lat``
            and ``lon``; ``lat`` and ``lon`` are ``False`` when the geocoder
            returned ``None``
        :raises Exception: when the page cannot be fetched or matches neither
            of the known layouts
        """
        self._logger.info('Category: {}'.format(category_name))

        tree = self._fetch_and_parse(url)

        # NOTE(review): detection below requires class to be exactly
        # "object", while _get_points_from_new_tree matches any class
        # containing "object" - confirm whether that difference is intended.
        if tree.xpath('//div[@class="Paragraph"]//li/p//a'):
            res = self._get_points_from_old_tree(tree)
        elif tree.xpath('//article[@class="object"]'):
            res = self._get_points_from_new_tree(tree)
        else:
            raise Exception('Unknown POI page format: <{}>'.format(url))

        points = []

        self._logger.info('Points: {}'.format(len(res)))

        for name, address in res:
            # An element without leading text has ``text`` set to None;
            # treat that as an empty string instead of crashing.
            name = (name.text or '').strip()
            address = (address.text or '').strip()

            # Skip entries with no address, and multi-line addresses that
            # do not mention Poznan (places outside the city).
            if address == '' or ('Pozna' not in address and "\n" in address):
                self._logger.info("Skipping! - %s: %s", name, address)
                continue

            # Keep only the part before the first comma, "(" or "-".
            street = re.split(r'[,\(-]', address)[0].strip()

            self._logger.debug('%s - %s', name, street)

            pos = self._geo.query(street + u', Poznań')

            points.append({
                "name": name,
                "address": street,
                "lat": pos['lat'] if pos is not None else False,
                "lon": pos['lon'] if pos is not None else False,
            })

        return points

    @staticmethod
    def _get_points_from_old_tree(tree):
        """Pair up name and address elements of an old-format page.

        @see http://www.poznan.pl/mim/inwestycje/biurowce,poi,4661/ [old format]

        :param tree: parsed document exposing ``xpath()``
        :returns: list of ``(name element, address element)`` tuples
        """
        names = tree.xpath('//div[@class="Paragraph"]//li/p//a')
        addresses = tree.xpath('//div[@class="Paragraph"]//li/p[2]')

        # A real list, so that callers can take len() on Python 3 as well.
        return list(zip(names, addresses))

    @staticmethod
    def _get_points_from_new_tree(tree):
        """Pair up name and address elements of a new-format page.

        @see http://www.poznan.pl/mim/osiedla/muzea-w-poznaniu,poi,202,12/ [new format]

        :param tree: parsed document exposing ``xpath()``
        :returns: list of ``(name element, address element)`` tuples
        """
        names = tree.xpath('//article[contains(@class, "object")]//h2')
        addresses = tree.xpath('//article[contains(@class, "object")]//p[1]')

        # A real list, so that callers can take len() on Python 3 as well.
        return list(zip(names, addresses))