コード例 #1
0
ファイル: places.py プロジェクト: macbre/pyrabot
class POI(object):
    """Scrape point-of-interest listings and geocode their addresses.

    A listing page is fetched over HTTP, parsed into an HTML tree and
    searched for (name, address) element pairs in one of two known page
    layouts. Every usable address is then passed to ``Geo.query``.
    """

    def __init__(self):
        self._logger = logging.getLogger('POI')
        # A single session is reused for every page request.
        self._session = requests.session()
        self._geo = Geo()

    def _fetch_and_parse(self, url):
        """Download ``url`` and return the parsed HTML tree.

        :param url: address of the page to fetch
        :return: the result of ``html.fromstring`` for the response body
        :raises Exception: when the response status code is not 200
        """
        resp = self._session.get(url)

        if resp.status_code != 200:
            # Interpolate explicitly: Exception() does not apply %-formatting
            # to its arguments the way the logging functions do.
            raise Exception("HTTP request <%s> returned status code %d"
                            % (url, resp.status_code))

        return html.fromstring(resp.text.encode("utf8"))

    def get_points(self, category_name, url):
        """Return the geocoded points listed on a category page.

        :param category_name: label of the category, used for logging only
        :param url: address of the listing page
        :return: list of dicts with the keys ``name``, ``address``, ``lat``
            and ``lon``; ``lat`` and ``lon`` are ``False`` when the geocoder
            returned ``None`` for the address
        :raises Exception: when the page matches neither known layout
        """
        self._logger.info('Category: %s', category_name)

        tree = self._fetch_and_parse(url)

        if tree.xpath('//div[@class="Paragraph"]//li/p//a'):
            res = self._get_points_from_old_tree(tree)
        elif tree.xpath('//article[@class="object"]'):
            res = self._get_points_from_new_tree(tree)
        else:
            raise Exception('Unknown POI page format: <{}>'.format(url))

        points = []

        self._logger.info('Points: %d', len(res))

        for name, address in res:
            # ``.text`` is None when an element has no leading text node;
            # treat that the same as an empty string instead of crashing.
            name = (name.text or '').strip()
            address = (address.text or '').strip()

            # no address, or a place outside of Poznan
            if address == '' or ('Pozna' not in address and "\n" in address):
                self._logger.info("Skipping! - %s: %s", name, address)
                continue

            # Keep only the part before the first comma, "(" or "-".
            street = re.split(r'[,\(-]', address)[0].strip()

            self._logger.debug('%s - %s', name, street)

            pos = self._geo.query(street + u', Poznań')

            points.append({
                "name": name,
                "address": street,
                "lat": pos['lat'] if pos is not None else False,
                "lon": pos['lon'] if pos is not None else False,
            })

        return points

    @staticmethod
    def _get_points_from_old_tree(tree):
        """
        Return a list of (name element, address element) pairs.

        @see http://www.poznan.pl/mim/inwestycje/biurowce,poi,4661/ [old format]
        """
        names = tree.xpath('//div[@class="Paragraph"]//li/p//a')
        addresses = tree.xpath('//div[@class="Paragraph"]//li/p[2]')

        # Materialise the pairs: on Python 3 zip() is a lazy iterator and
        # the caller needs len().
        return list(zip(names, addresses))

    @staticmethod
    def _get_points_from_new_tree(tree):
        """
        Return a list of (name element, address element) pairs.

        @see http://www.poznan.pl/mim/osiedla/muzea-w-poznaniu,poi,202,12/ [new format]
        """
        names = tree.xpath('//article[contains(@class, "object")]//h2')
        addresses = tree.xpath('//article[contains(@class, "object")]//p[1]')

        # Materialise the pairs: on Python 3 zip() is a lazy iterator and
        # the caller needs len().
        return list(zip(names, addresses))
コード例 #2
0
class POI(object):
    """Scrape point-of-interest listings and geocode their addresses.

    A listing page is fetched over HTTP, parsed into an HTML tree and
    searched for (name, address) element pairs in one of two known page
    layouts. Every usable address is then passed to ``Geo.query``.
    """

    def __init__(self):
        self._logger = logging.getLogger('POI')
        # A single session is reused for every page request.
        self._session = requests.session()
        self._geo = Geo()

    def _fetch_and_parse(self, url):
        """Download ``url`` and return the parsed HTML tree.

        :param url: address of the page to fetch
        :return: the result of ``html.fromstring`` for the response body
        :raises Exception: when the response status code is not 200
        """
        resp = self._session.get(url)

        if resp.status_code != 200:
            # Interpolate explicitly: Exception() does not apply %-formatting
            # to its arguments the way the logging functions do.
            raise Exception("HTTP request <%s> returned status code %d"
                            % (url, resp.status_code))

        return html.fromstring(resp.text.encode("utf8"))

    def get_points(self, category_name, url):
        """Return the geocoded points listed on a category page.

        :param category_name: label of the category, used for logging only
        :param url: address of the listing page
        :return: list of dicts with the keys ``name``, ``address``, ``lat``
            and ``lon``; ``lat`` and ``lon`` are ``False`` when the geocoder
            returned ``None`` for the address
        :raises Exception: when the page matches neither known layout
        """
        self._logger.info('Category: %s', category_name)

        tree = self._fetch_and_parse(url)

        if tree.xpath('//div[@class="Paragraph"]//li/p//a'):
            res = self._get_points_from_old_tree(tree)
        elif tree.xpath('//article[@class="object"]'):
            res = self._get_points_from_new_tree(tree)
        else:
            raise Exception('Unknown POI page format: <{}>'.format(url))

        points = []

        self._logger.info('Points: %d', len(res))

        for name, address in res:
            # ``.text`` is None when an element has no leading text node;
            # treat that the same as an empty string instead of crashing.
            name = (name.text or '').strip()
            address = (address.text or '').strip()

            # no address, or a place outside of Poznan
            if address == '' or ('Pozna' not in address and "\n" in address):
                self._logger.info("Skipping! - %s: %s", name, address)
                continue

            # Keep only the part before the first comma, "(" or "-".
            street = re.split(r'[,\(-]', address)[0].strip()

            self._logger.debug('%s - %s', name, street)

            pos = self._geo.query(street + u', Poznań')

            points.append({
                "name": name,
                "address": street,
                "lat": pos['lat'] if pos is not None else False,
                "lon": pos['lon'] if pos is not None else False,
            })

        return points

    @staticmethod
    def _get_points_from_old_tree(tree):
        """
        Return a list of (name element, address element) pairs.

        @see http://www.poznan.pl/mim/inwestycje/biurowce,poi,4661/ [old format]
        """
        names = tree.xpath('//div[@class="Paragraph"]//li/p//a')
        addresses = tree.xpath('//div[@class="Paragraph"]//li/p[2]')

        # Materialise the pairs: on Python 3 zip() is a lazy iterator and
        # the caller needs len().
        return list(zip(names, addresses))

    @staticmethod
    def _get_points_from_new_tree(tree):
        """
        Return a list of (name element, address element) pairs.

        @see http://www.poznan.pl/mim/osiedla/muzea-w-poznaniu,poi,202,12/ [new format]
        """
        names = tree.xpath('//article[contains(@class, "object")]//h2')
        addresses = tree.xpath('//article[contains(@class, "object")]//p[1]')

        # Materialise the pairs: on Python 3 zip() is a lazy iterator and
        # the caller needs len().
        return list(zip(names, addresses))