コード例 #1
0
    def parse_stop_route(self):
        """Record the routes listed on this stop's page and their timetables.

        Reads the transport type and the route list from ``self.root``. For
        every route ``<li>`` it:

        * calls ``db.update_table('route', ...)`` with the route name and the
          transport type;
        * scans the nested ``<ul>`` elements for a timetable title and link;
        * calls ``db.update_table('timetable_index', ...)`` once, keyed by the
          id returned from ``utils.get_timetable_id(route_id, stop_id,
          line_id)``.

        One progress line is printed per route. Nothing is returned.
        """
        stop_id = self.id
        # Named transport_type so the builtin ``type`` is not shadowed; the
        # ``type=`` keyword passed to db.update_table below is unchanged.
        transport_type = self.get_node_text(
            self.root, "//div[@class='transportType']/h3").lower()

        # get route info in this stop
        route_nodes = self.root.xpath(
            "//div[@class='lineInformationInner']/div/ul/li")

        for node in route_nodes:
            route_name = self.get_node_text(node, 'a')
            # The route id is the last '/'-separated segment of the link.
            route_id = self.get_node(node, 'a').get('href').split('/')[-1]
            db.update_table('route', 'id', route_id,
                name=route_name, type=transport_type)

            # Defaults used when the route has no nested <ul> at all.
            title = link = line_id = None

            for ttn in node.xpath('ul'):
                title = self.get_node_text(ttn, 'li')
                link = self.get_node(ttn, 'li/ul/li/a').get('href')
                groups = self.timetable_url_pattern.findall(link)
                # NOTE(review): title and link are overwritten on every pass,
                # but line_id only when the pattern matches, so with several
                # <ul> children line_id can come from an earlier one than
                # title/link. Kept as-is; confirm whether that is intended.
                if groups:
                    # groups[0][0] is the first capture group of the first
                    # match -- presumably the pattern has several groups;
                    # verify against timetable_url_pattern.
                    line_id = groups[0][0]

            tt_id = utils.get_timetable_id(route_id, stop_id, line_id)
            # Single parenthesised argument: parses as a print statement on
            # Python 2 and as a function call on Python 3. The three leading
            # spaces reproduce the output of the former `print "  ", ...`.
            print("   parsed timetable '%s' %s" % (title, tt_id))
            db.update_table('timetable_index', 'id', tt_id,
                route_id=route_id, stop_id=stop_id, line_id=line_id,
                title=title, link=link)
コード例 #2
0
    def parse_timetable(self):
        """Index the linked timetables found under the ``timetablesInner`` list.

        For every ``<li>`` below that list whose ``./a`` has a non-empty
        ``href``, extracts the line id from the link with
        ``self.timetable_lineid_pattern`` and calls
        ``db.update_table('timetable_index', ...)`` with ``self.id`` as the
        route id. One progress line is printed per stored entry. Nothing is
        returned.

        Raises:
            IndexError: if a non-empty link contains no match for
                ``self.timetable_lineid_pattern``.
        """
        tt_node = self.get_node(self.root, "//div[@class='timetablesInner']/ul")
        li_nodes = tt_node.xpath(".//li")
        for node in li_nodes:
            title = self.get_node_text(node, "./a")
            link = self.get_node(node, "./a").get('href')
            # Skip entries without a usable link. The truthiness test covers
            # the empty string and also None, which element-style .get()
            # returns for a missing attribute; None would otherwise reach
            # findall() below and raise.
            if not link:
                continue

            # First match is taken as the line id; no match raises IndexError.
            line_id = self.timetable_lineid_pattern.findall(link)[0]
            tt_id = utils.get_timetable_id(route_id=self.id, line_id=line_id)
            # Single parenthesised argument: parses as a print statement on
            # Python 2 and as a function call on Python 3. The three leading
            # spaces reproduce the output of the former `print "  ", ...`.
            print("   parsed timetable '%s' %s" % (title, tt_id))

            db.update_table('timetable_index', 'id', tt_id,
                route_id=self.id, line_id=line_id, link=link, title=title)