def parse_location_info(self):
        """Parse the location page in ``self.root`` and save it to the database.

        Collects the location name, the line-map id returned by
        ``self.parse_map()`` (when truthy) and the suburb list, flags the
        record as parsed and writes the collected fields to the ``location``
        row whose ``id`` equals ``self.id``.

        A page without a suburbs paragraph is reported on stdout and the
        ``suburbs`` field is simply not written; the remaining fields are
        still saved.

        Raises:
            Exception: if the page has no location heading.
        """
        result = {}

        name = self.get_node_text(self.root, "//div[@class='top_left']/h1")
        # progress output; printed before validation so a missing name shows up as "None"
        print('   %s' % (name,))
        if name is None:
            raise Exception('%s is not a valid location' % self.id)
        result['name'] = name.strip()

        # parse the line map
        map_id = self.parse_map()
        if map_id:
            result['map_id'] = map_id

        # parse suburbs
        suburbs = self.get_node_text(self.root, "//div[@class='suburbsInner']/p")
        if suburbs is None:
            # best effort: report the anomaly instead of crashing on None.split()
            print("empty suburbs, should not happen")
        else:
            # collapse whitespace inside every line, drop blank lines and
            # join what is left into one space-separated string
            cleaned = (re.sub(r'\s+', ' ', line).strip()
                       for line in suburbs.split('\n'))
            result['suburbs'] = ' '.join(part for part in cleaned if part)

        # set the parsed mark and update it to database
        result['parsed'] = 'T'
        db.update_table_with_dict('location', 'id', self.id, result)
    def parse_route_info(self):
        """Parse the route page in ``self.root`` and save it to the database.

        Collects the whitespace-normalised title, whatever
        ``self.parse_line_info`` derives from the stored route type and the
        title (only when a ``route`` row with ``self.id`` already exists), the
        line-map id returned by ``self.parse_map()`` (when truthy) and the
        route description, flags the record as parsed and writes the
        collected fields to the ``route`` row whose ``id`` equals ``self.id``.

        The description is built from every text fragment below the
        ``routeDescriptionInner`` div: each line has its whitespace
        collapsed and trimmed, blank lines are dropped and the rest is joined
        with newlines.

        Raises:
            Exception: if the page has no route heading.
        """
        result = {}

        title = self.get_node_text(self.root, "//h1[@class='WheelChairAccess']")
        # progress output; printed before validation so a missing title shows up as "None"
        print('   %s' % (title,))
        if title is None:
            raise Exception('%s is not a valid route' % self.id)
        title = re.sub(r'\s+', ' ', title).strip()
        result['title'] = title

        db.cur.execute("SELECT * from route WHERE id=?", (self.id,))
        row = db.cur.fetchone()

        if row:
            result.update(self.parse_line_info(row['type'], title))

        # parse the line map
        map_id = self.parse_map()
        if map_id:
            result['map_id'] = map_id

        # parse description
        desc_node = self.get_node(self.root, "//div[@class='routeDescriptionInner']")
        desc_lines = []
        # NOTE(review): assumes get_node returns None when nothing matches - confirm
        if desc_node is not None:
            # itertext() yields the text fragments one by one; the previous
            # code tried to add the iterator itself to a string, which always
            # failed and silently left the description empty
            for fragment in desc_node.itertext():
                for line in fragment.split('\n'):
                    line = re.sub(r'\s+', ' ', line).strip()
                    if line:
                        desc_lines.append(line)
        result['desc'] = '\n'.join(desc_lines)

        # set the parsed mark and update it to database
        result['parsed'] = 'T'
        db.update_table_with_dict('route', 'id', self.id, result)
    def parse_stop_info(self):
        """Parse the stop page in ``self.root`` and save it to the database.

        Most values are read positionally from the list of ``<td>`` cells
        (see ``fields_index`` for the cell number of each field). The stop's
        location is taken from the link in cell 1; its name is written to the
        ``location`` table and its id is stored with the stop. Finally the
        record is flagged as parsed and written to the ``stop`` row whose
        ``id`` equals ``self.id``.

        Returns:
            dict: the field values that were written to the ``stop`` table.

        Raises:
            Exception: if the page has no stop heading, or if the location
                link, its target id or its text is missing.
        """
        result = {}
        table_node = self.get_node(self.root, "//table[@id='stopInfo']")
        # NOTE(review): "//td" is an absolute path, so with lxml-style XPath
        # it matches every <td> of the document, not only those inside the
        # table. The hard-coded cell numbers below were chosen against this
        # list, so the expression is deliberately left as it is. The same
        # holds for the "//span[...]" address lookups.
        td_nodes = table_node.xpath("//td")

        # get title
        title = self.get_node_text(self.root, "//h1[@class='fn org']")
        if title is None:
            raise Exception('not a valid stop, title is empty')

        # the name is the text before the last '-'; without a dash the whole
        # heading is used unchanged
        head, dash, _ = title.rpartition('-')
        result['name'] = head.strip() if dash else title

        result['address_street'] = self.get_node_text(td_nodes[0], "//span[@class='street-address']")
        result['address_locality'] = self.get_node_text(td_nodes[0], "//span[@class='locality']")
        result['address_postalcode'] = self.get_node_text(td_nodes[0], "//span[@class='postal-code']")

        # location: the id is the last path segment of the link target
        location_name = self.get_node_text(td_nodes[1], "a")
        location_link = self.get_node(td_nodes[1], "a")
        href = location_link.get('href') if location_link is not None else None
        location_id = href.split('/')[-1] if href else ''
        # a missing link, target or link text (None as well as '') all count
        # as "location is empty"
        if not (location_id and location_name):
            raise Exception('not a valid stop, location is empty,')
        db.update_table('location', 'id', location_id, name=location_name)
        result['location_id'] = location_id

        # <li> elements without leading text have text None and are skipped
        result['tickets'] = ','.join(
            [li.text for li in td_nodes[3].xpath("ul/li") if li.text is not None])

        waiting = td_nodes[20].xpath("dl/dd")
        result['waiting_indoor'] = self.get_node_text(waiting[0])
        result['waiting_sheltered'] = self.get_node_text(waiting[1])

        bicycle = td_nodes[22].xpath("dl/dd")
        result['bicycle_racks'] = self.get_node_text(bicycle[0])
        result['bicycle_lockers'] = self.get_node_text(bicycle[1])
        result['bicycle_cage'] = self.get_node_text(bicycle[2])

        result['geo_latitude'] = self.get_node_text(self.root, "//span[@class='latitude']")
        result['geo_longitude'] = self.get_node_text(self.root, "//span[@class='longitude']")

        # field name -> position of the <td> cell holding its value
        fields_index = {
            'phone_lostproperty': 2,
            'phone_feedback': 4,
            'staff_available': 5,
            'phone_station': 6,
            'accessible': 7,
            'metcard_ticket_machine': 8,
            'myki_machine': 9,
            'myki_checks': 10,
            'vline_booking': 11,
            'seating': 12,
            'lighting': 13,
            'stairs': 14,
            'escalator': 15,
            'lifts': 16,
            'lockers': 17,
            'public_phone': 18,
            'public_toilet': 19,
            'car_parking': 21,
            'taxi_rank': 23,
            'tactile_paths': 24,
            'hearing_loop': 25,
        }

        for field, index in fields_index.items():
            try:
                result[field] = self.get_node_text(td_nodes[index])
            except Exception:
                # say which field failed, then let the error propagate
                print("Parse stop info error on %s, order %d" % (field, index))
                raise

        result['parsed'] = 'T'

        db.update_table_with_dict('stop', 'id', self.id, result)

        return result