def __parse_phones(self, item):
    """Extract phone numbers from the last cell of a table row.

    The last ``td`` is split on the ``;;;`` marker passed to ``replace_br``
    (presumably standing in for ``<br>`` tags -- confirm against that
    helper).  Only the first entry is considered, and only when it starts
    with ``(``.

    :param item: PyQuery-like callable selecting inside one row.
    :return: result of ``normalize_phones`` for the first entry, or ``[]``
        when the row has at most one cell or the entry does not start
        with ``(``.
    """
    # Rows with a single cell carry no extra info column.
    if len(item('td')) <= 1:
        return []

    marked_html = replace_br(item('td:last').html(), ';;;')
    entries = [strip(entry) for entry in PQ(marked_html).text().split(';;;')]

    if entries and entries[0].startswith('('):
        return normalize_phones(entries[0].split(','))
    return []
def __parse_base(self, item, city, point_type):
    """Build a ``Point`` from one ``.b-map-side`` item.

    :param item: PyQuery-like callable selecting inside the item markup.
    :param city: city name, prefixed to the address as ``u'г. <city>, ...'``.
    :param point_type: value stored in ``point.type``.
    :return: the populated ``Point``.

    The ``.b-map-side-more`` block is split on the ``;;;`` marker passed to
    ``replace_br``.  A line starting with ``u'Телефон:'`` supplies the
    phones; every other non-empty line is joined into the working time.
    """
    phone_prefix = u'Телефон:'

    point = Point()
    point.prov = self.uid
    point.type = point_type
    point.name = normalize_text(item('.b-map-side>h5').text())
    point.address, point.place = split_address_place(
        u'г. %s, %s' % (city, item('.b-map-side>p span:eq(0)').text()))

    # The second span holds "lat,lng"; it may be empty.
    coordinates = item('.b-map-side>p span:eq(1)').text()
    if coordinates:
        point.lat, point.lng = map(strip, coordinates.split(','))

    text_html = replace_br(item('.b-map-side-more').html(), ';;;')
    time_items = []
    for sub_item in map(normalize_text, PQ(text_html).text().split(';;;')):
        if not sub_item:
            continue
        if sub_item.startswith(phone_prefix):
            # Slice off the whole prefix.  Indexing with [len(prefix)] would
            # keep a single character and raise IndexError on a bare prefix.
            point.phones = normalize_phones(
                sub_item[len(phone_prefix):].split(','))
            continue
        time_items.append(sub_item)
    point.time = normalize_time(', '.join(time_items))

    point.check_information = CHECK_OFFICIAL
    if point.lat and point.lng:
        point.check_coordinates = CHECK_OFFICIAL
    else:
        warning_not_official_coordinates(point)
    return point
def get_offices(self):
    """Collect office points from the XML feed and the regional offices page.

    First every ``item`` element of the XML document at
    ``self.__offices_xml_url`` is passed to ``self.__parse_office``; falsy
    results are dropped.  Then the children of the first
    ``#content_internal span`` on the regional page are walked: an ``h2``
    starts a new point (its text becomes the name), and the next accepted
    element supplies the address and the phones (line prefixed with
    ``u'т.ф.:'``).

    :return: list of ``Point`` objects.
    """
    points = []

    items_tree = ET.fromstring(get_url(self.__offices_xml_url))
    for item in items_tree.iter('item'):
        point = self.__parse_office(item)
        if point:
            points.append(point)

    page = PQ(get_url(self.__regional_offices_page_url))
    point = None  # point opened by the most recent <h2>, if any
    for item in map(PQ, page('#content_internal span:eq(0)').children()):
        tag = item[0].tag
        if tag not in self.__regional_offices_tags:
            continue

        if tag == 'h2':
            point = Point()
            point.prov = self.uid
            point.type = TYPE_OFFICE
            point.name = trim_spaces_and_commas(normalize_text(item.text()))
            point.check_information = CHECK_OFFICIAL
            continue

        # Details without a preceding heading cannot be attributed.
        if not point:
            continue

        item_html = replace_br(item.html(), ';;;')
        sub_items = PQ(item_html).text().split(';;;')
        point.address, point.place = split_address_place(sub_items[0])
        for sub_item in map(normalize_text, sub_items[1:]):
            if sub_item.startswith(u'т.ф.:'):
                # Store under ``phones`` (plural), the attribute every other
                # parser fills; ``point.phone`` left the numbers unused.
                point.phones = normalize_phones(
                    sub_item[len(u'т.ф.:'):].split(','))

        warning_not_official_coordinates(point)
        points.append(point)
        point = None

    return points
def __parse_base_office_exchange(self, item, map_points, point_type, start_names):
    """Build a ``Point`` from an item block and match it to map coordinates.

    :param item: PyQuery-like callable selecting inside the item markup.
    :param map_points: iterable of ``(lng, lat, name, address, place)``
        tuples used to look up coordinates.
    :param point_type: value stored in ``point.type``.
    :param start_names: prefix (or tuple of prefixes) the name must start
        with.
    :return: the populated ``Point``, or ``None`` when the name does not
        start with ``start_names``.
    """
    point = Point()
    point.prov = self.uid
    point.type = point_type
    point.name = normalize_text(item('.name').text())
    if not point.name.startswith(start_names):
        return None

    point.address, point.place = split_address_place(item('.addres strong').text())

    # Phones and the per-day schedule are read from the last table row.
    last_row = item('.item_block tr:last')
    point.phones = normalize_phones(last_row('td:eq(0)').text().split(','))
    schedule = [
        u'пн-чт: ' + last_row('td:eq(2)').text(),
        u'пт: ' + last_row('td:eq(3)').text(),
        u'сб: ' + last_row('td:eq(4)').text(),
        u'вс: ' + last_row('td:eq(5)').text(),
    ]
    point.time = normalize_time(', '.join(schedule))
    point.check_information = CHECK_OFFICIAL

    for map_lng, map_lat, map_name, map_address, map_place in map_points:
        # Name and place only disqualify when both sides are present.
        if point.name and map_name and point.name not in map_name:
            continue
        # The address is mandatory on both sides and must be contained.
        if not (point.address and map_address and point.address in map_address):
            continue
        if point.place and map_place and point.place not in map_place:
            continue
        point.lat = map_lat
        point.lng = map_lng
        point.check_coordinates = CHECK_OFFICIAL
        break
    else:
        # No candidate matched.
        warning_not_official_coordinates(point)
    return point
def __parse_base_office_exchange(self, item):
    """Build a ``Point`` from table cells 1-4 of a row.

    Cell 1 is the name, cell 2 the address, cell 3 the working time and
    cell 4 a comma-separated phone list.  ``point.type`` is not set here.

    :param item: PyQuery-like callable selecting inside one row.
    :return: the populated ``Point``.
    """
    name_text = item('td:eq(1)').text()
    address_text = item('td:eq(2)').text()
    time_text = item('td:eq(3)').text()
    phones_text = item('td:eq(4)').text()

    point = Point()
    point.prov = self.uid
    point.name = normalize_text(name_text)
    point.address, point.place = split_address_place(address_text)
    point.time = normalize_time(time_text)
    point.phones = normalize_phones(phones_text.split(','))
    point.check_information = CHECK_OFFICIAL
    return point
def __parse_office(self, item):
    """Build an office ``Point`` from a two-column table row.

    The second cell holds the address, optionally followed by ``u'тел.'``
    and the phone list; anything after ``u'доб'`` in the phone part is
    dropped.

    :param item: PyQuery-like callable selecting inside one row.
    :return: the populated ``Point``.
    """
    point = Point()
    point.prov = self.uid
    point.type = TYPE_OFFICE
    point.name = normalize_text(item('td:eq(0)').text())

    contact_parts = item('td:eq(1)').text().split(u'тел.')
    point.address, point.place = split_address_place(contact_parts[0])
    if contact_parts[1:]:
        # Keep only the text before the first u'доб' marker.
        phones_text = contact_parts[1].partition(u'доб')[0]
        point.phones = normalize_phones(phones_text.split(','))

    point.time = self.__parse_time(item)
    point.check_information = CHECK_OFFICIAL
    warning_not_official_coordinates(point)
    return point
def __parse_office(self, item):
    """Build an office ``Point`` from a detail page.

    The name comes from ``h1``, the address from row 2, phone candidates
    from rows 5 and 6, and the working time from row 8 (second cell of each
    row).  Only candidates starting with ``u'+'`` or ``u'тел'`` are kept as
    phones.

    :param item: PyQuery-like callable selecting inside the page.
    :return: the populated ``Point``.
    """
    point = Point()
    point.prov = self.uid
    point.type = TYPE_OFFICE
    point.name = normalize_text(item('h1').text())
    point.address, point.place = split_address_place(item('tr:eq(2) td:eq(1)').text())

    candidates = []
    for selector in ('tr:eq(5) td:eq(1)', 'tr:eq(6) td:eq(1)'):
        cell_html = replace_br(item(selector).html(), ';;;')
        if cell_html:
            candidates.extend(map(strip, PQ(cell_html).text().split(';;;')))

    phone_prefixes = (u'+', u'тел')
    point.phones = normalize_phones(
        [entry for entry in candidates if entry.startswith(phone_prefixes)])

    point.time = normalize_time(item('tr:eq(8) td:eq(1)').text())
    point.check_information = CHECK_OFFICIAL
    warning_not_official_coordinates(point)
    return point
def __parse_office(self, item, city):
    """Build an office ``Point`` from a row that carries its own coordinates.

    :param item: PyQuery-like callable selecting inside one row.
    :param city: city name, prefixed to the address as ``u'г. <city>, ...'``.
    :return: the populated ``Point``.  Coordinates are marked official only
        when both ``.coord1`` and ``.coord2`` yield non-empty values.
    """
    point = Point()
    point.prov = self.uid
    point.type = TYPE_OFFICE

    header_spans = 'th .pointShowMaps span:eq(%d)'
    point.name = normalize_text(item(header_spans % 0).text())
    street = item(header_spans % 1).text()
    point.address, point.place = split_address_place(u'г. %s, %s' % (city, street))

    # Line breaks inside the cells are turned into comma separators.
    schedule_html = replace_br(item('td:eq(0)').html(), ', ')
    point.time = normalize_time(PQ(schedule_html).text())
    contacts_html = replace_br(item('td:eq(1)').html(), ', ')
    point.phones = normalize_phones(PQ(contacts_html).text().split(','))

    point.lat = normalize_text(item('th .item_coords .coord1').text())
    point.lng = normalize_text(item('th .item_coords .coord2').text())

    point.check_information = CHECK_OFFICIAL
    if not (point.lat and point.lng):
        warning_not_official_coordinates(point)
    else:
        point.check_coordinates = CHECK_OFFICIAL
    return point
def __parse_office_exchange(self, item, city, coordinates, point_type, point_keywords):
    """Build a ``Point`` from a table row, filtered by name prefix.

    :param item: PyQuery-like callable selecting inside one row; its
        ``td:eq(1)`` lists are removed in place before the time is read.
    :param city: city name prefixed to the address.
    :param coordinates: passed through to ``self.__get_point_coordinate``.
    :param point_type: value stored in ``point.type``.
    :param point_keywords: prefix (or tuple of prefixes) the name must
        start with.
    :return: the populated ``Point``, or ``None`` when the name does not
        start with ``point_keywords``.
    """
    point = Point()
    point.prov = self.uid
    point.type = point_type
    point.name = normalize_text(item('th:eq(0)').text())
    if not point.name.startswith(point_keywords):
        return None

    # First line of the cell is the address, the remainder is the place.
    address_html = replace_br(item('td:eq(0)').html(), ';;;')
    address_parts = PQ(address_html).text().split(';;;', 1)
    point.address = normalize_address(u'%s, %s' % (city, address_parts[0]))
    if address_parts[1:]:
        point.place = normalize_text(address_parts[1])

    # Drop list markup so only the remaining cell text is read as the time.
    item('td:eq(1) ul, td:eq(1) li').remove()
    point.time = normalize_time(item('td:eq(1)').text())

    # Prefer one phone per <p>; fall back to the whole cell.
    phone_nodes = item('td:eq(2) p') or item('td:eq(2)')
    point.phones = normalize_phones([PQ(node).text() for node in phone_nodes])

    point.check_information = CHECK_OFFICIAL
    point.lat, point.lng = self.__get_point_coordinate(point, coordinates)
    if not (point.lat and point.lng):
        warning_not_official_coordinates(point)
    else:
        point.check_coordinates = CHECK_OFFICIAL
    return point
def __parse_exchange(self, item):
    """Build an exchange ``Point`` from one XML element.

    Reads the ``name``, ``region``, ``address``, ``location``,
    ``lattitude``, ``longitude``, ``time`` and ``phones`` children.  When
    ``region`` is absent or has no text the city defaults to ``u'Минск'``.

    :param item: ElementTree-style element exposing ``find(tag).text``.
    :return: the populated ``Point``.
    """
    point = Point()
    point.prov = self.uid
    point.type = TYPE_EXCHANGE
    point.name = normalize_text(item.find('name').text)

    # An element without children is falsy, so a plain truth test would
    # discard every leaf <region>; compare against None explicitly.
    region = item.find('region')
    if region is not None and region.text:
        city = region.text
    else:
        city = u'Минск'
    address = item.find('address').text
    point.address = normalize_address(u'г. %s, %s' % (city.title(), address))
    point.place = normalize_text(item.find('location').text)

    # NOTE: 'lattitude' (sic) is the tag name read from the feed.
    point.lat = item.find('lattitude').text
    point.lng = item.find('longitude').text
    point.time = normalize_time(item.find('time').text)

    phones_text = item.find('phones').text
    if phones_text:
        point.phones = normalize_phones(phones_text.split(','))

    point.check_information = CHECK_OFFICIAL
    if point.lat and point.lng:
        point.check_coordinates = CHECK_OFFICIAL
    else:
        warning_not_official_coordinates(point)
    return point
def __parse_office(self, item, city_name):
    """Build an office ``Point`` on top of ``self.__parse_base``.

    The base point's name, phones and time are overwritten from the row:
    name from the link in cell 1, phones from the ``<i>`` in cell 1 (split
    with ``self.__phone_splitter``), time from cell 2.

    :param item: PyQuery-like callable selecting inside one row.
    :param city_name: city name forwarded to ``self.__parse_base``.
    :return: the populated ``Point``.
    """
    point = self.__parse_base(item, city_name, TYPE_OFFICE)
    point.name = item('td:eq(1) a').text()

    # An empty string stands in when the cell has no phone text.
    raw_phones = item('td:eq(1) i').text() or ''
    point.phones = normalize_phones(self.__phone_splitter.split(raw_phones))

    schedule_cell = item('td:eq(2)')
    point.time = self.__parse_time(schedule_cell)
    return point