Ejemplo n.º 1
0
 def __parse_address(self, city, address):
     """Compose a full address for ``city`` and split it into address/place.

     The post-office abbreviation in ``address`` is rewritten first. Cities
     found in ``self.__full_settlement_names`` are used as-is (and only
     prepended when the address text does not already contain them); every
     other city is prepended with the city prefix.

     Returns the result of ``split_address_place`` for the composed string.
     """
     street = address.replace(u'п/о', u'пос.')
     if city not in self.__full_settlement_names:
         return split_address_place(u'г. %s, %s' % (city, street))
     if city in street:
         # The settlement name is already part of the address text.
         return split_address_place(street)
     return split_address_place(u'%s, %s' % (city, street))
Ejemplo n.º 2
0
    def __parse_base_office_exchange(self, item, map_points, point_type, start_names):
        """Parse one office/exchange entry and look up its map coordinates.

        ``item`` is queried with CSS selectors for the name, address, phones
        and per-day opening hours. ``start_names`` is passed to
        ``str.startswith``; entries whose name does not match are skipped.

        Returns the populated ``Point``, or ``None`` when the name does not
        start with any of ``start_names``.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type
        point.name = normalize_text(item('.name').text())
        if not point.name.startswith(start_names):
            return None

        point.address, point.place = split_address_place(item('.addres strong').text())
        details = item('.item_block tr:last')
        point.phones = normalize_phones(details('td:eq(0)').text().split(','))
        # One labelled chunk per schedule column of the details row.
        schedule = [
            u'пн-чт: ' + details('td:eq(2)').text(),
            u'пт: ' + details('td:eq(3)').text(),
            u'сб: ' + details('td:eq(4)').text(),
            u'вс: ' + details('td:eq(5)').text(),
        ]
        point.time = normalize_time(', '.join(schedule))
        point.check_information = CHECK_OFFICIAL

        # NOTE(review): tuples are unpacked as (lng, lat, ...) here; confirm
        # this matches the order used by whatever builds ``map_points``.
        for lng, lat, name, address, place in map_points:
            # Name and place only disqualify a candidate when both sides are
            # known; the address must be known on both sides and match.
            if point.name and name and point.name not in name:
                continue
            if not (point.address and address and point.address in address):
                continue
            if point.place and place and point.place not in place:
                continue
            point.lat = lat
            point.lng = lng
            point.check_coordinates = CHECK_OFFICIAL
            break
        else:
            warning_not_official_coordinates(point)

        return point
Ejemplo n.º 3
0
    def __parse_terminal(self, item):
        """Parse a terminal table row into a ``Point`` and attach coordinates.

        Cells 0..3 of ``item`` supply city, place, street address and opening
        hours. Coordinates are taken from the first entry of
        ``self.__get_coordinates()`` with type id '2' whose description tokens
        all occur in the point address; only addresses containing the capital
        city name are considered. When nothing matches, the point is reported
        through ``warning_not_official_coordinates``.
        """
        point = Point()
        point.prov = self.uid
        point.type = TYPE_TERMINAL

        town = normalize_text(item('td:eq(0)').text())
        street = normalize_text(item('td:eq(2)').text())
        full_address = u'г. %s, %s' % (town.title(), street)
        point.address, point.place = split_address_place(full_address)
        # The dedicated place column replaces whatever the split produced.
        point.place = normalize_text(item('td:eq(1)').text())
        point.time = normalize_time(item('td:eq(3)').text())
        point.check_information = CHECK_OFFICIAL

        for lat, lng, type_id, description in self.__get_coordinates():
            if (u'Минск' in point.address and type_id == '2'
                    and all(token in point.address for token in description.split())):
                point.lat = lat
                point.lng = lng
                point.check_coordinates = CHECK_OFFICIAL
                break
        else:
            warning_not_official_coordinates(point)
        return point
Ejemplo n.º 4
0
    def __parse_base_office_exchange(self, item, point_type, name_keywords):
        """Parse an office/exchange table row and attach coordinates.

        ``name_keywords`` is passed to ``str.startswith``; rows whose name
        does not match are skipped and ``None`` is returned.

        Coordinates come from the first entry of ``self.__get_coordinates()``
        with type id '1' whose description tokens each occur in the point
        address or the point name; only addresses containing the capital city
        name are considered. Unmatched points are reported through
        ``warning_not_official_coordinates``.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type
        point.name = normalize_text(item('th:eq(0) a:eq(0)').text())
        if not point.name.startswith(name_keywords):
            return None

        town = normalize_text(item('td:eq(1)').text())
        street = normalize_text(item('td:eq(2)').text())
        full_address = u'г. %s, %s' % (town, street)
        point.address, point.place = split_address_place(full_address)
        point.check_information = CHECK_OFFICIAL

        for lat, lng, type_id, description in self.__get_coordinates():
            if (u'Минск' in point.address and type_id == '1'
                    and all(token in point.address or token in point.name
                            for token in description.split())):
                point.lat = lat
                point.lng = lng
                point.check_coordinates = CHECK_OFFICIAL
                break
        else:
            warning_not_official_coordinates(point)
        return point
Ejemplo n.º 5
0
 def __parse_base(self, item, city, point_type):
     """Parse a map side-panel entry into a ``Point``.

     ``item`` is queried with CSS selectors: the heading gives the name, the
     first span of the paragraph the street address (prefixed with ``city``),
     and the second span an optional "lat,lng" pair. The "more" block is
     split on line breaks; the chunk starting with the phone label supplies
     the phones, every other non-empty chunk is treated as opening hours.

     Returns the populated ``Point``. Points without both coordinates are
     reported through ``warning_not_official_coordinates``.
     """
     point = Point()
     point.prov = self.uid
     point.type = point_type
     point.name = normalize_text(item('.b-map-side>h5').text())
     point.address, point.place = split_address_place(u'г. %s, %s' % (city, item('.b-map-side>p span:eq(0)').text()))
     coordinates = item('.b-map-side>p span:eq(1)').text()
     if coordinates:
         point.lat, point.lng = map(strip, coordinates.split(','))
     # Turn <br> separators into a marker that survives text extraction.
     text_html = replace_br(item('.b-map-side-more').html(), ';;;')
     phone_label = u'Телефон:'
     time_items = []
     for sub_item in map(normalize_text, PQ(text_html).text().split(';;;')):
         if not sub_item:
             continue
         if sub_item.startswith(phone_label):
             # Slice the label off; plain indexing would keep only the single
             # character that follows it and discard the actual numbers.
             point.phones = normalize_phones(sub_item[len(phone_label):].split(','))
             continue
         time_items.append(sub_item)
     point.time = normalize_time(', '.join(time_items))
     point.check_information = CHECK_OFFICIAL
     if point.lat and point.lng:
         point.check_coordinates = CHECK_OFFICIAL
     else:
         warning_not_official_coordinates(point)
     return point
Ejemplo n.º 6
0
 def get_offices(self):
     """Collect office points from the XML feed and the regional offices page.

     First every ``item`` element of the XML document at
     ``self.__offices_xml_url`` is handed to ``self.__parse_office``; truthy
     results are kept. Then the regional page is walked child by child: an
     ``h2`` element starts a new office (its text is the name) and the next
     accepted element supplies the address and phones for it.

     Returns a list of ``Point`` objects.
     """
     points = []
     items_tree = ET.fromstring(get_url(self.__offices_xml_url))
     for item in items_tree.iter('item'):
         point = self.__parse_office(item)
         if point:
             points.append(point)

     page = PQ(get_url(self.__regional_offices_page_url))
     phone_label = u'т.ф.:'
     point = None  # office opened by the last <h2>, waiting for its details
     for item in map(PQ, page('#content_internal span:eq(0)').children()):
         tag = item[0].tag
         if tag not in self.__regional_offices_tags:
             continue
         if tag == 'h2':
             point = Point()
             point.prov = self.uid
             point.type = TYPE_OFFICE
             point.name = trim_spaces_and_commas(normalize_text(item.text()))
             point.check_information = CHECK_OFFICIAL
             continue
         if not point:
             # Details block without a preceding heading: nothing to attach to.
             continue
         item_html = replace_br(item.html(), ';;;')
         sub_items = PQ(item_html).text().split(';;;')
         point.address, point.place = split_address_place(sub_items[0])
         for sub_item in map(normalize_text, sub_items[1:]):
             if sub_item.startswith(phone_label):
                 # Stored on ``phones`` (plural), the attribute the other
                 # parsers populate; the singular name was a typo.
                 point.phones = normalize_phones(sub_item[len(phone_label):].split(','))
         warning_not_official_coordinates(point)
         points.append(point)
         point = None
     return points
Ejemplo n.º 7
0
 def __parse_base_office_exchange(self, item):
     """Build a ``Point`` from a table row.

     Cells 1..4 of ``item`` supply the name, address, opening hours and a
     comma-separated phone list. The point type is not assigned here.
     """
     name_cell = item('td:eq(1)').text()
     address_cell = item('td:eq(2)').text()
     hours_cell = item('td:eq(3)').text()
     phones_cell = item('td:eq(4)').text()

     result = Point()
     result.prov = self.uid
     result.name = normalize_text(name_cell)
     result.address, result.place = split_address_place(address_cell)
     result.time = normalize_time(hours_cell)
     result.phones = normalize_phones(phones_cell.split(','))
     result.check_information = CHECK_OFFICIAL
     return result
Ejemplo n.º 8
0
 def __parse_terminal(self, item):
     """Build a terminal ``Point`` from a table row.

     Cells 1..3 of ``item`` supply the address, the opening hours and a
     services text; ``deposit`` is set when that text mentions the cash
     top-up service. The point is always reported through
     ``warning_not_official_coordinates``.
     """
     address_cell = item('td:eq(1)').text()
     hours_cell = item('td:eq(2)').text()
     services_cell = item('td:eq(3)').text()

     terminal = Point()
     terminal.prov = self.uid
     terminal.type = TYPE_TERMINAL
     terminal.address, terminal.place = split_address_place(address_cell)
     terminal.time = normalize_time(hours_cell)
     terminal.deposit = u'Пополнение карточки наличными' in services_cell
     terminal.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(terminal)
     return terminal
Ejemplo n.º 9
0
 def __parse_atm(self, item):
     """Build an ATM ``Point`` from a table row.

     Cells 1..3 of ``item`` supply the address, the opening hours and a
     comma-separated currency list. The point is always reported through
     ``warning_not_official_coordinates``.
     """
     address_cell = item('td:eq(1)').text()
     hours_cell = item('td:eq(2)').text()
     currency_cell = item('td:eq(3)').text()

     atm = Point()
     atm.prov = self.uid
     atm.type = TYPE_ATM
     atm.address, atm.place = split_address_place(address_cell)
     atm.time = normalize_time(hours_cell)
     atm.currency = map(strip, currency_cell.split(','))
     atm.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(atm)
     return atm
Ejemplo n.º 10
0
 def __parse_exchange(self, item):
     """Build an exchange ``Point`` from text shaped like "name — address".

     The text of ``item`` is split on the em dash: the first part is the
     name, the second the address. Text without a dash raises ``IndexError``.
     The point is always reported through ``warning_not_official_coordinates``.
     """
     parts = item.text().split(u'—')

     exchange = Point()
     exchange.prov = self.uid
     exchange.type = TYPE_EXCHANGE
     exchange.name = normalize_text(parts[0])
     exchange.address, exchange.place = split_address_place(parts[1])
     exchange.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(exchange)
     return exchange
Ejemplo n.º 11
0
 def __parse_terminal(self, item):
     """Build a terminal ``Point`` from a table row.

     Cells 0..3 of ``item`` supply the name, address, opening hours and a
     deposit flag (true when the normalized, lower-cased cell text equals the
     expected "yes" word). The place is set to the point name. The point is
     always reported through ``warning_not_official_coordinates``.
     """
     name_cell = item('td:eq(0)').text()
     address_cell = item('td:eq(1)').text()
     hours_cell = item('td:eq(2)').text()
     deposit_cell = item('td:eq(3)').text()

     terminal = Point()
     terminal.prov = self.uid
     terminal.type = TYPE_TERMINAL
     terminal.name = normalize_text(name_cell)
     terminal.address, terminal.place = split_address_place(address_cell)
     # The name doubles as the place, replacing the split result.
     terminal.place = terminal.name
     terminal.time = normalize_time(hours_cell)
     terminal.deposit = normalize_text(deposit_cell).lower() == u'есть'
     terminal.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(terminal)
     return terminal
Ejemplo n.º 12
0
 def __parse_exchange(self, item, city):
     """Build an exchange ``Point`` from a table row of the given ``city``.

     Cell 0 is the name, cell 1 the street address (prefixed with the city)
     and cell 2 the opening hours. Unless the row has exactly four cells, the
     hours text is cut at the operations label before normalization. The
     point is always reported through ``warning_not_official_coordinates``.
     """
     exchange = Point()
     exchange.prov = self.uid
     exchange.type = TYPE_EXCHANGE
     exchange.name = normalize_text(item('td:eq(0)').text())
     full_address = u'г. %s, %s' % (city, item('td:eq(1)').text())
     exchange.address, exchange.place = split_address_place(full_address)

     has_four_cells = len(item('td')) == 4
     hours_text = item('td:eq(2)').text()
     if not has_four_cells:
         # Keep only the part before the operations description.
         hours_text = hours_text.split(u'Операции:')[0]
     exchange.time = normalize_time(hours_text)

     exchange.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(exchange)
     return exchange
Ejemplo n.º 13
0
 def __get_map_points(self, template, id, is_office):
     """Return the placemarks scraped from a map page, cached per URL.

     ``template.format(id)`` gives the page URL. On first use the page is
     downloaded, decoded as cp1251 and the text of its ``script`` elements is
     scanned line by line for placemark definitions. Each placemark becomes a
     5-tuple ``(first_coord, second_coord, name, address, place)`` where the
     two coordinates keep the order in which they appear in the
     ``YMaps.GeoPoint(...)`` call.

     ``is_office`` selects how the placemark description is interpreted (see
     the inline comments). The resulting list is stored in
     ``self.__map_points[url]`` and reused on later calls.
     """
     url = template.format(id)
     if url not in self.__map_points:
         page = PQ(get_url(url).decode('cp1251'))
         map_points = []
         # True while we are between a placemark's opening line and the
         # line that adds it to the map.
         is_place = False
         lat, lng = None, None
         name, address, place = None, None, None
         start_place_token = 'var placemark = new YMaps.Placemark(new YMaps.GeoPoint('
         end_place_token = 'map.addOverlay(placemark);'
         start_description_token = 'placemark.description = "'
         end_description_token = '";'
         for line in map(strip, page('script').text().splitlines()):
             if is_place:
                 if line.startswith(end_place_token):
                     # Placemark finished: store it and reset the per-placemark state.
                     map_points.append((lat, lng, name, address, place,))
                     is_place = False
                     lat, lng = None, None
                     name, address, place = None, None, None
                     continue
                 if line.startswith(start_description_token):
                     # Cut the leading token and the trailing '";' to get the
                     # HTML of the description (assumes the line ends with it).
                     description = line[len(start_description_token):-len(end_description_token)]
                     info_page = PQ(description)
                     if is_office:
                         # Offices: heading is the name, first paragraph the address.
                         name = normalize_text(info_page('h1').text())
                         address, place = split_address_place(info_page('p:eq(0)').text())
                     else:
                         # Otherwise: first word of the heading is the name, the
                         # remaining words are the address, and the first
                         # paragraph replaces the place from the split.
                         name = info_page('h1').text().strip().split()[0]
                         address, place = split_address_place(' '.join(info_page('h1').text().strip().split()[1:]))
                         place = normalize_text(info_page('p:eq(0)').text())
                     continue
             elif line.startswith(start_place_token):
                 is_place = True
                 # Two comma-separated numbers between the token and the next ')'.
                 # NOTE(review): they are named (lat, lng) in source order; confirm
                 # the argument order of YMaps.GeoPoint, since consumers rely on it.
                 lat, lng = map(strip, line[len(start_place_token):line.find(')', len(start_place_token))].split(','))
                 continue
         self.__map_points[url] = map_points
     return self.__map_points[url]
Ejemplo n.º 14
0
 def __parse_terminal(self, item):
     """Build a terminal ``Point`` from a table row.

     Cells 1..4 of ``item`` supply the place, address, opening hours and a
     comma-separated list of accepted currencies. ``deposit`` is true only
     when at least one non-empty currency is listed. The point is always
     reported through ``warning_not_official_coordinates``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_TERMINAL
     point.address, point.place = split_address_place(item('td:eq(2)').text())
     # The dedicated place column replaces whatever the split produced.
     point.place = normalize_text(item('td:eq(1)').text())
     # Splitting an empty cell yields [''], which is truthy; drop empty
     # entries so a terminal without currencies is not marked as deposit.
     stripped = map(strip, item('td:eq(4)').text().split(','))
     point.currency = [code for code in stripped if code]
     point.deposit = bool(point.currency)
     point.time = normalize_time(item('td:eq(3)').text())
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Ejemplo n.º 15
0
 def __parse_office(self, item):
     """Build an office ``Point`` from a table row.

     Cell 0 is the name. Cell 1 holds the address, optionally followed by a
     phone marker and the phone numbers; anything after the extension marker
     is dropped from the phones. Opening hours come from
     ``self.__parse_time``. The point is always reported through
     ``warning_not_official_coordinates``.
     """
     office = Point()
     office.prov = self.uid
     office.type = TYPE_OFFICE
     office.name = normalize_text(item('td:eq(0)').text())

     # Chunk 0 is the address; chunk 1 (if any) follows the phone marker.
     chunks = item('td:eq(1)').text().split(u'тел.')
     office.address, office.place = split_address_place(chunks[0])
     if len(chunks) > 1:
         main_numbers = chunks[1].split(u'доб')[0]
         office.phones = normalize_phones(main_numbers.split(','))

     office.time = self.__parse_time(item)
     office.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(office)
     return office
Ejemplo n.º 16
0
 def __parse_office(self, item):
     """Build an office ``Point`` from a details table.

     The heading is the name and row 2 the address. Rows 5 and 6 may each
     hold several lines of contact data; only lines starting with a plus
     sign or the phone marker are passed on as phones. Row 8 holds the
     opening hours. The point is always reported through
     ``warning_not_official_coordinates``.
     """
     office = Point()
     office.prov = self.uid
     office.type = TYPE_OFFICE
     office.name = normalize_text(item('h1').text())
     office.address, office.place = split_address_place(item('tr:eq(2) td:eq(1)').text())

     contact_lines = []
     for selector in ('tr:eq(5) td:eq(1)', 'tr:eq(6) td:eq(1)'):
         # <br> separators become markers so the lines can be split apart.
         cell_html = replace_br(item(selector).html(), ';;;')
         if cell_html:
             contact_lines.extend(map(strip, PQ(cell_html).text().split(';;;')))

     def looks_like_phone(text):
         return text.startswith((u'+', u'тел'))

     office.phones = normalize_phones(filter(looks_like_phone, contact_lines))
     office.time = normalize_time(item('tr:eq(8) td:eq(1)').text())
     office.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(office)
     return office
Ejemplo n.º 17
0
 def __parse_office(self, item, city):
     """Build an office ``Point`` of the given ``city`` from a table row.

     The header cell supplies the name, the street address (prefixed with the
     city) and the two coordinate values; data cells 0 and 1 supply opening
     hours and phones, with line breaks turned into comma separators. Points
     lacking either coordinate are reported through
     ``warning_not_official_coordinates``.
     """
     office = Point()
     office.prov = self.uid
     office.type = TYPE_OFFICE
     office.name = normalize_text(item('th .pointShowMaps span:eq(0)').text())

     street = item('th .pointShowMaps span:eq(1)').text()
     office.address, office.place = split_address_place(u'г. %s, %s' % (city, street))

     hours_markup = replace_br(item('td:eq(0)').html(), ', ')
     office.time = normalize_time(PQ(hours_markup).text())
     phones_markup = replace_br(item('td:eq(1)').html(), ', ')
     office.phones = normalize_phones(PQ(phones_markup).text().split(','))

     office.lat = normalize_text(item('th .item_coords .coord1').text())
     office.lng = normalize_text(item('th .item_coords .coord2').text())
     office.check_information = CHECK_OFFICIAL
     if not (office.lat and office.lng):
         warning_not_official_coordinates(office)
     else:
         office.check_coordinates = CHECK_OFFICIAL
     return office
Ejemplo n.º 18
0
    def __parse_base_atm_terminals(self, item, map_points, point_type, start_names):
        """Parse one ATM/terminal entry and look up its map coordinates.

        The first word of the ``.name`` text must start with one of
        ``start_names`` (passed to ``str.startswith``), otherwise ``None`` is
        returned; the remaining words form the address. An entry whose
        ``.name`` text is empty raises ``IndexError``.

        Returns the populated ``Point``; unmatched points are reported
        through ``warning_not_official_coordinates``.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type

        title_words = item('.name').text().split()
        if not title_words[0].startswith(start_names):
            return None

        point.address, point.place = split_address_place(' '.join(title_words[1:]))
        # The dedicated place text replaces whatever the split produced.
        point.place = trim_spaces_and_commas(normalize_text(item('.addres strong').text()))
        point.check_information = CHECK_OFFICIAL

        # NOTE(review): tuples are unpacked as (lat, lng, ...) here; confirm
        # this matches the order used by whatever builds ``map_points``.
        for lat, lng, name, address, place in map_points:
            if name not in start_names:
                continue
            if not (point.address and address and point.address in address):
                continue
            # Place only disqualifies a candidate when both sides are known.
            if point.place and place and point.place not in place:
                continue
            point.lat = lat
            point.lng = lng
            point.check_coordinates = CHECK_OFFICIAL
            break
        else:
            warning_not_official_coordinates(point)
        return point
Ejemplo n.º 19
0
    def __parse_base_offices_exchanges(self, item, point_type, keywords_names):
        """Parse an office/exchange entry built from named field blocks.

        ``keywords_names`` is passed to ``str.startswith``; entries whose name
        does not match are skipped and ``None`` is returned. The address is
        composed from the city field (or, when it is empty, the index field)
        and the street field. The phone field, when present, has its leading
        label removed before normalization.

        Returns the populated ``Point``, always reported through
        ``warning_not_official_coordinates``.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type
        point.name = normalize_text(item('.first').text())
        if not point.name.startswith(keywords_names):
            return None

        city = item('.field-field-city').text()
        if city:
            city = u'г. ' + city
        else:
            # No city given: fall back to the index field as the first part.
            city = item('.field-field-index').text()
        address = item('.field-field-adress').text()
        point.address, point.place = split_address_place(u'%s, %s' % (city, address))
        phone = item('.field-field-phone').text()
        if phone:
            # Slice the label off; plain indexing would pass only the single
            # character that follows it instead of the number.
            point.phones = [normalize_phone(phone[len(u'тел.:'):])]
        point.time = normalize_time(item('.field-field-work-time .field-item').text())
        point.check_information = CHECK_OFFICIAL
        warning_not_official_coordinates(point)
        return point
Ejemplo n.º 20
0
 def __parse_atm(self, item, city):
     """Build an ATM ``Point`` of the given ``city`` from a table row.

     The spans inside the header block are removed from ``item`` and their
     text is used as the street address; the text left in the block is the
     place. Data cell 0 holds the currencies (rewritten through
     ``self.__currency_replaces`` before splitting on commas) and cell 1 the
     opening hours. Points lacking either coordinate are reported through
     ``warning_not_official_coordinates``.
     """
     atm = Point()
     atm.prov = self.uid
     atm.type = TYPE_ATM

     # Order matters: the spans must be removed before the remaining text
     # of the block is read as the place.
     street = item('th .pointShowMaps span').remove().text()
     location = normalize_text(item('th .pointShowMaps').text())
     atm.address, atm.place = split_address_place(u'г. %s, %s' % (city, street))
     atm.place = location

     currency_text = item('td:eq(0)').text()
     for old, new in self.__currency_replaces:
         currency_text = currency_text.replace(old, new)
     atm.currency = map(strip, currency_text.split(','))

     hours_markup = replace_br(item('td:eq(1)').html(), ', ')
     atm.time = normalize_time(PQ(hours_markup).text())

     atm.lat = normalize_text(item('th .item_coords .coord1').text())
     atm.lng = normalize_text(item('th .item_coords .coord2').text())
     atm.check_information = CHECK_OFFICIAL
     if not (atm.lat and atm.lng):
         warning_not_official_coordinates(atm)
     else:
         atm.check_coordinates = CHECK_OFFICIAL
     return atm
Ejemplo n.º 21
0
 def __get_address(self, city_name, short_address):
     """Prefix ``short_address`` with the city and split it into address/place."""
     full_address = u'г. %s, %s' % (city_name, short_address)
     return split_address_place(full_address)
Ejemplo n.º 22
0
 def __parse_address(self, item):
     """Compose the address from cells 2 (town) and 3 (street) of ``item``.

     Returns the result of ``split_address_place`` for the composed string.
     """
     town_cell = item("td:eq(2)").text()
     street_cell = item("td:eq(3)").text()
     parts = (normalize_text(town_cell), normalize_text(street_cell))
     return split_address_place(u"г. %s, %s" % parts)
Ejemplo n.º 23
0
 def get_offices(self):
     """Collect office points by walking the per-region office pages.

     The regions page lists one link per region; each region page contains
     tables, each of which links to a page with the actual office rows. The
     table of the central office is delegated to ``self.__parse_office_main``;
     every other page is parsed row by row: a row with at least four cells
     starts a new office, the following rows contribute phones and opening
     hours to it. Coordinates are looked up with
     ``self.__get_point_coordinate`` by address.

     Returns a list of ``Point`` objects.
     """
     points = []
     coordinates = self.__get_coordinates()
     regions_page = PQ(get_url(self.__regions_url).decode('cp1251'))
     for region_item in map(PQ, regions_page('#ctl47_Panel_Viewer .rsf-content-menu a')):
         region_url = self.site + region_item.attr('href')
         offices_page = PQ(get_url(region_url).decode('cp1251'))
         for item in map(PQ, offices_page('#Print_2_ctl39 table')):
             # Skip tables without any text.
             if not normalize_text(item.text()):
                 continue
             is_main = u'Центральный офис' in item('tr:eq(0)').text()
             offices_url = self.site + '/' + item('tr:eq(2) a').attr('href')
             page = PQ(get_url(offices_url).decode('cp1251'))
             if is_main:
                 # NOTE(review): the freshly downloaded ``page`` is not passed
                 # on here; confirm __parse_office_main does not need it.
                 point = self.__parse_office_main(coordinates)
                 if point:
                     points.append(point)
             else:
                 # Row-by-row state: the office being assembled, whether the
                 # current rows belong to its schedule, and the schedule so far.
                 point = None
                 start_times = False
                 time_items = []
                 # NOTE: this loop reuses the name ``item``; the outer loop's
                 # value is not read again after this point.
                 for item in map(PQ, page('#Print_2_ctl39 tr:gt(1)')):
                     if len(item('td')) >= 4:
                         # A row with four or more cells opens a new office;
                         # finish and store the previous one first.
                         if point:
                             point.time = normalize_time(', '.join(time_items))
                             point.lat, point.lng = self.__get_point_coordinate(point.address, coordinates)
                             if point.lat and point.lng:
                                 point.check_coordinates = CHECK_OFFICIAL
                             else:
                                 warning_not_official_coordinates(point)
                             points.append(point)
                             point = None
                             start_times = False
                             time_items = []
                         name = normalize_text(item('td:eq(0)').text())
                         if not name:
                             continue
                         point = Point()
                         point.prov = self.uid
                         point.type = TYPE_OFFICE
                         point.check_information = CHECK_OFFICIAL
                         point.name = u'№%s' % name
                         # Prefer the first paragraph of the address cell and
                         # fall back to the whole cell when it is empty.
                         address_html = item('td:eq(1) p:eq(0)').html()
                         if address_html:
                             address_html = address_html.strip()
                         if not address_html:
                             address_html = item('td:eq(1)').html()
                         # Cut the markup at the first image tag and at the
                         # ATM remark, when either is found past position 0.
                         image_tag_start = address_html.find('<img')
                         if image_tag_start > 0:
                             address_html = address_html[:image_tag_start]
                         atm_text_start = address_html.find(u'Банкомат')
                         if atm_text_start > 0:
                             address_html = address_html[:atm_text_start]
                         point.address, point.place = split_address_place(PQ(address_html).text().split(';;;')[0])
                         # Normalize the trailing house-letter suffix of the address.
                         if point.address.endswith(u'А'):
                             point.address = point.address[:-1] + u'а'
                         if point.address.endswith(u'Б'):
                             point.address = point.address[:-1] + u'б'
                         if point.address.endswith(u'-а'):
                             point.address = point.address[:-2] + u'а'
                         for from_token, to_token in self.__address_replaces:
                             point.address = point.address.replace(from_token, to_token)
                         # Drop the name and address cells so the remaining
                         # cells line up with the continuation rows below.
                         item('td:eq(0), td:eq(1)').remove()
                         next_sub_item = normalize_text(item('td:eq(0)').text()).lower()
                         if not next_sub_item.startswith(self.__start_offices_keywords) and\
                            not next_sub_item.startswith(self.__stop_offices_keywords):
                             start_times = True
                             # ``start_times`` was just set, so only the cell
                             # count decides here.
                             if start_times and len(item('td')) >= 2:
                                 time_items.append(u'%s: %s' % (item('td:eq(0)').text(), item('td:eq(1)').text()))
                             point.phones = self.__parse_phones(item)
                     if not point:
                         continue
                     item_text = normalize_text(item.text())
                     if  not item_text or item_text.startswith(u'г.'):
                         continue
                     # Remove cells without text so positional selectors
                     # address the first cells that carry content.
                     for sub_item in map(PQ, item('td')):
                         if not normalize_text(sub_item.text()):
                             sub_item.remove()
                     next_sub_item = normalize_text(item('td:eq(0)').text()).lower()
                     # A stop keyword closes the schedule section ...
                     if start_times and next_sub_item.startswith(self.__stop_offices_keywords):
                         start_times = False
                         continue
                     # ... a start keyword (re)opens it and may carry phones ...
                     if not start_times and next_sub_item.startswith(self.__start_offices_keywords):
                         start_times = True
                         point.phones += self.__parse_phones(item)
                         continue
                     # ... and rows in between are "label: value" schedule lines.
                     if start_times and len(item('td')) >= 2:
                         time_items.append(u'%s: %s' % (item('td:eq(0)').text(), item('td:eq(1)').text()))
                         continue
                 # Store the last office of the page, which no later row flushed.
                 if point:
                     point.time = normalize_time(', '.join(time_items))
                     point.lat, point.lng = self.__get_point_coordinate(point.address, coordinates)
                     if point.lat and point.lng:
                         point.check_coordinates = CHECK_OFFICIAL
                     else:
                         warning_not_official_coordinates(point)
                     points.append(point)
     return points