Esempio n. 1
0
    def __parse_time(self, item):
        """Build a normalized working-hours string from one table row.

        Cells 2, 3 and 4 of the row are read as opening times, closing times
        and lunch breaks (each taken from the cell's ``<p>`` children when
        present, otherwise from the cell itself); cell 5 is read as the
        days off.

        :param item: PyQuery-style callable wrapping the row.
        :return: the assembled text passed through ``normalize_time``.
        """
        def cell_texts(index):
            # Prefer the individual <p> entries, fall back to the whole cell.
            selector = 'td:eq(%d)' % index
            nodes = item(selector + ' p') or item(selector)
            # A real list (not a lazy map object) so that len() and indexing
            # below behave the same on Python 2 and Python 3.
            return [normalize_text(PQ(node).text()) for node in nodes]

        start_times = cell_texts(2)
        end_times = cell_texts(3)
        break_times = cell_texts(4)
        weekends = normalize_text(item('td:eq(5)').text())

        no_break = u'без обеда'
        parts = []
        if len(start_times) == 1:
            # One shared opening time: the day labels (if any) come from the
            # closing-time entries.
            _, start = self.__split_day_time(start_times[0])
            break_time = None
            # Guard against a missing break cell instead of raising IndexError.
            if break_times and break_times[0] != no_break:
                _, break_time = self.__split_day_time(break_times[0])
            for end_text in end_times:
                end_day, end = self.__split_day_time(end_text)
                if end_day:
                    parts.append(u'%s: %s-%s, ' % (end_day, start, end))
                else:
                    parts.append(u'%s-%s, ' % (start, end))
                if break_time:
                    parts.append(u'обед: %s, ' % break_time)
        else:
            # Several opening times: pair each with the closing times and
            # breaks that carry the same day label (or no label at all).
            for start_text in start_times:
                start_day, start = self.__split_day_time(start_text)
                for end_text in end_times:
                    end_day, end = self.__split_day_time(end_text)
                    if end_day and start_day != end_day:
                        continue
                    parts.append(u'%s: %s-%s, ' % (start_day, start, end))
                    for break_text in break_times:
                        if break_text == no_break:
                            continue
                        break_day, break_time = self.__split_day_time(break_text)
                        if break_time and start_day == break_day:
                            parts.append(u'обед: %s, ' % break_time)
        if weekends != u'без выходных':
            parts.append(u'%s: выходной' % weekends)
        return normalize_time(u''.join(parts))
Esempio n. 2
0
 def __parse_base_atm_terminal(self, row, point_type, coordinates, deposit=False):
     """Create a point from one data row.

     :param row: indexable record; items 1-6 are read as number, city,
         street address, place, currencies and working hours.
     :param point_type: value stored in ``point.type``.
     :param coordinates: data forwarded to ``__get_point_coordinate``.
     :param deposit: when true, ``point.deposit`` is filled as well.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = point_type
     point.name = normalize_text(u'№' + str(int(row[1])))

     # Districts are kept verbatim, everything else gets the city prefix.
     locality = row[2]
     if u'р-н' not in locality:
         locality = u'г. %s' % locality
     point.address = normalize_address(u'%s, %s' % (locality, row[3]))
     point.place = normalize_text(row[4])

     accepts_cash = u'только безнал.платежи' not in row[5]
     if accepts_cash:
         point.currency = map(strip, row[5].split(','))
     else:
         point.currency = []
     if deposit:
         point.deposit = accepts_cash

     point.time = normalize_time(row[6])
     point.check_information = CHECK_OFFICIAL

     point.lat, point.lng = self.__get_point_coordinate(point.address, coordinates)
     if not (point.lat and point.lng):
         warning_not_official_coordinates(point)
     else:
         point.check_coordinates = CHECK_OFFICIAL
     return point
Esempio n. 3
0
    def __parse_terminal(self, item):
        """Create a terminal point from one table row.

        Cells 0-3 are read as city, place, street address and working hours.
        Coordinates are taken from the first marker of type ``'2'`` whose
        description tokens all occur in the address (Minsk addresses only).

        :param item: PyQuery-style callable wrapping the row.
        :return: the populated ``Point``.
        """
        point = Point()
        point.prov = self.uid
        point.type = TYPE_TERMINAL

        city_name = normalize_text(item('td:eq(0)').text()).title()
        street = normalize_text(item('td:eq(2)').text())
        full_address = u'г. %s, %s' % (city_name, street)
        point.address, point.place = split_address_place(full_address)
        # The dedicated place cell overrides whatever was split off the address.
        point.place = normalize_text(item('td:eq(1)').text())
        point.time = normalize_time(item('td:eq(3)').text())
        point.check_information = CHECK_OFFICIAL

        for lat, lng, type_id, description in self.__get_coordinates():
            is_candidate = u'Минск' in point.address and type_id == '2'
            if is_candidate and all(token in point.address for token in description.split()):
                point.lat, point.lng = lat, lng
                point.check_coordinates = CHECK_OFFICIAL
                break
        else:
            warning_not_official_coordinates(point)
        return point
Esempio n. 4
0
    def __parse_base_office_exchange(self, item, point_type, name_keywords):
        """Create an office/exchange point from one table row.

        :param item: PyQuery-style callable wrapping the row.
        :param point_type: value stored in ``point.type``.
        :param name_keywords: prefix (or tuple of prefixes) the point name
            must start with; other rows are rejected.
        :return: the populated ``Point`` or ``None`` for a rejected row.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type
        point.name = normalize_text(item('th:eq(0) a:eq(0)').text())
        if point.name.startswith(name_keywords):
            city_name = normalize_text(item('td:eq(1)').text())
            street = normalize_text(item('td:eq(2)').text())
            full_address = u'г. %s, %s' % (city_name, street)
            point.address, point.place = split_address_place(full_address)
            point.check_information = CHECK_OFFICIAL

            # A marker matches when every token of its description is found
            # in either the address or the name (Minsk, type '1' only).
            for lat, lng, type_id, description in self.__get_coordinates():
                is_candidate = u'Минск' in point.address and type_id == '1'
                if is_candidate and all(token in point.address or token in point.name
                                        for token in description.split()):
                    point.lat, point.lng = lat, lng
                    point.check_coordinates = CHECK_OFFICIAL
                    break
            else:
                warning_not_official_coordinates(point)
            return point
        return None
Esempio n. 5
0
 def __get_offices(self, url, city_name=''):
     """Parse the offices table of the page at *url*.

     Rows whose first cell has ``colspan="3"`` are skipped. A row with an
     empty working-hours cell reuses the last non-empty value seen above it.

     :param url: page address to download.
     :param city_name: text prepended to every address.
     :return: list of populated ``Point`` objects.
     """
     page = PQ(get_url(url).decode('utf8'))
     offices = []
     last_time = None
     for row in map(PQ, page('#oo__content_value table tr:gt(0)')):
         if row('td').attr('colspan') == '3':
             continue
         office = Point()
         office.prov = self.uid
         office.type = TYPE_OFFICE
         office.name = normalize_text(row('td:eq(0)').text())
         office.address = normalize_address(city_name + row('td:eq(1) p:eq(0)').text())
         # The place is read from the third paragraph, else from the second.
         place_text = row('td:eq(1) p:eq(2)').text() or row('td:eq(1) p:eq(1)').text()
         if place_text:
             office.place = normalize_text(place_text)
         last_time = row('td:eq(2)').text() or last_time
         office.time = normalize_time(last_time)
         office.check_information = CHECK_OFFICIAL
         if office.address not in self.__addresses:
             warning_not_official_coordinates(office)
         else:
             office.lat, office.lng = self.__addresses[office.address]
             office.check_coordinates = CHECK_OFFICIAL
         offices.append(office)
     return offices
Esempio n. 6
0
 def __parse_terminal(self, item):
     """Create a terminal point from one table row.

     Cells 0-3 are read as city, street address, place and working hours.
     No coordinates are available, so the point is always reported through
     ``warning_not_official_coordinates``.

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_TERMINAL

     city_name = normalize_text(item('td:eq(0)').text()).title()
     street = item('td:eq(1)').text()
     point.address = normalize_address(u'%s, %s' % (u'г. %s' % city_name, street))

     point.place = normalize_text(item('td:eq(2)').text())
     point.time = normalize_time(item('td:eq(3)').text())
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 7
0
 def __parse_atm(self, item):
     """Create an ATM point from one table row.

     Cells 0-4 are read as city, street address, place, comma-separated
     currencies and working hours. No coordinates are available, so the
     point is always reported through ``warning_not_official_coordinates``.

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_ATM

     city_name = normalize_text(item('td:eq(0)').text()).title()
     street = item('td:eq(1)').text()
     point.address = normalize_address(u'%s, %s' % (u'г. %s' % city_name, street))

     point.place = normalize_text(item('td:eq(2)').text())
     currencies_text = item('td:eq(3)').text()
     point.currency = map(strip, currencies_text.split(','))
     point.time = normalize_time(item('td:eq(4)').text())
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 8
0
 def __parse_terminal(self, item):
     """Create a terminal point from one table row.

     Cells 0-3 are read as name, address, working hours and the deposit
     flag (true when the cell text equals ``есть``, case-insensitively).

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_TERMINAL
     point.name = normalize_text(item('td:eq(0)').text())

     address_text = item('td:eq(1)').text()
     point.address, point.place = split_address_place(address_text)
     # The name doubles as the place, replacing the one split off the address.
     point.place = point.name

     point.time = normalize_time(item('td:eq(2)').text())
     deposit_text = normalize_text(item('td:eq(3)').text()).lower()
     point.deposit = deposit_text == u'есть'
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 9
0
 def __get_coordinates(self):
     """Return the marker list, downloading and parsing it on first use.

     Each entry is a ``(lat, lng, address, place)`` tuple built from the
     attributes of a ``<marker>`` element; addresses additionally go through
     the ``__address_replaces`` substitutions.

     :return: the ``__coordinates`` list kept on the instance.
     """
     if self.__coordinates:
         return self.__coordinates

     markers_xml = ET.fromstring(get_url(self.__markers_url))
     for marker in markers_xml.iter('marker'):
         attrs = marker.attrib
         lat = normalize_text(attrs['lat'])
         lng = normalize_text(attrs['lng'])
         address = normalize_address(attrs['address'])
         for old, new in self.__address_replaces:
             address = address.replace(old, new)
         self.__coordinates.append((lat, lng, address, normalize_text(attrs['place'])))
     return self.__coordinates
Esempio n. 10
0
 def __parse_base(self, item, city, point_type):
     """Create a point from one map side-panel element.

     The name comes from the ``<h5>``, the address and the optional
     ``lat,lng`` pair from the first two ``<span>`` elements, and the
     ``.b-map-side-more`` block is split on ``<br>`` into a phone line
     (prefixed with ``Телефон:``) and working-hours lines.

     :param item: PyQuery-style callable wrapping the element.
     :param city: city name prepended to the address.
     :param point_type: value stored in ``point.type``.
     :return: the populated ``Point``.
     """
     phone_prefix = u'Телефон:'

     point = Point()
     point.prov = self.uid
     point.type = point_type
     point.name = normalize_text(item('.b-map-side>h5').text())
     point.address, point.place = split_address_place(u'г. %s, %s' % (city, item('.b-map-side>p span:eq(0)').text()))
     coordinates = item('.b-map-side>p span:eq(1)').text()
     if coordinates:
         point.lat, point.lng = map(strip, coordinates.split(','))
     text_html = replace_br(item('.b-map-side-more').html(), ';;;')
     time_items = []
     for sub_item in map(normalize_text, PQ(text_html).text().split(';;;')):
         if not sub_item:
             continue
         if sub_item.startswith(phone_prefix):
             # Slice off the prefix; indexing with the bare length would
             # yield a single character instead of the phone list.
             point.phones = normalize_phones(sub_item[len(phone_prefix):].split(','))
             continue
         time_items.append(sub_item)
     point.time = normalize_time(', '.join(time_items))
     point.check_information = CHECK_OFFICIAL
     if point.lat and point.lng:
         point.check_coordinates = CHECK_OFFICIAL
     else:
         warning_not_official_coordinates(point)
     return point
Esempio n. 11
0
 def __parse_exchange(self, item):
     """Create an exchange point from a row, or return ``None``.

     Only rows whose second cell starts with one of
     ``__exchange_keywords`` are accepted.

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point`` or ``None`` for a rejected row.
     """
     row_name = normalize_text(item('td:eq(1)').text())
     if row_name.startswith(self.__exchange_keywords):
         point = self.__parse_base_office_exchange(item)
         point.type = TYPE_EXCHANGE
         warning_not_official_coordinates(point)
         return point
     return None
Esempio n. 12
0
 def __parse_office(self, item, city_name):
     """Create an office point, taking its hours from the ``Кассы`` row.

     :param item: PyQuery-style callable wrapping the office element.
     :param city_name: city name forwarded to ``__parse_base``.
     :return: the populated ``Point``, or ``None`` when the schedule table
         has no row labelled ``Кассы``.
     """
     point = self.__parse_base(item, city_name, TYPE_OFFICE)
     for row in map(PQ, item('.content_table table tbody tr')):
         if normalize_text(row('td:eq(0)').text()) != u'Кассы':
             continue
         # Line breaks inside the hours cell become commas.
         hours_html = replace_br(row('td:eq(1)').html(), ',')
         schedule = [normalize_text(PQ(hours_html).text())]
         lunch = normalize_text(row('td:eq(2)').text())
         if lunch:
             schedule.append(u'перерыв: ' + lunch)
         closed = normalize_text(row('td:eq(3)').text())
         if closed:
             schedule.append(u'выходной: ' + closed)
         point.time = normalize_time(', '.join(schedule))
         return point
     return None
Esempio n. 13
0
    def __parse_base(self, item, city_name, point_type):
        """Create a point from an office element with an embedded map script.

        Phone, name and address are read from the first cell of the first
        table row; coordinates are cut out of the first script line that
        starts with ``BX_GMapAddPlacemark(``.

        :param item: PyQuery-style callable wrapping the element.
        :param city_name: city name forwarded to ``__get_address``.
        :param point_type: value stored in ``point.type``.
        :return: the populated ``Point``.
        """
        def quoted_value(text, key):
            # Cut VALUE out of a "'KEY':'VALUE'" fragment of *text*.
            marker = "'%s':'" % key
            begin = text.find(marker) + len(marker)
            return text[begin:text.find("'", begin)]

        first_cell = '.content_table table tbody tr:eq(0) td:eq(0)'

        point = Point()
        point.prov = self.uid
        point.type = point_type

        # The phone element is removed from the tree before the cell itself
        # is read (presumably so it does not end up in the name/address text).
        phone_text = item(first_cell + ' .office_phone').remove().text()
        point.phones = [normalize_phone(phone_text)]
        cell_html = replace_br(item(first_cell).remove().html(), ',')
        raw_name, raw_address = PQ(cell_html).text().split(',', 1)
        point.name = normalize_text(raw_name)
        point.address, point.place = self.__get_address(city_name, raw_address)
        point.check_information = CHECK_OFFICIAL

        script_lines = map(strip, item('.ya_map script:eq(1)').text().splitlines())
        placemark = next((line for line in script_lines
                          if line.startswith('BX_GMapAddPlacemark(')), None)
        if placemark is None:
            warning_not_official_coordinates(point)
        else:
            point.lat = quoted_value(placemark, 'LAT')
            point.lng = quoted_value(placemark, 'LON')
            point.check_coordinates = CHECK_OFFICIAL
        return point
Esempio n. 14
0
    def __parse_base_office_exchange(self, item, map_points, point_type, start_names):
        """Create an office/exchange point and match it against map markers.

        :param item: PyQuery-style callable wrapping the element.
        :param map_points: iterable of ``(lng, lat, name, address, place)``
            tuples used to look up coordinates.
        :param point_type: value stored in ``point.type``.
        :param start_names: prefix (or tuple of prefixes) the point name
            must start with; other elements are rejected.
        :return: the populated ``Point`` or ``None`` for a rejected element.
        """
        point = Point()
        point.prov = self.uid
        point.type = point_type
        point.name = normalize_text(item('.name').text())
        if not point.name.startswith(start_names):
            return None

        point.address, point.place = split_address_place(item('.addres strong').text())
        details = item('.item_block tr:last')
        point.phones = normalize_phones(details('td:eq(0)').text().split(','))
        # Cells 2..5 hold the hours for Mon-Thu, Fri, Sat and Sun respectively.
        day_labels = (u'пн-чт: ', u'пт: ', u'сб: ', u'вс: ')
        schedule = [label + details('td:eq(%d)' % index).text()
                    for index, label in enumerate(day_labels, 2)]
        point.time = normalize_time(', '.join(schedule))
        point.check_information = CHECK_OFFICIAL

        for lng, lat, name, address, place in map_points:
            # Name and place are compared only when both sides are non-empty;
            # the address must be present on both sides and be contained.
            if point.name and name and point.name not in name:
                continue
            if not (point.address and address and point.address in address):
                continue
            if point.place and place and point.place not in place:
                continue
            point.lat, point.lng = lat, lng
            point.check_coordinates = CHECK_OFFICIAL
            break
        else:
            warning_not_official_coordinates(point)

        return point
Esempio n. 15
0
 def get_offices(self):
     """Collect office points from the XML feed and the regional page.

     The XML feed is parsed item by item through ``__parse_office``. On the
     regional page every ``<h2>`` starts a new office whose details are
     taken from the next accepted sibling element: first ``<br>``-separated
     line is the address, lines starting with ``т.ф.:`` are phone numbers.

     :return: list of populated ``Point`` objects.
     """
     phone_prefix = u'т.ф.:'

     points = []
     items_tree = ET.fromstring(get_url(self.__offices_xml_url))
     for item in items_tree.iter('item'):
         point = self.__parse_office(item)
         if point:
             points.append(point)
     page = PQ(get_url(self.__regional_offices_page_url))
     point = None
     for item in map(PQ, page('#content_internal span:eq(0)').children()):
         if item[0].tag not in self.__regional_offices_tags:
             continue
         if item[0].tag == 'h2':
             # A heading opens a new office; details follow in the next element.
             point = Point()
             point.prov = self.uid
             point.type = TYPE_OFFICE
             point.name = trim_spaces_and_commas(normalize_text(item.text()))
             point.check_information = CHECK_OFFICIAL
             continue
         if not point:
             continue
         item_html = replace_br(item.html(), ';;;')
         sub_items = PQ(item_html).text().split(';;;')
         point.address, point.place = split_address_place(sub_items[0])
         for sub_item in map(normalize_text, sub_items[1:]):
             if sub_item.startswith(phone_prefix):
                 # Stored under ``phones`` (not ``phone``), the attribute
                 # the other parsers in this code base fill.
                 point.phones = normalize_phones(sub_item[len(phone_prefix):].split(','))
         warning_not_official_coordinates(point)
         points.append(point)
         # Wait for the next heading before accepting more details.
         point = None
     return points
Esempio n. 16
0
 def __get_cities_ids(self):
     """Return ``(city_id, city_name)`` pairs, loading them on first use.

     The id is the element's ``id`` attribute with ``city_`` removed.

     :return: the ``__cities_ids`` list kept on the instance.
     """
     if self.__cities_ids:
         return self.__cities_ids

     page = PQ(get_url(self.__cities_url))
     for node in page('.b-cities-list a *'):
         node = PQ(node)
         city_id = node.attr('id').replace('city_', '')
         self.__cities_ids.append((city_id, normalize_text(node.text())))
     return self.__cities_ids
Esempio n. 17
0
 def __get_cities(self):
     """Return ``(url, city_name)`` pairs, loading them on first use.

     Each URL is ``__cities_url`` followed by the link's ``href``.

     :return: the ``__cities`` list kept on the instance.
     """
     if self.__cities:
         return self.__cities

     page = PQ(get_url(self.__cities_url))
     for link in page('#chooseCity .oblast a'):
         link = PQ(link)
         city_url = self.__cities_url + link.attr('href')
         self.__cities.append((city_url, normalize_text(link.text())))
     return self.__cities
Esempio n. 18
0
 def __parse_base_office_exchange(self, item):
     """Fill the fields shared by office and exchange rows.

     Cells 1-4 are read as name, address, working hours and a
     comma-separated phone list. ``point.type`` is left for the caller.

     :param item: PyQuery-style callable wrapping the row.
     :return: the partially populated ``Point``.
     """
     def cell_text(index):
         # Raw text of the row's cell with the given index.
         return item('td:eq(%d)' % index).text()

     point = Point()
     point.prov = self.uid
     point.name = normalize_text(cell_text(1))
     point.address, point.place = split_address_place(cell_text(2))
     point.time = normalize_time(cell_text(3))
     point.phones = normalize_phones(cell_text(4).split(','))
     point.check_information = CHECK_OFFICIAL
     return point
Esempio n. 19
0
 def __get_offices_urls(self):
     """Collect ``(url, link_text)`` pairs from the left menu.

     The link whose normalized text equals ``Банкоматы`` is left out.

     :return: list of ``(absolute_url, raw_link_text)`` tuples.
     """
     page = PQ(get_url(self.__cities_url))
     menu_links = page('#menuLeft>ul>li>span>a') + page('#menuLeft>ul>li>a')
     result = []
     for link in map(PQ, menu_links):
         if normalize_text(link.text()) != u'Банкоматы':
             result.append((self.site + link.attr('href'), link.text()))
     return result
Esempio n. 20
0
 def __parse_exchange(self, item):
     """Create an exchange point from a ``name — address`` text element.

     :param item: PyQuery-style object whose text holds both parts
         separated by an em dash.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_EXCHANGE

     pieces = item.text().split(u'—')
     point.name = normalize_text(pieces[0])
     address_text = pieces[1]
     point.address, point.place = split_address_place(address_text)

     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 21
0
 def __parse_office(self, item, city):
     """Create an office point from one table row.

     Name, address and coordinates are read from the header cell; working
     hours and phones from the first two data cells, with ``<br>`` turned
     into ``', '``.

     :param item: PyQuery-style callable wrapping the row.
     :param city: city name prepended to the address.
     :return: the populated ``Point``.
     """
     def br_joined_text(selector):
         # Text of the selected cell with line breaks replaced by ', '.
         return PQ(replace_br(item(selector).html(), ', ')).text()

     point = Point()
     point.prov = self.uid
     point.type = TYPE_OFFICE
     point.name = normalize_text(item('th .pointShowMaps span:eq(0)').text())

     street = item('th .pointShowMaps span:eq(1)').text()
     point.address, point.place = split_address_place(u'г. %s, %s' % (city, street))

     point.time = normalize_time(br_joined_text('td:eq(0)'))
     point.phones = normalize_phones(br_joined_text('td:eq(1)').split(','))

     point.lat = normalize_text(item('th .item_coords .coord1').text())
     point.lng = normalize_text(item('th .item_coords .coord2').text())
     point.check_information = CHECK_OFFICIAL
     if not (point.lat and point.lng):
         warning_not_official_coordinates(point)
     else:
         point.check_coordinates = CHECK_OFFICIAL
     return point
Esempio n. 22
0
 def __get_cities_info(self, url):
     """Return the ``(city_id, city_name)`` options listed on *url*.

     Results are kept per URL in ``__cities`` so each page is fetched once.

     :param url: page holding the city drop-down.
     :return: list of ``(option value, normalized option text)`` tuples.
     """
     if url in self.__cities:
         return self.__cities[url]

     page = PQ(get_url(url))
     options = page('.content .selectForDropDownMenu select option')
     self.__cities[url] = [
         (option.attr('value'), normalize_text(option.text()),)
         for option in map(PQ, options)
     ]
     return self.__cities[url]
Esempio n. 23
0
 def __parse_base(self, item):
     """Fill the common fields of a point and fetch its details page.

     The details URL is ``self.site`` plus the ``href`` of the link in the
     first cell. Time and phones are reset to empty defaults.

     :param item: PyQuery-style callable wrapping the row.
     :return: ``(point, more)`` where *more* is the parsed details page.
     """
     point = Point()
     point.prov = self.uid
     point.name = normalize_text(item("td:eq(0)").text())
     point.address, point.place = self.__parse_address(item)

     details_href = item("td:eq(0) a").attr("href")
     details_page = PQ(get_url(self.site + details_href))

     point.time, point.phones = None, []
     return point, details_page
Esempio n. 24
0
 def get_terminals(self):
     """Collect terminal points from the terminals table.

     Rows whose first cell has ``colspan="4"`` and rows without text are
     skipped, as are rows for which ``__parse_terminal`` returns a falsy
     value.

     :return: list of populated ``Point`` objects.
     """
     page = PQ(get_url(self.__parse_data_terminals_url))
     terminals = []
     for row in map(PQ, page('.tbl tr:gt(0)')):
         spans_table = row('td:eq(0)').attr('colspan') == '4'
         if spans_table or not normalize_text(row.text()):
             continue
         terminal = self.__parse_terminal(row)
         if terminal:
             terminals.append(terminal)
     return terminals
Esempio n. 25
0
 def __parse_office(self, item):
     """Create an office point, reading hours and phones from its details page.

     Every ``.section`` of the details page whose title is a key of
     ``__office_sections_types`` is handled according to the mapped kind:
     ``"time"`` fills the working hours, ``"phone"`` fills the phone list.

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point``.
     """
     point, more = self.__parse_base(item)
     point.type = TYPE_OFFICE
     point.check_information = CHECK_OFFICIAL
     for section in map(PQ, more(".content .section")):
         title = normalize_text(section(".name").text())
         if title not in self.__office_sections_types:
             continue
         kind = self.__office_sections_types[title]
         if kind not in ("time", "phone"):
             continue
         # Both kinds start from the section text with <br> turned into commas.
         section_html = replace_br(section(".text").html(), ",")
         if kind == "time":
             cells = [cell.text() for cell in map(PQ, PQ(section_html)("td"))]
             point.time = normalize_time(", ".join(cells))
         else:
             phones_text = normalize_text(PQ(section_html).text())
             point.phones = filter_empty(map(normalize_phone, phones_text.split(",")))
     warning_not_official_coordinates(point)
     return point
Esempio n. 26
0
 def __parse_atm(self, item):
     """Create an ATM point from one table row.

     Cells 0-2 are read as name, address and working hours.

     :param item: PyQuery-style callable wrapping the row.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = TYPE_ATM
     point.name = normalize_text(item('td:eq(0)').text())

     address_text = item('td:eq(1)').text()
     point.address, point.place = split_address_place(address_text)
     # The name doubles as the place, replacing the one split off the address.
     point.place = point.name

     point.time = normalize_time(item('td:eq(2)').text())
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 27
0
 def get_exchanges(self):
     """Collect exchange points from the exchanges table.

     Cells 0-2 of each row are read as address, place and working hours.
     Addresses starting with ``Минск`` or ``Витебск`` get a ``г. `` prefix.
     Coordinates come from the ``__addresses`` lookup when available.

     :return: list of populated ``Point`` objects.
     """
     cities_needing_prefix = (u'Минск', u'Витебск')
     page = PQ(get_url(self.__parse_list_exchange_url).decode('utf8'))
     exchanges = []
     for row in map(PQ, page('#oo__content_value table tr:gt(0)')):
         exchange = Point()
         exchange.prov = self.uid
         exchange.type = TYPE_EXCHANGE

         raw_address = normalize_text(row('td:eq(0)').text())
         if raw_address.startswith(cities_needing_prefix):
             raw_address = u'г. ' + raw_address
         exchange.address = normalize_address(raw_address)

         exchange.place = normalize_text(row('td:eq(1)').text())
         exchange.time = normalize_time(row('td:eq(2)').text())
         exchange.check_information = CHECK_OFFICIAL
         if exchange.address not in self.__addresses:
             warning_not_official_coordinates(exchange)
         else:
             exchange.lat, exchange.lng = self.__addresses[exchange.address]
             exchange.check_coordinates = CHECK_OFFICIAL
         exchanges.append(exchange)
     return exchanges
Esempio n. 28
0
 def __parse_base_office_exchange(self, item, point_type):
     """Create an office/exchange point from one branch element.

     The address is the first paragraph with the leading
     ``Почтовый адрес:`` label (plus one character) cut off. The second
     paragraph is split by ``__phone_splitter``; pieces following a
     ``телефон`` marker are stored as phones until a ``факс`` marker.

     :param item: PyQuery-style callable wrapping the element.
     :param point_type: value stored in ``point.type``.
     :return: the populated ``Point``.
     """
     point = Point()
     point.prov = self.uid
     point.type = point_type
     point.name = normalize_text(item('h2').text())

     label_length = len(u'Почтовый адрес:') + 1
     point.address = normalize_address(item('.itemFilialIn>p:eq(0)').text()[label_length:])

     collecting = False
     contacts_text = item('.itemFilialIn>p:eq(1)').text() or ''
     for piece in self.__phone_splitter.split(contacts_text):
         piece = normalize_text(piece).lower()
         if piece == u'телефон':
             collecting = True
         elif piece == u'факс':
             collecting = False
         elif collecting:
             point.phones.append(normalize_phone(piece))

     hours = [PQ(paragraph).text() for paragraph in item('.workTime p')]
     point.time = normalize_time(', '.join(hours))
     point.check_information = CHECK_OFFICIAL
     warning_not_official_coordinates(point)
     return point
Esempio n. 29
0
 def __parse_office_exchange(self, item, city, coordinates, point_type, point_keywords):
     """Create an office/exchange point from one table row.

     :param item: PyQuery-style callable wrapping the row.
     :param city: city text prepended to the address.
     :param coordinates: data forwarded to ``__get_point_coordinate``.
     :param point_type: value stored in ``point.type``.
     :param point_keywords: prefix (or tuple of prefixes) the name must
         start with; other rows are rejected.
     :return: the populated ``Point`` or ``None`` for a rejected row.
     """
     point = Point()
     point.prov = self.uid
     point.type = point_type
     point.name = normalize_text(item('th:eq(0)').text())
     if not point.name.startswith(point_keywords):
         return None

     # First <br>-separated line is the address, the remainder is the place.
     address_html = replace_br(item('td:eq(0)').html(), ';;;')
     address_parts = PQ(address_html).text().split(';;;', 1)
     point.address = normalize_address(u'%s, %s' % (city, address_parts[0]))
     if len(address_parts) > 1:
         point.place = normalize_text(address_parts[1])

     # Lists inside the hours cell are dropped before its text is read.
     item('td:eq(1) ul, td:eq(1) li').remove()
     point.time = normalize_time(item('td:eq(1)').text())

     phone_nodes = item('td:eq(2) p') or item('td:eq(2)')
     point.phones = normalize_phones([PQ(node).text() for node in phone_nodes])
     point.check_information = CHECK_OFFICIAL

     point.lat, point.lng = self.__get_point_coordinate(point, coordinates)
     if not (point.lat and point.lng):
         warning_not_official_coordinates(point)
     else:
         point.check_coordinates = CHECK_OFFICIAL
     return point
Esempio n. 30
0
 def __get_offices_or_exchanges(self, parse_item, coordinates):
     """Walk the offices/exchanges page and parse every table row.

     ``<h2>`` elements set the current city; for any other element each
     row after the first is passed to *parse_item*.

     :param parse_item: callable ``(row, city, coordinates)`` returning a
         point or a falsy value for rows to skip.
     :param coordinates: data forwarded unchanged to *parse_item*.
     :return: list of the points returned by *parse_item*.
     """
     page = PQ(get_url(self.__parse_data_offices_exchanges_url))
     collected = []
     current_city = None
     for block in map(PQ, page('.b-ugc>*:gt(0)')):
         if block[0].tag == 'h2':
             current_city = normalize_text(block.text())
         else:
             for row in map(PQ, block('tr:gt(0)')):
                 parsed = parse_item(row, current_city, coordinates)
                 if parsed:
                     collected.append(parsed)
     return collected