def run(self):
    """Scrape promotions from the iledebeaute.ru actions page.

    Puts ``'set <count>'`` on ``self.queue`` once and ``'progress'`` once
    per processed block (these look like progress-reporting messages;
    confirm against the queue consumer).  Promotions reported as outdated,
    or already present in the database when ``self.ignore`` is falsy, are
    skipped.  Everything collected is handed to ``helper.filling_queue``.
    """
    partner_name = 'ИльДэБотэ'
    url = 'https://iledebeaute.ru/company/actions'
    code = 'Не требуется'
    short_desc = ''
    collected = []
    lock = threading.Lock()

    soup = helper.get_page_use_request(url)
    blocks = soup.find_all("div", class_='news_block')
    self.queue.put(f'set {len(blocks)}')

    for block in blocks:
        name = block.h2.text
        try:
            date_text = block.find("p", class_='date').text.strip()
            start = helper.get_start_date_in_date(date_text, False)
        except Exception:
            # Any failure to read or parse the date falls back to DATA_NOW.
            start = helper.DATA_NOW
        # The end date is always set to three days from now.
        end = (datetime.now() + timedelta(days=3)).strftime('%d.%m.%Y')
        desc = block.find("p", class_='desc').text.strip()

        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue

        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                already_stored = actions_exists_in_db(
                    partner_name, name, start, end)
            if already_stored:
                self.queue.put('progress')
                continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape hot offers from the Philips online shop.

    For every offer card the end date is rebuilt from the three numeric
    groups of a ``a/b/c`` date string as ``c.b.a``; when the date element
    or the pattern is missing, the first and last day of the month from
    ``helper`` are used instead.  Outdated or (unless ``self.ignore``)
    already stored promotions are skipped.  Progress messages are put on
    ``self.queue`` and the result goes to ``helper.filling_queue``.
    """
    partner_name = 'Philips'
    code = 'Не требуется'
    short_desc = ''
    collected = []

    soup = helper.get_page_use_request('https://www.shop.philips.ru/hot_offers')
    cards = soup.find_all("div", class_='col-md-4')
    self.queue.put(f'set {len(cards)}')

    for card in cards:
        url = card.a.get('href').strip()
        name = card.find('div', class_='title').text.strip()
        try:
            date_text = card.find('span', class_='date-format').text.strip()
            # A missing element or a non-matching pattern both surface as
            # AttributeError (attribute access on None).
            first, second, third = re.search(
                r'(\d+)\/(\d+)\/(\d+)', date_text).groups()
            start = helper.DATA_NOW
            end = f'{third}.{second}.{first}'
        except AttributeError:
            start = helper.get_first_day_month()
            end = helper.get_date_end_month()
        desc = name
        # The action type is taken verbatim from the card caption.
        action_type = card.find('div', class_='caption').text.strip()

        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape promotions from the maxipro.ru sales page.

    Dates are parsed with ``helper.search_data_in_text_without_year``;
    when the date element is missing the period defaults to
    ``helper.DATA_NOW`` .. ``helper.get_date_plus_days(30)``.  Outdated or
    (unless ``self.ignore``) already stored promotions are skipped.
    Progress messages are put on ``self.queue`` and the collected actions
    are passed to ``helper.filling_queue``.
    """
    partner_name = 'МаксиПро'
    main_url = 'https://maxipro.ru/sales/'
    base_url = 'https://maxipro.ru'
    code = 'Не требуется'
    short_desc = ''
    collected = []
    lock = threading.Lock()

    soup = helper.get_page_use_request(main_url)
    cards = soup.find_all("div", class_='sale-card-wrapper')
    self.queue.put(f'set {len(cards)}')

    for card in cards:
        url = base_url + card.find("a").get('href')
        name = card.find("div", class_='sale-card-title').text.strip()
        desc = card.find(
            "div", class_='sale-card-text d-none d-md-block').text.strip()
        try:
            date_text = card.find(
                "div", class_='sale-card-text -grey-').text.strip()
            start, end = helper.search_data_in_text_without_year(date_text)
        except AttributeError:
            start = helper.DATA_NOW
            end = helper.get_date_plus_days(30)

        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue

        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                already_stored = actions_exists_in_db(
                    partner_name, name, start, end)
            if already_stored:
                self.queue.put('progress')
                continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape promotions from the first ten toy.ru promotion pages.

    ``'set 10'`` is put on ``self.queue`` once and ``'progress'`` once per
    page (apparently progress-reporting messages; confirm against the
    queue consumer).  Each promotion gets the period returned by
    ``helper.get_date_now_to_end_month``.  Blocks whose image carries the
    ``monohrome`` class are treated as expired and skipped, as are
    outdated or (unless ``self.ignore``) already stored promotions.
    The collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Toy'
    base_url = 'https://www.toy.ru'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    self.queue.put('set 10')
    for page_number in range(1, 11):
        main_url = f'https://www.toy.ru/company/akcii/?PAGEN_5={page_number}'
        page = helper.get_page_use_request(main_url)
        for div in page.find_all('div', class_='my-2'):
            # Look up the promo image once and guard on that exact element:
            # a block containing some other <img> but no ``img-fluid`` one
            # used to raise AttributeError and abort the whole run.
            img = div.find('img', class_='img-fluid')
            if img is None:
                continue
            if 'monohrome' in img.get('class'):
                print('Устаревшая акция')
                continue
            url = base_url + div.find('a').get('href')
            name = img.get('title').strip()
            start, end = helper.get_date_now_to_end_month()
            desc = name
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape gift promotions from pharmacosmetica.ru (pages 0-2).

    A page that fails to load is skipped.  For every loaded page
    ``'set <count>'`` is put on ``self.queue`` followed by one
    ``'progress'`` per item (apparently progress-reporting messages;
    confirm against the queue consumer).  Each promotion gets the period
    from ``helper.get_date_now_to_end_month``.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  The collected
    actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'ФармКосметика'
    base_url = 'https://www.pharmacosmetica.ru'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    for page_number in range(3):
        main_url = (
            f'https://www.pharmacosmetica.ru/podarki-dlya-vas/?page={page_number}')
        try:
            page = helper.get_page_use_request(main_url)
        except Exception:
            # Best effort: skip pages that cannot be fetched.  ``Exception``
            # (not a bare except) so SystemExit/KeyboardInterrupt propagate.
            continue
        divs = page.find_all('a', class_='podarok')
        self.queue.put(f'set {len(divs)}')
        for div in divs:
            url = base_url + div.get('href')
            name = div.find('div', class_='textpod').text.strip()
            # Collapse the multi-line caption into a single line.
            name = re.sub(r'\n', ' ', name).strip()
            start, end = helper.get_date_now_to_end_month()
            desc = name
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                self.queue.put('progress')
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
            self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from the first two labirint.ru action pages.

    ``'set 2'`` is put on ``self.queue`` once and ``'progress'`` once per
    page.  Dates come from ``helper.search_data_in_text`` applied to the
    item's date block.  Outdated or (unless ``self.ignore``) already stored
    promotions are skipped.  The collected actions are passed to
    ``helper.filling_queue``.
    """
    partner_name = 'Labirint'
    base_url = 'https://www.labirint.ru'
    code = "Не требуется"
    short_desc = ''
    collected = []

    self.queue.put('set 2')
    for page_number in (1, 2):
        soup = helper.get_page_use_request(
            f'https://www.labirint.ru/actions/?page={page_number}')
        for item in soup.find_all('div', class_='need-watch'):
            url = base_url + item.find('a').get('href')
            name = item.find('a').get('title').strip()
            date_text = item.find(
                'div', class_='news-item__dates').text.strip()
            start, end = helper.search_data_in_text(date_text)
            desc = item.find('div', class_='news-item__anons').text.strip()
            action_type = helper.check_action_type(code, name, desc)

            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                continue

            collected.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape promotions from rivegauche.ru (pages 0-1).

    A page that fails to load is skipped.  For every loaded page
    ``'set <count>'`` is put on ``self.queue`` followed by one
    ``'progress'`` per item (apparently progress-reporting messages;
    confirm against the queue consumer).  Each promotion is dated from
    ``helper.get_first_day_month()`` to ``helper.get_date_end_month()``.
    Outdated or (unless ``self.ignore``) already stored promotions are
    skipped.  The collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Ривгош'
    base_url = 'http://www.rivegauche.ru'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    for page_number in range(2):
        main_url = f'http://www.rivegauche.ru/action?page={page_number}'
        try:
            page = helper.get_page_use_request(main_url)
        except Exception:
            # Best effort: skip pages that cannot be fetched.  ``Exception``
            # (not a bare except) so SystemExit/KeyboardInterrupt propagate.
            continue
        divs = page.find_all('div', class_='name')
        self.queue.put(f'set {len(divs)}')
        for div in divs:
            url = base_url + div.find('a').get('href')
            # Collapse the multi-line caption into a single line.
            name = re.sub(r'\n', ' ', div.text.strip()).strip()
            start = helper.get_first_day_month()
            end = helper.get_date_end_month()
            desc = name
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                self.queue.put('progress')
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
            self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Parse a single promotion page (``self.link``) for partner 'Республика'.

    The promotion markup is taken from the 15th ``<div>`` of the page with
    ``<script>`` elements stripped.  Dates are parsed from the short
    description; if that fails the period defaults to ``helper.DATA_NOW``
    .. ``helper.get_date_end_month()``.  Exactly one ``'progress'`` message
    is put on ``self.queue`` on every non-raising path.  A new action is
    appended to ``self.actions_data`` under ``self.lock``.
    """
    partner_name = 'Республика'
    code = "Не требуется"

    page = helper.get_page_use_request(self.link)
    # NOTE(review): the fixed index 14 depends on the current page layout.
    common_block = page.find_all('div')[14]
    without_scripts = re.sub(
        r'(?s)<script>.*?</script>', '', str(common_block))
    promo = BeautifulSoup(without_scripts, 'lxml')

    name = promo.find('div', class_='rd-promo-show_col-right').h1.text.strip()
    short_desc = promo.find('div', class_='rd-promo-show_title').text.strip()
    desc = promo.find('div', class_='rd-promo-show_text').text.strip()
    desc = re.sub(r'\s{2,}', ' ', desc).strip()

    try:
        start, end = helper.search_data_in_text(short_desc)
    except Exception:
        # ``Exception`` rather than a bare except so that
        # SystemExit/KeyboardInterrupt are not swallowed.
        start = helper.DATA_NOW
        end = helper.get_date_end_month()

    action_type = helper.check_action_type(code, name, desc)
    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return
    if not self.ignore:
        with self.lock:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                return

    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, self.link, action_type, short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def run(self):
    """Scrape promotions from the vseinstrumenti.ru actions page.

    The page is loaded through ``helper.get_page_use_webdriver``.  The
    description and the date range are extracted with several fallbacks
    because the ``act_descr`` block is not laid out uniformly; when no
    date range can be parsed the period from
    ``helper.get_date_now_to_end_month`` is used.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  Progress
    messages are put on ``self.queue`` and the collected actions are
    passed to ``helper.filling_queue``.
    """
    partner_name = 'Все инструменты'
    url = 'https://www.vseinstrumenti.ru/our_actions/aktsii'
    code = 'Не требуется'
    short_desc = ''
    actions_data = []

    page = helper.get_page_use_webdriver(url)
    divs = page.find_all("div", class_='action_main')
    lock = threading.Lock()
    self.queue.put(f'set {len(divs)}')

    for div in divs:
        name = div.find('div', class_='action_header').a.text.strip()
        descr = div.find('div', class_='act_descr')

        # Description: 4th paragraph, else 4th text line, else 1st paragraph.
        # ``Exception`` instead of bare excepts so SystemExit and
        # KeyboardInterrupt are not swallowed.
        try:
            desc = descr.find_all('p')[3].text.strip()
        except Exception:
            try:
                desc = re.search(
                    r'.*\n.*\n.*\n(.*)', descr.text.strip()).group(1).strip()
            except Exception:
                desc = descr.find('p').text.strip()

        # Date text: first paragraph, else first nested div.
        try:
            incoming_date = descr.find_all('p')[0].text.strip()
        except Exception:
            incoming_date = descr.find_all('div')[0].text.strip()
        incoming_date = re.search(r'(\d.*)\–\s(.*)', incoming_date.lower())
        try:
            start, end = helper.get_double_date(incoming_date.group(1),
                                                incoming_date.group(2))
        except Exception:
            # Covers both "pattern not found" (None) and parse failures.
            start, end = helper.get_date_now_to_end_month()

        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue

        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue

        actions_data.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Parse a single Sephora news page and store it as a promotion.

    The page URL is built from ``self.main_url`` (minus its last five
    characters) and ``self.link['href']`` (minus its first character).
    Pages without a ``b-news-detailed`` block, pages whose combined
    paragraph text is 2500 characters or longer, pages with no parsable
    dates, outdated promotions and (unless ``self.ignore``) promotions
    already in the database are skipped.  Exactly one ``'progress'``
    message is put on ``self.queue`` on every non-raising path.  A new
    action is appended to ``self.actions_data`` under ``self.lock``.
    """
    link = self.main_url[:-5] + self.link['href'][1:]
    response = requests.get(link)
    page = BeautifulSoup(response.text, 'lxml')

    if not page.find('div', class_='b-news-detailed'):
        self.queue.put('progress')
        return

    desc = ''.join(paragraph.text for paragraph in page.find_all('p'))
    desc = re.sub(r'\s{2,}', '\n', desc).strip()
    desc = re.sub(r'\xa0', '\n', desc).strip()
    if len(desc) >= 2500:
        self.queue.put('progress')
        return

    # First try an explicit date range; fall back to a start-date search.
    try:
        date_range = helper.get_range_date(desc)
        start, end = helper.convert_list_to_date(date_range)
    except Exception:
        try:
            start, end = helper.get_start_date_in_date(desc, True)
        except Exception:
            self.queue.put('progress')
            return

    url = link
    name = page.h1.text
    # Remove the login/registration boilerplate picked up with the paragraphs.
    desc = desc.replace(
        "На этот номер телефона будет отправлено sms с кодом восстановления:Войди или"
        " зарегистрируйся, чтобы получить все преимущества постоянного покупателя!",
        '').strip()
    partner_name = 'Sephora'
    code = "Не требуется"

    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return

    short_desc = ''
    action_type = helper.check_action_type(code, name, desc)
    if not self.ignore:
        with self.lock:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                return

    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, url, action_type, short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def run(self):
    """Scrape promotions from the first twenty rozetka.com.ua promo pages.

    A ``requests`` session is primed with the cookie header of an initial
    request, a browser User-Agent and ``slang=ru``.  ``'set 20'`` is put on
    ``self.queue`` once and ``'progress'`` once per page.  Dates are parsed
    from the tile's period text (split on an em dash); on any failure the
    period from ``helper.get_date_now_to_end_month`` is used.  Outdated or
    (unless ``self.ignore``) already stored promotions are skipped.  The
    collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Розетка'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    self.queue.put('set 20')
    # Context manager so the session's connections are always released.
    with requests.Session() as s:
        cookie = s.get(
            'https://rozetka.com.ua/news-articles-promotions/promotions/'
        ).request.headers.get('cookie')
        s.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
            'Cookie': cookie
        })
        s.cookies.set('slang', 'ru')

        for page_number in range(1, 21):
            main_url = f'https://rozetka.com.ua/news-articles-promotions/promotions/page={page_number}/'
            response = s.get(main_url)
            page = BeautifulSoup(response.text, 'lxml')
            for div in page.find_all('li', class_='promo-grid__cell'):
                # The "more promotions" cell is not a promotion tile.
                if div.get('name') == 'more_promotions':
                    continue
                url = div.find('a').get('href')
                name = div.find(
                    'img', class_='promo-tile__picture').get('title').strip()
                try:
                    period = div.find(
                        'time', class_='promo-tile__period').text.strip()
                    period = re.sub(r'\xa0', ' ', period).strip().split('—')
                    start = helper.get_one_date(period[0])
                    end = helper.get_one_date(period[1])
                except Exception:
                    # ``Exception`` rather than a bare except so that
                    # SystemExit/KeyboardInterrupt are not swallowed.
                    start, end = helper.get_date_now_to_end_month()
                desc = name
                action_type = helper.check_action_type(code, name, desc)
                if helper.promotion_is_outdated(end):
                    continue
                if not self.ignore and actions_exists_in_db(
                        partner_name, name, start, end):
                    continue
                actions_data.append(helper.generate_action(
                    partner_name, name, start, end, desc, code, url,
                    action_type, short_desc))
            self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from svyaznoy.ru using a live webdriver.

    Links are collected from the special-offers listing and each one is
    opened in the driver.  With two date spans they are start and end;
    with one span the end defaults to ``helper.get_date_end_month()``.
    Any other number of spans means the period is unknown and the
    promotion is skipped.  Outdated or (unless ``self.ignore``) already
    stored promotions are skipped too.  Progress messages are put on
    ``self.queue`` and the collected actions are passed to
    ``helper.filling_queue``.  The driver is always quit, even when
    parsing raises.
    """
    partner_name = 'Связной'
    main_url = 'https://www.svyaznoy.ru/special-offers'
    short_desc = ''
    actions_data = []

    page, driver = helper.get_page_use_webdriver(main_url, quit=False)
    try:
        links = [
            div.find('a', class_='b-article-preview__link').get('href')
            for div in page.find_all('div', class_='b-article-preview__inner')
        ]
        self.queue.put(f'set {len(links)}')

        for link in links:
            driver.get(link)
            page = BeautifulSoup(driver.page_source, 'lxml')
            name = page.h1.text
            date = page.find('div', class_='b-event-info__item').find_all(
                'span', class_='b-event-info__date')
            if len(date) == 2:
                start = helper.get_one_date(date[0].text.strip())
                end = helper.get_one_date(date[1].text.strip())
            elif len(date) == 1:
                start = helper.get_one_date(date[0].text.strip())
                end = helper.get_date_end_month()
            else:
                # Previously execution continued with ``start``/``end``
                # undefined (first iteration) or left over from the
                # previous promotion; skip instead of storing wrong dates.
                print(date)
                self.queue.put('progress')
                continue
            url = link
            desc = page.find('div', class_='b-article').text.strip()
            desc = re.sub(r'\s{2,}', ' ', desc).strip()
            code = helper.find_promo_code(desc)
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                self.queue.put('progress')
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
            self.queue.put('progress')
    finally:
        # Release the browser even if a page fails to parse.
        driver.quit()

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape online promotions from eldorado.ru.

    Promotion links without an ``href`` and promotions without a usable
    ``data-date`` end date are reported with ``print`` and skipped.  The
    start date is ``helper.DATA_NOW``.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  Progress
    messages are put on ``self.queue`` and the collected actions are
    passed to ``helper.filling_queue``.
    """
    partner_name = 'Эльдорадо'
    base_url = 'https://www.eldorado.ru'
    main_url = 'https://www.eldorado.ru/actions.php?type=online'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    page = helper.get_page_use_request(main_url)
    divs = page.find_all('a', class_='promotion__promotion')
    self.queue.put(f'set {len(divs)}')

    for div in divs:
        href = div.get('href')
        if href is None:
            print("Отсутствуют данные по акции")
            self.queue.put('progress')
            continue
        # Absolute links already contain the host; relative ones need it.
        url = href if 'www' in href else base_url + href

        name = div.find('div', class_='promotion__promotion-title').text.strip()
        start = helper.DATA_NOW
        try:
            end = div.find(
                'div', class_='promotion__promotion-date').get('data-date')
            end = datetime.strptime(end, '%Y-%m-%d').strftime('%d.%m.%Y')
        except (AttributeError, TypeError, ValueError):
            # A missing date block raises AttributeError and a missing
            # ``data-date`` attribute raises TypeError; catching only
            # ValueError let both abort the whole run.
            print("Отсутствует дата окончания акции")
            self.queue.put('progress')
            continue

        desc = name
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue
        actions_data.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from the bethowen.ru sale page.

    Links without an ``href`` are reported on ``self.queue`` and skipped.
    When the period text contains "остал", the first number in it is used
    as a day count from ``helper.DATA_NOW``; otherwise the text is parsed
    as a date range.  Outdated or (unless ``self.ignore``) already stored
    promotions are skipped.  Progress messages are put on ``self.queue``
    and the collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Бетховен'
    base_url = 'https://www.bethowen.ru'
    main_url = 'https://www.bethowen.ru/sale'
    code = "Не требуется"
    short_desc = ''
    collected = []

    soup = helper.get_page_use_request(main_url)
    anchors = soup.find_all('a', class_='no-decor')
    self.queue.put(f'set {len(anchors)}')

    for anchor in anchors:
        href = anchor.get('href')
        try:
            # A missing href makes the concatenation raise TypeError.
            url = href if 'www' in str(href) else base_url + href
        except TypeError:
            self.queue.put('Отсутствуют данные по акции')
            self.queue.put('progress')
            continue

        # Drop everything from the first underscore in the image title.
        name = re.sub('_.*$', '', anchor.find('img').get('title')).strip()
        period = anchor.find('div', class_='text-period').text.strip()
        lowered = period.lower()
        if "остал" in lowered:
            days = re.search(r'(\d+)', lowered).group(1)
            start = helper.DATA_NOW
            end = helper.get_date_plus_days(int(days))
        else:
            start, end = helper.convert_list_to_date(
                helper.get_range_date(period))

        desc = name
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Parse one 'Колеса Даром' promotion page (``self.url``).

    The description is the first two paragraphs of the class-less ``div``
    inside ``article-tiles``; with fewer than two paragraphs it is the
    first paragraph, or the block's text, cut to 300 characters.  The
    name and end date come from ``self.name`` / ``self.end``.  Exactly one
    ``'progress'`` message is put on ``self.queue`` on every non-raising
    path.  A new action is appended to ``self.actions_data`` under
    ``self.lock``.
    """
    partner_name = 'Колеса Даром'
    code = 'Не требуется'
    short_desc = ''

    session = requests.Session()
    session.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'
    }
    response = session.get(self.url)
    soup = BeautifulSoup(response.text, 'lxml')
    main_div = soup.find('div', class_='article-tiles')

    body = main_div.find('div', class_=False)
    paragraphs = body.find_all('p')
    if len(paragraphs) >= 2:
        desc = paragraphs[0].text.strip() + paragraphs[1].text.strip()
    elif paragraphs:
        desc = paragraphs[0].text.strip()[:300]
    else:
        desc = body.text.strip()[:300]
    desc = re.sub(r'\s{2,}', ' ', desc).strip()
    desc = re.sub(r'\xa0', '\n', desc).strip()
    desc = re.sub(r' ', ' ', desc).strip()

    start = helper.DATA_NOW
    if helper.promotion_is_outdated(self.end):
        self.queue.put('progress')
        return

    action_type = helper.check_action_type(code, self.name, desc)
    if not self.ignore:
        with self.lock:
            if actions_exists_in_db(partner_name, self.name, start, self.end):
                self.queue.put('progress')
                return

    action = helper.generate_action(partner_name, self.name, start, self.end,
                                    desc, code, self.url, action_type,
                                    short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def run(self):
    """Scrape promotions from the 1c-interes.ru special-actions page.

    The page is opened in a webdriver and the ``.news-next-btn`` button is
    clicked up to five times before the page source is parsed.  The driver
    is quit as soon as the source has been captured.  Dates come from
    ``helper.get_do_period``; on any failure the period defaults to
    ``helper.DATA_NOW`` .. ``helper.get_date_end_month()``.  Outdated or
    (unless ``self.ignore``) already stored promotions are skipped.
    Progress messages are put on ``self.queue`` and the collected actions
    are passed to ``helper.filling_queue``.
    """
    partner_name = '1С_Интерес'
    begin_url = 'https://www.1c-interes.ru'
    code = 'Не требуется'
    short_desc = ''
    actions_data = []

    page, driver = helper.get_page_use_webdriver(
        'https://www.1c-interes.ru/special_actions/', quit=False)
    try:
        for _ in range(5):
            next_btn = helper.check_exists_by_css(driver, '.news-next-btn')
            if next_btn:
                next_btn.click()
                # Give the page a moment to load after the click.
                sleep(1)
        page = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # The driver was requested with quit=False and was never released
        # before; quit it here so the browser process does not leak.
        driver.quit()

    divs = page.find_all("div", class_='main-holder')
    self.queue.put(f'set {len(divs)}')
    for div in divs:
        url = begin_url + div.a.get('href').strip()
        name = div.h2.text.strip()
        try:
            date = div.find('div', class_='preorder-active-to').text.strip()
            start, end = helper.get_do_period(date)
        except Exception:
            # ``Exception`` rather than a bare except so that
            # SystemExit/KeyboardInterrupt are not swallowed.
            start = helper.DATA_NOW
            end = helper.get_date_end_month()
        desc = div.find('div', class_='h2 tile-hide').text.strip()
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue
        actions_data.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from the book24.ru sales page.

    The page is loaded through ``helper.get_page_use_webdriver`` with
    ``scroll=True, hidden=True``.  Items without a countdown block are
    skipped.  The period text is the first ``info-list__text`` span when
    the item has fewer than two ``info-list__item`` blocks, otherwise the
    second span.  Outdated or (unless ``self.ignore``) already stored
    promotions are skipped.  Progress messages are put on ``self.queue``
    and the collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Book24'
    begin_url = 'https://book24.ru'
    code = 'Не требуется'
    short_desc = ''
    collected = []
    lock = threading.Lock()

    soup = helper.get_page_use_webdriver('https://book24.ru/sales/',
                                         scroll=True, hidden=True)
    items = soup.find_all('div', class_='stock-list-item__container')
    self.queue.put(f'set {len(items)}')

    for item in items:
        if not item.find('div', class_='stock-list-item__countdown'):
            self.queue.put('progress')
            continue

        name = item.find('p', class_='stock-list-item__title').text.strip()
        if len(item.findAll('div', class_='info-list__item')) < 2:
            period = item.find('span', class_='info-list__text').text.strip()
        else:
            period = item.findAll(
                'span', class_='info-list__text')[1].text.strip()
        start, end = helper.get_do_period(period)

        # Fall back to the title when the item has no description block.
        if item.find('div', class_='stock-list-item__desc'):
            desc = item.find(
                'div', class_='stock-list-item__desc').text.strip()
        else:
            desc = name
        url = begin_url + item.find(
            "div", class_='stock-list-item__more').a.get('href')

        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue

        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                already_stored = actions_exists_in_db(
                    partner_name, name, start, end)
            if already_stored:
                self.queue.put('progress')
                continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape promotions from the holodilnik.ru action page.

    The date text is split on ``' - '``.  If the first part is at most one
    character long the promotion is reported with ``print`` and skipped.
    With two parts the second one gives the end date; otherwise the end is
    ``helper.get_date_month_ahead(start)``.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  Progress
    messages are put on ``self.queue`` and the collected actions are
    passed to ``helper.filling_queue``.
    """
    partner_name = 'Холодильник'
    begin_url = 'https://holodilnik.ru'
    code = 'Не требуется'
    short_desc = ''
    date_pattern = r'(\d+\.\d+\.\d+)'
    collected = []

    soup = helper.get_page_use_request(
        'https://ulyanovsk.holodilnik.ru/action/')
    cards = soup.find_all("div", class_='col-4')
    self.queue.put(f'set {len(cards)}')

    for card in cards:
        url = begin_url + card.a.get('href').strip()
        name = card.find('span', class_='link').text.strip()
        parts = card.find('span', class_='text-data').text.strip().split(' - ')

        if len(parts[0]) <= 1:
            print(f'{name} нет даты date')
            self.queue.put('progress')
            continue
        start = re.search(date_pattern, parts[0]).group(1)

        if len(parts) == 2:
            end = re.search(date_pattern, parts[1]).group(1)
        else:
            end = helper.get_date_month_ahead(start)

        desc = name
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue

        collected.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Scrape promotions from 220-volt.ru using a live webdriver.

    Each promotion from the listing is opened in the driver to read its
    ``seoText`` description; promotions whose page has no such block are
    reported with ``print`` and skipped.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  Progress
    messages are put on ``self.queue`` and the collected actions are
    passed to ``helper.filling_queue``.  The driver is always quit, even
    when parsing raises.
    """
    partner_name = '220Volt'
    main_url = 'https://ulyanovsk.220-volt.ru/share/0/'
    code = 'Не требуется'
    short_desc = ''
    actions_data = []

    page, driver = helper.get_page_use_webdriver(main_url, quit=False)
    try:
        divs = page.find_all('div', class_='actionContainer rel')
        self.queue.put(f'set {len(divs)}')
        for div in divs:
            date = div.find('div', class_='actionPeriod').text.strip()
            start, end = helper.convert_text_date(date)
            name = div.find('div', class_='actionText').h4.text.strip()
            url = 'https://220-volt.ru' + div.find(
                'a', class_='activeButton').get('href')
            driver.get(url)
            action_page = BeautifulSoup(driver.page_source, 'lxml')
            try:
                desc = action_page.find('div', class_='seoText').text.strip()
            except Exception:
                print(f'Не удаолось открыть страницу {url}')
                self.queue.put('progress')
                continue
            desc = re.sub(r'\s{2,}', ' ', desc).strip()
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                self.queue.put('progress')
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
            self.queue.put('progress')
    finally:
        # Previously the driver was only quit on the success path, leaking
        # the browser whenever a promotion failed to parse.
        driver.quit()

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from the first two santehnika-tut.ru action pages.

    ``'set 2'`` is put on ``self.queue`` once and ``'progress'`` once per
    page.  Cards whose title cannot be read put the exception text on the
    queue and are skipped.  Dates come from
    ``helper.search_end_data_in_text``; on any failure the period defaults
    to ``helper.DATA_NOW`` .. ``helper.get_date_end_month()``.  Outdated or
    (unless ``self.ignore``) already stored promotions are skipped.  The
    collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'СантехникаТут'
    code = 'Не требуется'
    short_desc = ''
    collected = []
    lock = threading.Lock()

    self.queue.put('set 2')
    for page_number in (1, 2):
        soup = helper.get_page_use_request(
            f'https://santehnika-tut.ru/actions/page-{page_number}.html')
        for card in soup.find_all('div', class_='col-xs-12 col-sm-6 col-md-3'):
            try:
                name = card.find('span', class_='title').text.strip()
            except Exception as exc:
                self.queue.put(f'{exc}')
                continue
            url = 'https://santehnika-tut.ru' + card.find('a').get('href')
            try:
                date_text = card.find('span', class_='date').text.strip()
                start, end = helper.search_end_data_in_text(date_text)
            except Exception:
                start = helper.DATA_NOW
                end = helper.get_date_end_month()
            desc = name

            if helper.promotion_is_outdated(end):
                continue

            action_type = helper.check_action_type(code, name, desc)
            if not self.ignore:
                with lock:
                    already_stored = actions_exists_in_db(
                        partner_name, name, start, end)
                if already_stored:
                    continue

            collected.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Parse one akusherstvo.ru sale banner (``self.div``) into a promotion.

    The discount text and end date are read from the banner, the linked
    page supplies the heading used in the name and, when available, the
    first paragraph of the ``centre_header`` table as the description
    (with line breaks removed).  Exactly one ``'progress'`` message is put
    on ``self.queue`` on every non-raising path.  A new action is appended
    to ``self.actions_data`` under ``self.lock``.
    """
    partner_name = 'Акушерство'
    url = 'https://www.akusherstvo.ru/sale.php'
    code = 'Не требуется'
    short_desc = ''

    discount = self.div.find(
        "span", class_='banner-sale-list-item-discount-percent').text.strip()
    end = helper.get_one_date(
        self.div.find("strong", class_='date').text.strip())
    start = datetime.now().strftime('%d.%m.%Y')

    link = self.div.find('a').get('href')
    response = requests.get(link)
    action_page = BeautifulSoup(response.text, 'lxml')
    heading = action_page.h1.text.strip()
    header_table = action_page.find('table', class_='centre_header')
    name = f'Скидки {discount} на {heading}'

    # The description is optional: any failure leaves it empty.
    try:
        first_paragraph = header_table.find_all('p')[0].text.strip()
    except Exception:
        desc = ''
    else:
        desc = re.sub(r'\r', '', re.sub(r'\n', '', first_paragraph))

    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return

    action_type = helper.check_action_type(code, name, desc)
    if not self.ignore:
        with self.lock:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                return

    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, url, action_type, short_desc)
    with self.lock:
        self.queue.put('progress')
        self.actions_data.append(action)
def run(self):
    """Scrape promotions from the first three tddomovoy.ru action pages.

    ``'set 3'`` is put on ``self.queue`` once and ``'progress'`` once per
    page.  Dates are parsed from the item's date block; on any failure the
    period from ``helper.get_date_now_to_end_month`` is used.  The
    description falls back to the name when the text block is missing.
    Outdated or (unless ``self.ignore``) already stored promotions are
    skipped.  The collected actions are passed to ``helper.filling_queue``.
    """
    partner_name = 'Домовой'
    base_url = 'https://tddomovoy.ru/actions'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    self.queue.put('set 3')
    for page_number in range(1, 4):
        main_url = f'https://tddomovoy.ru/actions/?PAGEN_1={page_number}'
        page = helper.get_page_use_request(main_url)
        for div in page.select('.bx_news li'):
            # NOTE(review): base_url already ends with "/actions"; confirm
            # the hrefs are relative to it and not site-absolute.
            url = base_url + div.find('a').get('href')
            name = div.find('img').get('title').strip()
            # ``Exception`` rather than bare excepts below so that
            # SystemExit/KeyboardInterrupt are not swallowed.
            try:
                date = div.find('div', class_='date').text.strip()
                start, end = helper.convert_list_to_date(
                    helper.get_range_date(date))
            except Exception:
                start, end = helper.get_date_now_to_end_month()
            try:
                desc = div.find('div', class_='text').text.strip()
            except Exception:
                desc = name
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                continue
            actions_data.append(helper.generate_action(
                partner_name, name, start, end, desc, code, url,
                action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape promotions from the kotofoto.ru promotion page.

    Dates are parsed from the item's first ``<span>`` with
    ``helper.convert_text_date``; on any failure the period from
    ``helper.get_date_now_to_end_month`` is used.  The description is the
    first ``<p>`` or, when missing, the name.  Outdated or (unless
    ``self.ignore``) already stored promotions are skipped.  Progress
    messages are put on ``self.queue`` and the collected actions are
    passed to ``helper.filling_queue``.
    """
    partner_name = 'Котофото'
    base_url = 'https://kotofoto.ru'
    main_url = 'https://kotofoto.ru/promotion/'
    code = "Не требуется"
    short_desc = ''
    actions_data = []

    page = helper.get_page_use_request(main_url)
    divs = page.select('.media-object')
    self.queue.put(f'set {len(divs)}')

    for div in divs:
        url = base_url + div.find('h4').find('a').get('href')
        name = div.find('h4').text.strip()
        # ``Exception`` rather than bare excepts below so that
        # SystemExit/KeyboardInterrupt are not swallowed.
        try:
            date = div.find('span').text.strip()
            start, end = helper.convert_text_date(date)
        except Exception:
            start, end = helper.get_date_now_to_end_month()
        try:
            desc = div.find('p').text.strip()
        except Exception:
            desc = name
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            self.queue.put('progress')
            continue
        actions_data.append(helper.generate_action(
            partner_name, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Parse one МТС promotion page (``self.url``).

    The name (also used as the description) is the page heading.  Dates
    are parsed from the first paragraph of the first ``wrapper`` block; on
    any failure the period from ``helper.get_date_now_to_end_month`` is
    used.  Exactly one ``'progress'`` message is put on ``self.queue`` on
    every non-raising path.  The database check and the append to
    ``self.actions_data`` both run under ``self.lock``.
    """
    partner_name = 'МТС'
    code = 'Не требуется'
    short_desc = ''

    page = helper.get_page_use_request(self.url)
    name = page.h1.text.strip()
    try:
        data_text = page.find_all(
            'div', class_='wrapper')[0].find('p').text.strip()
        start, end = helper.search_data_in_text(data_text)
    except Exception:
        # ``Exception`` rather than a bare except so that
        # SystemExit/KeyboardInterrupt are not swallowed.
        start, end = helper.get_date_now_to_end_month()

    desc = name
    action_type = helper.check_action_type(code, name, desc)
    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return
    if not self.ignore:
        # Guard the DB lookup with the shared lock, as the other
        # per-promotion workers in this file do.
        with self.lock:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                return

    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, self.url, action_type, short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def parser(self):
    """Collect and format promotion data from the current coupons page.

    Switches the driver to a window other than ``self.dt_window`` /
    ``self.ad_window`` when one exists (otherwise to ``self.ad_window``),
    parses every ``div.coupon`` on the page and builds an action for each.
    Periods longer than 180 days are cut to 180 days; coupons without a
    date get a 180-day period starting now.  Unless the ``self.ignore``
    checkbox is checked, coupons already in the database are skipped.

    The results are reported through ``self.queue``: the value returned by
    ``helper.write_csv``, a one-element tuple with the partner name and a
    copy of the collected actions.  ``self.actions_data`` is cleared
    afterwards.  If the page has no coupons, only a hint message is queued.
    """
    self.driver.switch_to.window(self.dt_window)
    lock = threading.Lock()
    for window in self.driver.window_handles:
        if window != self.dt_window and window != self.ad_window:
            self.driver.switch_to.window(window)
    if len(self.driver.window_handles) == 2:
        self.driver.switch_to.window(self.ad_window)

    page = BeautifulSoup(self.driver.page_source, 'lxml')
    actions = page.find_all('div', class_='coupon')
    self.queue.put(f'Всего будет обработано акций {len(actions)}')
    if not actions:
        self.queue.put('Нужно зайти на страницу с акциями')
        return

    partner = ''
    url = ''
    code = 'Не требуется'
    short_desc = ''
    action_type = 'скидка'
    # Raw string: the previous non-raw literal relied on invalid escapes.
    date_pattern = re.compile(r'.*\s*(\d+.\d+.\d+)')

    self.gui.change_progress_signal.emit(len(actions))
    for act in actions:
        partner = act.find_all('b', text=True)[1].text.strip()
        name = act.find('p', {'class': 'h3-name'}).text.strip()
        now = datetime.now()
        try:
            full_date = act.find("b", text=date_pattern).text.strip()
        except AttributeError:
            # No date on the coupon: assume 180 days starting today.
            full_date = (now.strftime('%d.%m.%Y') + "-"
                         + (now + timedelta(days=180)).strftime('%d.%m.%Y'))
        # Remove all whitespace so the two dates are separated only by "-".
        temp = ''.join(str(full_date).split())
        start = datetime.strptime(
            re.search(r'^(\d+.\d+.\d{4})', temp).group(1), '%d.%m.%Y')
        end = datetime.strptime(
            re.search(r'-(\d+.\d+.\d{4})', temp).group(1), '%d.%m.%Y')
        if (end - start).days > 180:
            end = start + timedelta(days=180)
        start = start.strftime('%d.%m.%Y')
        end = end.strftime('%d.%m.%Y')

        paragraphs = act.find_all('p', text=True)
        desc = paragraphs[1].text.strip() if len(paragraphs) > 1 else ''

        if not self.ignore.isChecked():
            with lock:
                already_stored = actions_exists_in_db(
                    partner, name, start, end)
            if already_stored:
                # Report progress for skipped coupons as well, otherwise
                # the total emitted above is never reached.
                self.queue.put('progress')
                continue

        self.actions_data.append(helper.generate_action(
            partner, name, start, end, desc, code, url,
            action_type, short_desc))
        self.queue.put('progress')

    if not self.actions_data:
        self.queue.put(f'Акции по {partner} не найдены ')
    else:
        self.queue.put(helper.write_csv(self.actions_data))
        self.queue.put((partner, ))
        # Queue a copy: the list itself is cleared below, which would
        # otherwise empty the object the consumer receives.
        self.queue.put(list(self.actions_data))

    current = self.driver.current_window_handle
    if current != self.ad_window and current != self.dt_window:
        self.driver.close()
        self.driver.switch_to.window(self.ad_window)
    self.actions_data.clear()
def run(self):
    """Fetch promotions of the partner 'Бутик' and hand them to the queue.

    Authenticates against ``auth.butic_auth_url``, requests the promotion
    list from the GraphQL endpoint ``auth.butic_main_url`` and, for every
    promotion that is not outdated (and not already stored, unless
    ``self.ignore`` is set), builds two actions - one with the men's link and
    one with the women's link - then downloads the promotion's banners via
    ``helper.banner_downloader``. The collected actions are passed to
    ``helper.filling_queue``.

    Raises:
        RuntimeError: when authentication fails, no token is returned, or
            any API request answers with a status other than 200.
        AttributeError: when a current promotion's description lacks the
            expected link or terms sections.
    """
    partner_name = 'Бутик'
    actions_data = []
    # Created once per run instead of once per promotion.
    lock = threading.Lock()

    session = requests.Session()
    result = session.post(auth.butic_auth_url, data=auth.butic_payload)
    if result.status_code != 200:
        raise RuntimeError(
            f'{partner_name}: authorization failed '
            f'(HTTP {result.status_code})')
    result_data = result.json()
    if not result_data.get('token'):
        raise RuntimeError(f'{partner_name}: no token in authorization reply')
    auth_header = {'Authorization': 'Bearer ' + result_data['token']}

    promo_data = {
        "operationName": "getPromotions",
        "variables": {"where": {"status": {"$ne": 3}},
                      "limit": 100,
                      "order": "reverse:created"},
        "query": (
            "query getPromotions($limit: Int!, $where: SequelizeJSON, $order: String!) "
            "{promotions(limit: $limit, where: $where, order: $order) {\n"
            " rows {\n id\n url\n title\n status\n preview\n description\n image\n"
            " start\n end\n created\n updated\n __typename\n }\n"
            " __typename\n }\n}"
        ),
    }
    result = session.post(auth.butic_main_url, headers=auth_header,
                          json=promo_data)
    if result.status_code != 200:
        raise RuntimeError(
            f'{partner_name}: promotions request failed '
            f'(HTTP {result.status_code})')

    # Decode the response body once and reuse it.
    rows = result.json()['data']['promotions']['rows']
    if not rows:
        self.queue.put(f'Акции по {partner_name} не найдены ')
        self.queue.put('progress')
        return

    banner_query = (
        "query getBanners($limit: Int!, $where: JSON) {\n"
        " banners(limit: $limit, where: $where) {\n"
        " rows {\n id\n promotionId\n name\n path\n width\n height\n __typename\n }\n"
        " __typename\n }\n}\n"
    )
    begin_url_banner = "https://partners.butik.ru/api/static"

    for promotion in rows:
        name = promotion['title']
        start = datetime.strptime(
            promotion['start'], '%Y-%m-%d').strftime('%d.%m.%Y')
        end = datetime.strptime(
            promotion['end'], '%Y-%m-%d').strftime('%d.%m.%Y')
        # Drop expired promotions before parsing their description, so a
        # malformed text on an expired promotion cannot abort the run.
        if helper.promotion_is_outdated(end):
            continue

        action_id = int(promotion['id'])
        full_description = promotion['description']
        try:
            code = re.search(r'([a-zA-Z]+.*)',
                             promotion['preview']).group(1).strip()
        except (AttributeError, KeyError, TypeError):
            # No latin text in the preview (or no preview at all).
            code = "Не требуется"
        url_woman = re.search(r'для женщин:.*(https.*)',
                              full_description).group(1).strip()
        url_man = re.search(r'для мужчин:.*(https.*)',
                            full_description).group(1).strip()
        desc = re.search(r'(?s)Подробные условия:(.*)',
                         full_description).group(1).strip()
        desc = re.sub(r'\*', '', desc).strip()

        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    continue

        action_man = helper.generate_action(
            partner_name, name, start, end, desc, code, url_man,
            action_type, short_desc)
        action_woman = helper.generate_action(
            partner_name, name, start, end, desc, code, url_woman,
            action_type, short_desc)
        actions_data.append(action_man)
        actions_data.append(action_woman)

        banner_data = {
            "operationName": "getBanners",
            "variables": {"where": {"promotionId": action_id},
                          "limit": 1000},
            "query": banner_query,
        }
        banner_result = session.post(auth.butic_main_url,
                                     headers=auth_header, json=banner_data)
        if banner_result.status_code != 200:
            raise RuntimeError(
                f'{partner_name}: banners request failed '
                f'(HTTP {banner_result.status_code})')
        banners_links = [
            begin_url_banner + banner['path']
            for banner in banner_result.json()['data']['banners']['rows']
        ]
        helper.banner_downloader(banners_links, self.queue)

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape the La Roche-Posay special-offers page and queue its promotions.

    Two kinds of blocks are read from the page: ``special-offers-banner``
    (name, optional date range, optional promo code) and
    ``special-offers-promo`` (name and link; valid from ``helper.DATA_NOW``
    to the end of the month). Blocks carrying an inline ``style`` attribute
    are ignored. For each kind a ``'set N'`` message is queued first and
    ``'progress'`` is queued once per block; the collected actions are
    finally passed to ``helper.filling_queue``.
    """
    partner_name = 'La Roche posay'
    base_url = 'https://www.laroche-posay.ru'
    main_url = 'https://www.laroche-posay.ru/special-offers/'
    actions_data = []

    def without_inline_style(tags):
        # Keep only the blocks that have no ``style`` attribute.
        return [tag for tag in tags if tag.get('style') is None]

    def register(name, start, end, code, url):
        # Build one action and store it unless it is outdated or known.
        desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            return
        if not self.ignore and actions_exists_in_db(
                partner_name, name, start, end):
            return
        actions_data.append(
            helper.generate_action(partner_name, name, start, end, desc,
                                   code, url, action_type, short_desc))

    page = helper.get_page_use_request(main_url)

    banners = without_inline_style(
        page.findAll('div', class_='special-offers-banner'))
    self.queue.put(f'set {len(banners)}')
    for banner in banners:
        texts = banner.findAll('div', class_='special-offers-banner__text')
        name = texts[0].text.strip()
        try:
            period = texts[1].text.strip()
            start, end = helper.convert_list_to_date(
                helper.get_range_date(period))
        except Exception:
            # No usable date on the banner: from today to the end of month.
            start, end = helper.get_date_now_to_end_month()
        try:
            code = banner.find(
                'div', class_='special-offers-banner__code').text.strip()
        except Exception:
            code = 'Не требуется'
        register(name, start, end, code, main_url)
        self.queue.put('progress')

    promos = without_inline_style(
        page.findAll('div', class_='special-offers-promo'))
    self.queue.put(f'set {len(promos)}')
    for promo in promos:
        url = base_url + promo.find('a').get('href')
        name = promo.find(class_='special-offers-promo__text').text.strip()
        start = helper.DATA_NOW
        end = helper.get_date_end_month()
        register(name, start, end, 'Не требуется', url)
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Scrape KupiVip campaigns and main-page banners and queue them.

    First the campaign list page is parsed (``div[data-banner=campaign]``),
    then the ``div.banner-primary`` blocks of the main page. Every promotion
    is dated today for both start and end. For each of the two sources a
    ``'set N'`` message is queued, followed by one ``'progress'`` per block;
    blocks without a name are skipped with a notice. The collected actions
    are passed to ``helper.filling_queue``.
    """
    # Local import: the rest of the file is not visible from this block.
    from urllib.parse import urljoin

    actions_data = []
    partner_name = 'КупиВип'
    site_url = 'https://www.kupivip.ru/'
    lock = threading.Lock()
    # One timestamp for start and end, so they cannot differ around midnight.
    today = datetime.now().strftime('%d.%m.%Y')

    def text_of(parent, css_class):
        # Stripped text of the first matching child div, or None if absent.
        node = parent.find('div', class_=css_class)
        return node.text.strip() if node is not None else None

    def store(name, desc, code, url):
        # Add the promotion unless it is outdated or already in the database.
        start = end = today
        if helper.promotion_is_outdated(end):
            return
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    return
        actions_data.append(
            helper.generate_action(partner_name, name, start, end, desc,
                                   code, url, action_type, short_desc))

    page = helper.get_page_use_request(
        'https://www.kupivip.ru/campaigns?showIn=FEMALE&filter=ALL')
    divs = page.find_all("div", attrs={'data-banner': 'campaign'})
    self.queue.put(f'set {len(divs)}')

    # Deals of the day
    for div in divs:
        name = text_of(div, 'brands')
        if name is None:
            self.queue.put("Пропущена одна акция без названия")
            self.queue.put('progress')
            continue
        percent_actions = text_of(div, 'percent') or ''
        desc = text_of(div, 'name') or ''
        if percent_actions:
            name += f'. Скидки до {percent_actions}%'
        code = 'Не требуется'
        if 'промокод' in name.lower():
            # The name mentions a promo code; keep the default when the
            # code itself cannot be extracted instead of crashing.
            found = re.search(r'код\s(.*)\s?', name)
            if found:
                code = found.group(1)
        store(name, desc, code, site_url)
        self.queue.put('progress')

    # Promotions from the banner on the main page
    s = requests.Session()
    s.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/80.0.3987.149 Safari/537.36',
        "Cookie": "showIn=FEMALE;",
    })
    # NOTE(review): verify=False disables TLS certificate verification;
    # left as is, confirm whether it is still required for this site.
    request = s.get("https://www.kupivip.ru/", verify=False)
    page = BeautifulSoup(request.text, 'lxml')
    divs = page.find_all('div', class_="banner-primary")
    self.queue.put(f'set {len(divs)}')

    for div in divs:
        name = div.get("data-id")
        if not name:
            # Same policy as for campaigns: a banner without a name is
            # skipped instead of failing on ``None``.
            self.queue.put("Пропущена одна акция без названия")
            self.queue.put('progress')
            continue
        percent = text_of(div, 'discount') or ''
        title = text_of(div, 'title') or ''
        desc = text_of(div, 'text') or ''
        if title == name:
            title = ''
        if percent:
            name += f'. Скидки до {percent}'
        # Join only the non-empty parts to avoid a stray leading/trailing
        # space in the description.
        desc = ' '.join(part for part in (title, desc) if part)
        anchor = div.find('a')
        href = anchor.get("href") if anchor is not None else None
        # urljoin handles root-relative and absolute hrefs correctly.
        url = urljoin(site_url, href or '')
        store(name, desc, 'Не требуется', url)
        self.queue.put('progress')

    helper.filling_queue(self.queue, actions_data, partner_name)