def run(self):
    """Collect promotions from the toy.ru listing and hand them to the queue.

    Fetches listing pages 1-10, builds one action per usable promotion
    card and passes the collected actions to ``helper.filling_queue``.
    A ``'progress'`` message is queued after every page.
    """
    partner_name = 'Toy'
    actions_data = []
    base_url = 'https://www.toy.ru'
    self.queue.put('set 10')
    for i in range(1, 11):
        main_url = f'https://www.toy.ru/company/akcii/?PAGEN_5={i}'
        page = helper.get_page_use_request(main_url)
        for div in page.find_all('div', class_='my-2'):
            # Look the banner up once.  A card whose image lacks the
            # ``img-fluid`` class used to raise AttributeError here.
            img = div.find('img', class_='img-fluid')
            if img is None:
                continue
            # Cards carrying the 'monohrome' class are reported and skipped.
            if 'monohrome' in img.get('class'):
                print('Устаревшая акция')
                continue
            link = div.find('a')
            title = img.get('title')
            if link is None or link.get('href') is None or title is None:
                # Not enough data to build an action; skip instead of crashing.
                continue
            url = base_url + link.get('href')
            name = title.strip()
            start, end = helper.get_date_now_to_end_month()
            code = "Не требуется"
            desc = name
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore:
                if actions_exists_in_db(partner_name, name, start, end):
                    continue
            action = helper.generate_action(partner_name, name, start, end,
                                            desc, code, url, action_type,
                                            short_desc)
            actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the first two labirint.ru listing pages.

    Each ``need-watch`` card becomes one action unless it is outdated or
    (when ``self.ignore`` is falsy) already present in the database.
    ``'progress'`` is queued once per page; the result goes to
    ``helper.filling_queue``.
    """
    partner_name = 'Labirint'
    collected = []
    site_root = 'https://www.labirint.ru'
    self.queue.put('set 2')
    for page_number in range(1, 3):
        listing = helper.get_page_use_request(
            f'https://www.labirint.ru/actions/?page={page_number}')
        for card in listing.find_all('div', class_='need-watch'):
            url = site_root + card.find('a').get('href')
            name = card.find('a').get('title').strip()
            dates_text = card.find(
                'div', class_='news-item__dates').text.strip()
            start, end = helper.search_data_in_text(dates_text)
            code = "Не требуется"
            desc = card.find('div', class_='news-item__anons').text.strip()
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore and actions_exists_in_db(
                    partner_name, name, start, end):
                continue
            collected.append(
                helper.generate_action(partner_name, name, start, end, desc,
                                       code, url, action_type, short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Parse the promotion page at ``self.link`` and record its action.

    The action is appended to ``self.actions_data`` under ``self.lock``
    unless the promotion is outdated or (when ``self.ignore`` is falsy)
    already present in the database.  ``'progress'`` is queued exactly
    once on every normal exit path.
    """
    partner_name = 'Республика'
    page = helper.get_page_use_request(self.link)
    # The promo markup is taken from the 15th <div> of the page; inline
    # <script> blocks are stripped before the fragment is re-parsed.
    common_block = page.find_all('div')[14]
    test = re.sub(r'(?s)<script>.*?</script>', '', str(common_block))
    page2 = BeautifulSoup(test, 'lxml')
    name = page2.find('div', class_='rd-promo-show_col-right').h1.text.strip()
    short_desc = page2.find('div', class_='rd-promo-show_title').text.strip()
    desc = page2.find('div', class_='rd-promo-show_text').text.strip()
    desc = re.sub(r'\s{2,}', ' ', desc).strip()
    code = "Не требуется"
    try:
        start, end = helper.search_data_in_text(short_desc)
    except Exception:
        # No parsable period in the title: fall back to "today .. end of
        # month".  ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        start = helper.DATA_NOW
        end = helper.get_date_end_month()
    action_type = helper.check_action_type(code, name, desc)
    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return
    if not self.ignore:
        with self.lock:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                return
    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, self.link, action_type, short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def run(self):
    """Collect promotions from the iledebeaute.ru actions page.

    Every ``news_block`` produces one ``'progress'`` message.  The end
    date is always three days from now; the start date falls back to
    ``helper.DATA_NOW`` when it cannot be read from the card.
    """
    partner_name = 'ИльДэБотэ'
    found_actions = []
    db_lock = threading.Lock()
    url = 'https://iledebeaute.ru/company/actions'
    soup = helper.get_page_use_request(url)
    blocks = soup.find_all("div", class_='news_block')
    self.queue.put(f'set {len(blocks)}')
    for block in blocks:
        name = block.h2.text
        try:
            date_text = block.find("p", class_='date').text.strip()
            start = helper.get_start_date_in_date(date_text, False)
        except Exception:
            start = helper.DATA_NOW
        end = (datetime.now() + timedelta(days=3)).strftime('%d.%m.%Y')
        desc = block.find("p", class_='desc').text.strip()
        code = 'Не требуется'
        if not helper.promotion_is_outdated(end):
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if self.ignore:
                already_known = False
            else:
                with db_lock:
                    already_known = actions_exists_in_db(
                        partner_name, name, start, end)
            if not already_known:
                found_actions.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        # Every card, kept or skipped, advances the progress counter once.
        self.queue.put('progress')
    helper.filling_queue(self.queue, found_actions, partner_name)
def run(self):
    """Collect promotions from the Philips shop "hot offers" page.

    The three slash-separated numbers found in the card's date are
    reordered (last one first) into a dotted end date; when no date can
    be read the whole current month is used.  The action type is taken
    from the card's ``caption`` text.
    """
    partner_name = 'Philips'
    collected = []
    soup = helper.get_page_use_request(
        'https://www.shop.philips.ru/hot_offers')
    cards = soup.find_all("div", class_='col-md-4')
    self.queue.put(f'set {len(cards)}')
    for card in cards:
        url = card.a.get('href').strip()
        name = card.find('div', class_='title').text.strip()
        try:
            raw_date = card.find('span', class_='date-format').text.strip()
            parts = re.search(r'(\d+)\/(\d+)\/(\d+)', raw_date)
            start = helper.DATA_NOW
            end = '.'.join(parts.group(3, 2, 1))
        except AttributeError:
            # Either the date span or the numeric pattern is missing.
            start = helper.get_first_day_month()
            end = helper.get_date_end_month()
        desc = name
        code = 'Не требуется'
        short_desc = ''
        action_type = card.find('div', class_='caption').text.strip()
        if not helper.promotion_is_outdated(end):
            if self.ignore or not actions_exists_in_db(
                    partner_name, name, start, end):
                collected.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, collected, partner_name)
def run(self):
    """Collect gift promotions from pharmacosmetica.ru (pages 0-2).

    A page that cannot be fetched is skipped.  Each ``podarok`` link
    yields one ``'progress'`` message and, unless outdated or already in
    the database (when ``self.ignore`` is falsy), one action.
    """
    partner_name = 'ФармКосметика'
    actions_data = []
    base_url = 'https://www.pharmacosmetica.ru'
    for i in range(3):
        main_url = f'https://www.pharmacosmetica.ru/podarki-dlya-vas/?page={i}'
        try:
            page = helper.get_page_use_request(main_url)
        except Exception:
            # Best effort: skip this page.  ``Exception`` rather than a
            # bare except so KeyboardInterrupt/SystemExit still propagate.
            continue
        divs = page.find_all('a', class_='podarok')
        # NOTE(review): the total is re-announced for every page — confirm
        # the queue consumer expects that.
        self.queue.put(f'set {len(divs)}')
        for div in divs:
            url = base_url + div.get('href')
            name = div.find('div', class_='textpod').text.strip()
            name = re.sub(r'\n', ' ', name).strip()
            start, end = helper.get_date_now_to_end_month()
            code = "Не требуется"
            desc = name
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue
            action = helper.generate_action(partner_name, name, start, end,
                                            desc, code, url, action_type,
                                            short_desc)
            actions_data.append(action)
            self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the maxipro.ru sales page.

    The period comes from the grey date line of each card; when that
    line is absent the action runs from ``helper.DATA_NOW`` for 30 days.
    Every card queues one ``'progress'`` message.
    """
    partner_name = 'МаксиПро'
    gathered = []
    db_lock = threading.Lock()
    main_url = 'https://maxipro.ru/sales/'
    base_url = 'https://maxipro.ru'
    soup = helper.get_page_use_request(main_url)
    cards = soup.find_all("div", class_='sale-card-wrapper')
    self.queue.put(f'set {len(cards)}')
    for card in cards:
        url = base_url + card.find("a").get('href')
        name = card.find("div", class_='sale-card-title').text.strip()
        desc = card.find(
            "div", class_='sale-card-text d-none d-md-block').text.strip()
        try:
            period_text = card.find(
                "div", class_='sale-card-text -grey-').text.strip()
            start, end = helper.search_data_in_text_without_year(period_text)
        except AttributeError:
            start = helper.DATA_NOW
            end = helper.get_date_plus_days(30)
        code = 'Не требуется'
        if not helper.promotion_is_outdated(end):
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if self.ignore:
                already_known = False
            else:
                with db_lock:
                    already_known = actions_exists_in_db(
                        partner_name, name, start, end)
            if not already_known:
                gathered.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, gathered, partner_name)
def run(self):
    """Collect promotions from rivegauche.ru (listing pages 0 and 1).

    A page that cannot be fetched is skipped.  Every promotion is dated
    from the first to the last day of the current month, as returned by
    the ``helper`` date functions.
    """
    partner_name = 'Ривгош'
    actions_data = []
    base_url = 'http://www.rivegauche.ru'
    for i in range(0, 2):
        main_url = f'http://www.rivegauche.ru/action?page={i}'
        try:
            page = helper.get_page_use_request(main_url)
        except Exception:
            # Best effort: skip this page.  ``Exception`` rather than a
            # bare except so KeyboardInterrupt/SystemExit still propagate.
            continue
        divs = page.find_all('div', class_='name')
        # NOTE(review): the total is re-announced for every page — confirm
        # the queue consumer expects that.
        self.queue.put(f'set {len(divs)}')
        for div in divs:
            url = base_url + div.find('a').get('href')
            name = div.text.strip()
            name = re.sub(r'\n', ' ', name).strip()
            start = helper.get_first_day_month()
            end = helper.get_date_end_month()
            code = "Не требуется"
            desc = name
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                self.queue.put('progress')
                continue
            if not self.ignore:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue
            action = helper.generate_action(partner_name, name, start, end,
                                            desc, code, url, action_type,
                                            short_desc)
            actions_data.append(action)
            self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Fan the Sephora news links out to ``Sephora_thread`` workers.

    One worker is created per ``b-news-thumb__title`` link; all workers
    share the result list and one lock.  After
    ``helper.start_join_threads`` returns, the list is handed to
    ``helper.filling_queue``.
    """
    partner_name = 'Sephora'
    actions_data = []
    shared_lock = threading.Lock()
    main_url = 'https://sephora.ru/news/'
    soup = helper.get_page_use_request(main_url)
    workers = []
    for link in soup.find_all("a", class_='b-news-thumb__title'):
        workers.append(
            Sephora_thread(actions_data, main_url, link, shared_lock,
                           self.queue, self.ignore))
    self.queue.put(f'set {len(workers)}')
    helper.start_join_threads(workers)
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the utkonos.ru actions page.

    The validity period is derived from the item's status text:

    * no status: today until the end of the month;
    * status containing "остал" with a number: today plus that many days;
    * any other status: parsed by ``helper.get_do_period``.

    Every list item queues exactly one ``'progress'`` message, including
    items skipped because their title is missing.
    """
    partner_name = 'Утконос'
    actions_data = []
    lock = threading.Lock()
    page = helper.get_page_use_request('https://www.utkonos.ru/action')
    divs = page.find_all("utk-action-list-item")
    self.queue.put(f'set {len(divs)}')
    for div in divs:
        try:
            name = div.find('div', class_='template__content-text').text.strip()
        except AttributeError as exc:
            self.queue.put(f'{exc}')
            # This path used to skip the progress tick, so the number of
            # ticks could fall short of the announced total.
            self.queue.put('progress')
            continue
        code = 'Не требуется'
        desc = ''
        url = 'https://www.utkonos.ru' + div.a.get('href')
        try:
            incoming_date = div.find(
                'div', class_='template__content-status').text.strip()
        except AttributeError:
            # The status element is absent.
            incoming_date = ''
        if not incoming_date:
            start, end = helper.get_date_now_to_end_month()
        elif "остал" in incoming_date.lower():
            days = re.search(r'(\d+)', incoming_date)
            if days is None:
                # "remaining" wording without a number: use the same
                # fallback as a missing status instead of crashing.
                start, end = helper.get_date_now_to_end_month()
            else:
                start = helper.DATA_NOW
                end = helper.get_date_plus_days(int(days.group(1)))
        else:
            start, end = helper.get_do_period(incoming_date)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue
        action = helper.generate_action(partner_name, name, start, end, desc,
                                        code, url, action_type, short_desc)
        actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect online promotions from eldorado.ru.

    Cards without a link or without a usable end date are reported on
    stdout and skipped.  Every card queues one ``'progress'`` message.
    """
    partner_name = 'Эльдорадо'
    actions_data = []
    base_url = 'https://www.eldorado.ru'
    main_url = 'https://www.eldorado.ru/actions.php?type=online'
    page = helper.get_page_use_request(main_url)
    divs = page.find_all('a', class_='promotion__promotion')
    self.queue.put(f'set {len(divs)}')
    for div in divs:
        href = div.get('href')
        try:
            # Links containing 'www' are used as-is, others are prefixed
            # with the site root; a missing href raises TypeError on concat.
            url = href if 'www' in str(href) else base_url + href
        except TypeError:
            print("Отсутствуют данные по акции")
            self.queue.put('progress')
            continue
        name = div.find('div', class_='promotion__promotion-title').text.strip()
        start = helper.DATA_NOW
        try:
            end = div.find(
                'div', class_='promotion__promotion-date').get('data-date')
            end = datetime.strptime(end, '%Y-%m-%d').strftime('%d.%m.%Y')
        except (ValueError, TypeError, AttributeError):
            # ValueError: malformed date; TypeError: ``data-date`` attribute
            # missing; AttributeError: date element missing.  Only the
            # first was handled before, so a missing date crashed the run.
            print("Отсутствует дата окончания акции")
            self.queue.put('progress')
            continue
        code = "Не требуется"
        desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                continue
        action = helper.generate_action(partner_name, name, start, end, desc,
                                        code, url, action_type, short_desc)
        actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the bethowen.ru sale page.

    The name is the image title with everything from the first
    underscore removed.  A period text containing "остал" is read as a
    number of days from today; any other text is parsed as a date range.
    """
    partner_name = 'Бетховен'
    gathered = []
    base_url = 'https://www.bethowen.ru'
    main_url = 'https://www.bethowen.ru/sale'
    soup = helper.get_page_use_request(main_url)
    anchors = soup.find_all('a', class_='no-decor')
    self.queue.put(f'set {len(anchors)}')
    for anchor in anchors:
        href = anchor.get('href')
        try:
            # A missing href makes the concatenation raise TypeError.
            url = href if 'www' in str(href) else base_url + href
        except TypeError:
            self.queue.put('Отсутствуют данные по акции')
            self.queue.put('progress')
            continue
        raw_title = anchor.find('img').get('title')
        name = re.sub('_.*$', '', raw_title).strip()
        period = anchor.find('div', class_='text-period').text.strip()
        if "остал" not in period.lower():
            start, end = helper.convert_list_to_date(
                helper.get_range_date(period))
        else:
            days_left = re.search(r'(\d+)', period.lower()).group(1)
            start = helper.DATA_NOW
            end = helper.get_date_plus_days(int(days_left))
        code = "Не требуется"
        desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not helper.promotion_is_outdated(end):
            if self.ignore or not actions_exists_in_db(
                    partner_name, name, start, end):
                gathered.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, gathered, partner_name)
def run(self):
    """Fan the MTS shop promotions (listing pages 1-2) out to ``MtsThread``.

    One worker is created per ``news-block`` link.  A listing page that
    cannot be fetched is skipped.  After ``helper.start_join_threads``
    returns, the shared result list goes to ``helper.filling_queue``.
    """
    partner_name = 'МТС'
    lock = threading.Lock()
    actions_data = []
    base_url = 'https://shop.mts.ru'
    threads = []
    for i in range(1, 3):
        main_url = f'https://shop.mts.ru/actions/{i}/'
        try:
            page = helper.get_page_use_request(main_url)
        except Exception:
            # Best effort: skip this page.  ``Exception`` rather than a
            # bare except so KeyboardInterrupt/SystemExit still propagate.
            continue
        for div in page.find_all('div', class_='news-block'):
            threads.append(
                MtsThread(actions_data, lock, self.queue,
                          base_url + div.find('a').get('href'), self.ignore))
    self.queue.put(f'set {len(threads)}')
    helper.start_join_threads(threads)
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the holodilnik.ru actions page.

    The card's date text is split on `` - ``.  The first part must hold
    the start date, otherwise the card is reported and skipped.  When
    there is no second part, the end date comes from
    ``helper.get_date_month_ahead(start)``.
    """
    partner_name = 'Холодильник'
    gathered = []
    soup = helper.get_page_use_request(
        'https://ulyanovsk.holodilnik.ru/action/')
    cards = soup.find_all("div", class_='col-4')
    begin_url = 'https://holodilnik.ru'
    self.queue.put(f'set {len(cards)}')
    date_pattern = r'(\d+\.\d+\.\d+)'
    for card in cards:
        url = begin_url + card.a.get('href').strip()
        name = card.find('span', class_='link').text.strip()
        pieces = card.find('span', class_='text-data').text.strip().split(' - ')
        if len(pieces[0]) <= 1:
            print(f'{name} нет даты date')
            self.queue.put('progress')
            continue
        start = re.search(date_pattern, pieces[0]).group(1)
        if len(pieces) == 2:
            end = re.search(date_pattern, pieces[1]).group(1)
        else:
            end = helper.get_date_month_ahead(start)
        desc = name
        code = 'Не требуется'
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not helper.promotion_is_outdated(end):
            if self.ignore or not actions_exists_in_db(
                    partner_name, name, start, end):
                gathered.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, gathered, partner_name)
def run(self):
    """Fan the respublica.ru promotions out to ``Respulica_thread`` workers.

    Only promo items that contain a "Подробнее" link are followed; one
    worker is created per link.  After ``helper.start_join_threads``
    returns, the shared result list goes to ``helper.filling_queue``.
    """
    partner_name = 'Республика'
    actions_data = []
    shared_lock = threading.Lock()
    main_url = 'https://www.respublica.ru/promotions'
    soup = helper.get_page_use_request(main_url)
    # First keep the items that have a details link, then build the links.
    items_with_link = [
        item for item in soup.find_all('div', class_='rd-promo-item')
        if item.find('a', text='Подробнее')
    ]
    links = [
        'https://www.respublica.ru/'
        + item.find('a', text='Подробнее').get('href')
        for item in items_with_link
    ]
    workers = []
    for link in links:
        workers.append(
            Respulica_thread(actions_data, link, shared_lock, self.ignore,
                             self.queue))
    self.queue.put(f'set {len(workers)}')
    helper.start_join_threads(workers)
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the first two santehnika-tut.ru pages.

    Cards without a title put the raised exception text on the queue and
    are skipped.  When the date cannot be parsed the action runs from
    ``helper.DATA_NOW`` to the end of the month.  ``'progress'`` is
    queued once per page.
    """
    partner_name = 'СантехникаТут'
    gathered = []
    db_lock = threading.Lock()
    self.queue.put('set 2')
    for page_number in range(1, 3):
        soup = helper.get_page_use_request(
            f'https://santehnika-tut.ru/actions/page-{page_number}.html')
        for card in soup.find_all('div', class_='col-xs-12 col-sm-6 col-md-3'):
            try:
                name = card.find('span', class_='title').text.strip()
            except Exception as exc:
                self.queue.put(f'{exc}')
                continue
            url = 'https://santehnika-tut.ru' + card.find('a').get('href')
            try:
                date_text = card.find('span', class_='date').text.strip()
                start, end = helper.search_end_data_in_text(date_text)
            except Exception:
                start = helper.DATA_NOW
                end = helper.get_date_end_month()
            desc = name
            code = 'Не требуется'
            if helper.promotion_is_outdated(end):
                continue
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if not self.ignore:
                with db_lock:
                    already_known = actions_exists_in_db(
                        partner_name, name, start, end)
                if already_known:
                    continue
            gathered.append(
                helper.generate_action(partner_name, name, start, end, desc,
                                       code, url, action_type, short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, gathered, partner_name)
def run(self):
    """Collect promotions from the kotofoto.ru promotion page.

    When the date cannot be read the action runs from today to the end
    of the month; when the description is missing, the name is used.
    Every ``.media-object`` element queues one ``'progress'`` message.
    """
    partner_name = 'Котофото'
    actions_data = []
    base_url = 'https://kotofoto.ru'
    main_url = 'https://kotofoto.ru/promotion/'
    page = helper.get_page_use_request(main_url)
    divs = page.select('.media-object')
    self.queue.put(f'set {len(divs)}')
    for div in divs:
        url = base_url + div.find('h4').find('a').get('href')
        name = div.find('h4').text.strip()
        try:
            date = div.find('span').text.strip()
            start, end = helper.convert_text_date(date)
        except Exception:
            # Missing or unparsable date.  ``Exception`` rather than a bare
            # except so KeyboardInterrupt/SystemExit still propagate.
            start, end = helper.get_date_now_to_end_month()
        code = "Не требуется"
        try:
            desc = div.find('p').text.strip()
        except AttributeError:
            # No <p> element: fall back to the name.
            desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        if not self.ignore:
            if actions_exists_in_db(partner_name, name, start, end):
                self.queue.put('progress')
                continue
        action = helper.generate_action(partner_name, name, start, end, desc,
                                        code, url, action_type, short_desc)
        actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Parse the MTS promotion page at ``self.url`` and record its action.

    The action is appended to ``self.actions_data`` under ``self.lock``
    unless the promotion is outdated or (when ``self.ignore`` is falsy)
    already present in the database.  ``'progress'`` is queued exactly
    once on every normal exit path.
    """
    partner_name = 'МТС'
    page = helper.get_page_use_request(self.url)
    name = page.h1.text.strip()
    try:
        data_text = page.find_all(
            'div', class_='wrapper')[0].find('p').text.strip()
        start, end = helper.search_data_in_text(data_text)
    except Exception:
        # Missing or unparsable period.  ``Exception`` rather than a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        start, end = helper.get_date_now_to_end_month()
    desc = name
    code = 'Не требуется'
    short_desc = ''
    action_type = helper.check_action_type(code, name, desc)
    if helper.promotion_is_outdated(end):
        self.queue.put('progress')
        return
    if not self.ignore:
        # NOTE(review): the lookup now runs under self.lock, the same lock
        # that guards the append below — presumably shared with sibling
        # worker threads; confirm against the code that creates them.
        with self.lock:
            already_known = actions_exists_in_db(
                partner_name, name, start, end)
        if already_known:
            self.queue.put('progress')
            return
    action = helper.generate_action(partner_name, name, start, end, desc,
                                    code, self.url, action_type, short_desc)
    with self.lock:
        self.actions_data.append(action)
        self.queue.put('progress')
def run(self):
    """Collect promotions from the first three tddomovoy.ru listing pages.

    When the date cannot be read the action runs from today to the end
    of the month; when the description is missing, the name is used.
    ``'progress'`` is queued once per page.
    """
    partner_name = 'Домовой'
    actions_data = []
    # NOTE(review): hrefs are appended to a base that already ends in
    # '/actions' — verify the resulting URLs against the live markup.
    base_url = 'https://tddomovoy.ru/actions'
    self.queue.put('set 3')
    for i in range(1, 4):
        main_url = f'https://tddomovoy.ru/actions/?PAGEN_1={i}'
        page = helper.get_page_use_request(main_url)
        for div in page.select('.bx_news li'):
            url = base_url + div.find('a').get('href')
            name = div.find('img').get('title').strip()
            try:
                date = div.find('div', class_='date').text.strip()
                start, end = helper.convert_list_to_date(
                    helper.get_range_date(date))
            except Exception:
                # Missing or unparsable date.  ``Exception`` rather than a
                # bare except so KeyboardInterrupt/SystemExit propagate.
                start, end = helper.get_date_now_to_end_month()
            code = "Не требуется"
            try:
                desc = div.find('div', class_='text').text.strip()
            except AttributeError:
                # No text block: fall back to the name.
                desc = name
            short_desc = ''
            action_type = helper.check_action_type(code, name, desc)
            if helper.promotion_is_outdated(end):
                continue
            if not self.ignore:
                if actions_exists_in_db(partner_name, name, start, end):
                    continue
            action = helper.generate_action(partner_name, name, start, end,
                                            desc, code, url, action_type,
                                            short_desc)
            actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect one-day promotions from kupivip.ru.

    Two sources are scraped in turn, each announcing its own item count
    with a ``'set N'`` message:

    1. campaign banners on the campaigns page;
    2. primary banners on the main page.

    Every action is dated today for both start and end.  Every item
    queues one ``'progress'`` message; the collected actions are handed
    to ``helper.filling_queue`` at the end.
    """

    def text_of(node, css_class, default=''):
        # Stripped text of the first <div class=css_class> inside *node*,
        # or *default* when no such element exists.
        found = node.find('div', class_=css_class)
        return default if found is None else found.text.strip()

    actions_data = []
    partner_name = 'КупиВип'
    lock = threading.Lock()
    page = helper.get_page_use_request(
        'https://www.kupivip.ru/campaigns?showIn=FEMALE&filter=ALL')
    divs = page.find_all("div", attrs={'data-banner': 'campaign'})
    self.queue.put(f'set {len(divs)}')
    # Deals of the day
    for div in divs:
        name = text_of(div, 'brands', default=None)
        if name is None:
            self.queue.put("Пропущена одна акция без названия")
            self.queue.put('progress')
            continue
        percent_actions = text_of(div, 'percent')
        desc = text_of(div, 'name')
        if percent_actions:
            name += f'. Скидки до {percent_actions}%'
        # One timestamp for both ends, so they cannot straddle midnight.
        start = end = datetime.now().strftime('%d.%m.%Y')
        code = 'Не требуется'
        if 'промокод' in name.lower():
            found_code = re.search(r'код\s(.*)\s?', name)
            # Keep the default when the wording does not match the pattern
            # (this used to raise AttributeError on ``None.group``).
            if found_code is not None:
                code = found_code.group(1)
        url = 'https://www.kupivip.ru/'
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue
        action = helper.generate_action(partner_name, name, start, end, desc,
                                        code, url, action_type, short_desc)
        actions_data.append(action)
        self.queue.put('progress')
    # Promotions from the banner on the main page
    s = requests.Session()
    s.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
        "Cookie": "showIn=FEMALE;",
    })
    # NOTE(review): verify=False disables TLS certificate verification;
    # left unchanged here, but confirm it is really required.
    request = s.get("https://www.kupivip.ru/", verify=False)
    page = BeautifulSoup(request.text, 'lxml')
    divs = page.find_all('div', class_="banner-primary")
    self.queue.put(f'set {len(divs)}')
    for div in divs:
        name = div.get("data-id")
        link = div.find('a')
        if name is None or link is None or link.get("href") is None:
            # Without an id or a link no usable action can be built; this
            # used to crash on ``None +=`` / ``None.get``.
            self.queue.put('progress')
            continue
        percent = text_of(div, 'discount')
        title = text_of(div, 'title')
        desc = text_of(div, 'text')
        if title == name:
            title = ''
        if percent:
            name += f'. Скидки до {percent}'
        start = end = datetime.now().strftime('%d.%m.%Y')
        code = 'Не требуется'
        desc = title + ' ' + desc
        url = "https://www.kupivip.ru/" + link.get("href")
        if helper.promotion_is_outdated(end):
            self.queue.put('progress')
            continue
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not self.ignore:
            with lock:
                if actions_exists_in_db(partner_name, name, start, end):
                    self.queue.put('progress')
                    continue
        action = helper.generate_action(partner_name, name, start, end, desc,
                                        code, url, action_type, short_desc)
        actions_data.append(action)
        self.queue.put('progress')
    helper.filling_queue(self.queue, actions_data, partner_name)
def run(self):
    """Collect promotions from the La Roche-Posay special-offers page.

    Two kinds of elements are processed in turn, each announcing its own
    count with a ``'set N'`` message; elements carrying a ``style``
    attribute are ignored in both passes:

    1. ``special-offers-banner``: name, optional date range and optional
       promo code; the URL is the listing page itself.
    2. ``special-offers-promo``: name and own link, dated from
       ``helper.DATA_NOW`` to the end of the month.
    """
    partner_name = 'La Roche posay'
    gathered = []
    base_url = 'https://www.laroche-posay.ru'
    main_url = 'https://www.laroche-posay.ru/special-offers/'
    page = helper.get_page_use_request(main_url)

    banners = []
    for candidate in page.findAll('div', class_='special-offers-banner'):
        if candidate.get('style') is None:
            banners.append(candidate)
    self.queue.put(f'set {len(banners)}')
    for banner in banners:
        url = main_url
        texts = banner.findAll('div', class_='special-offers-banner__text')
        name = texts[0].text.strip()
        try:
            period = texts[1].text.strip()
            start, end = helper.convert_list_to_date(
                helper.get_range_date(period))
        except Exception:
            start, end = helper.get_date_now_to_end_month()
        try:
            code = banner.find(
                'div', class_='special-offers-banner__code').text.strip()
        except Exception:
            code = 'Не требуется'
        desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not helper.promotion_is_outdated(end):
            if self.ignore or not actions_exists_in_db(
                    partner_name, name, start, end):
                gathered.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')

    promos = []
    for candidate in page.findAll('div', class_='special-offers-promo'):
        if candidate.get('style') is None:
            promos.append(candidate)
    self.queue.put(f'set {len(promos)}')
    for promo in promos:
        url = base_url + promo.find('a').get('href')
        name = promo.find(class_='special-offers-promo__text').text.strip()
        start = helper.DATA_NOW
        end = helper.get_date_end_month()
        code = 'Не требуется'
        desc = name
        short_desc = ''
        action_type = helper.check_action_type(code, name, desc)
        if not helper.promotion_is_outdated(end):
            if self.ignore or not actions_exists_in_db(
                    partner_name, name, start, end):
                gathered.append(
                    helper.generate_action(partner_name, name, start, end,
                                           desc, code, url, action_type,
                                           short_desc))
        self.queue.put('progress')
    helper.filling_queue(self.queue, gathered, partner_name)