def butik_product_collection(self,final_link):
    """
    Parse one Butik.ru catalogue page and save every product listed on it.

    The catalogue data is taken from the JSON found in the last ``<script>``
    tag of the page.  Each product is completed with details fetched from
    its own page via ``self.get_parser_url`` and then stored with
    ``db_functions.save_data_product``.

    Args:
        final_link: URL of the catalogue page to parse.

    Returns:
        ``False`` when the page's product list is an empty list; ``None``
        otherwise (including when the page could not be downloaded).
    """
    html = self.get_html(url=final_link)
    if not html:
        return None

    soup = BeautifulSoup(html, 'html.parser')
    last_script = soup.find_all('script')[-1]
    data_all = self.preparation_json(last_script.contents[0])
    current_dir = data_all['data']['catalogs'][0]['value']['value']['data']
    if current_dir == []:
        return False

    for item in current_dir:
        print(item)
        try:
            # The helper calls below run in a fixed order: name, URL, then
            # the per-product page request.
            name = self.product_name(item)
            url = self.product_url(item)
            details = self.get_parser_url(url_product=url)
            record = {
                'product_store': 'Butik.ru',
                'name': name,
                'product_url': url,
                'id': details['code'],
                'price': self.product_prise_full(item),
                'product_discount': self.product_prise_discount(item),
                'brand': self.product_brand(item),
                'category': details['category'],
                'category_detailed': details['category_detailed'],
                'color': details['color'],
                'size': str(self.product_size(item)),
                'product_image': details['image'],
                'gender': self.product_gender(final_link),
                'delivery': self.product_delivery(),
                'other': self.product_other(),
            }
            print(record)
            db_functions.save_data_product(product_dict=record)
        except (KeyError, TypeError):
            # Best effort: a product with missing or malformed fields is
            # skipped and the rest of the page is still processed.
            continue
    return None
def get_full_butik():
    """
    Crawl all products of the Butik.ru store and save them.

    Runs ``pages_in_category`` and ``get_items`` for every category in a
    fixed list of URLs, then parses each product link with
    ``get_butik_product`` and stores the result with ``save_data_product``.
    A product that cannot be parsed or saved is reported and skipped.

    Returns:
        None.
    """
    full_butik = [
        # Disabled category:
        # 'https://www.butik.ru/catalog/zhenshchinam/odezhda/',
        'https://www.butik.ru/catalog/zhenshchinam/sumki/',
        'https://www.butik.ru/catalog/zhenshchinam/obuv/',
        'https://www.butik.ru/catalog/zhenshchinam/aksessuary/',
        'https://www.butik.ru/catalog/muzhchinam/odezhda/',
        'https://www.butik.ru/catalog/muzhchinam/sumki/',
        'https://www.butik.ru/catalog/muzhchinam/obuv/',
        'https://www.butik.ru/catalog/muzhchinam/aksessuary/',
    ]
    for category in full_butik:
        pages = pages_in_category(category)
        for p in range(1, pages + 1):
            final_link = category + '?page=' + str(p) + '&per_page=100'
            print('\nCategory :' + final_link + ' ' + str(p) + '/' + str(pages))
            links = get_items(final_link)
            for link in links:
                try:
                    product = get_butik_product(link)
                    save_data_product(product)
                    print(f" !!!!!!!!!!!!!!!!!!Сохранено {link}")
                except Exception:
                    # Broad on purpose (one bad product must not stop the
                    # crawl), but not a bare ``except`` so Ctrl-C and
                    # SystemExit still terminate the run.
                    print('I cant get data from ' + link)
def get_full_randevu():
    """
    Crawl all products of the Rendez-Vous store and save them.

    Runs ``pages_in_category`` and ``get_items`` for every category in a
    fixed list of URLs, then parses each product link with
    ``get_randevu_product`` and stores the result with
    ``save_data_product``.  A product that cannot be parsed or saved is
    reported and skipped.

    Returns:
        None.
    """
    # NOTE(review): '.../catalog/tools/' is listed twice, so that category
    # is crawled twice per run - confirm whether this is intended.
    full_randevu = [
        'https://www.rendez-vous.ru/catalog/female/',
        'https://www.rendez-vous.ru/catalog/bags_female/',
        'https://www.rendez-vous.ru/catalog/zhenskaya_odezhda/',
        'https://www.rendez-vous.ru/catalog/accessories_female/',
        'https://www.rendez-vous.ru/catalog/tools/',
        'https://www.rendez-vous.ru/catalog/male/',
        'https://www.rendez-vous.ru/catalog/bags_male/',
        'https://www.rendez-vous.ru/catalog/muzhskaya_odezhda/',
        'https://www.rendez-vous.ru/catalog/accessories_male/',
        'https://www.rendez-vous.ru/catalog/tools/',
        'https://www.rendez-vous.ru/catalog/girls/',
        'https://www.rendez-vous.ru/catalog/boys/',
    ]
    for category in full_randevu:
        pages = pages_in_category(url=category)
        print(pages)
        for p in range(1, pages + 1):
            final_link = category + 'page/' + str(p) + '/'
            print('\nCategory :' + final_link + ' ' + str(p) + '/' + str(pages))
            links = get_items(final_link)
            for link in links:
                try:
                    product = get_randevu_product(link)
                    save_data_product(product)
                    print(f" ++++++Сохранено {link}")
                except Exception:
                    # Broad on purpose (one bad product must not stop the
                    # crawl), but not a bare ``except`` so Ctrl-C and
                    # SystemExit still terminate the run.
                    print('I cant get data from ' + link)
def randevuze_product_collection(final_link):
    """
    Parse one Rendez-Vous catalogue page and save every product on it.

    Each ``<li class="item">`` of the catalogue list carries a
    ``data-productinfo`` attribute that is evaluated as a Python literal.
    The result is cleaned with ``dict_cliner``, completed with details
    fetched from the product's own page (``get_store_randevu``) and stored
    with ``save_data_product``.

    Args:
        final_link: URL of the catalogue page to parse.

    Returns:
        None.  Nothing is done when the page cannot be downloaded or does
        not contain the expected catalogue list.
    """
    html = get_html(url=final_link)
    if not html:
        return

    soup = BeautifulSoup(html, 'html.parser')
    catalog = soup.find(
        'ul', class_="list-items list-items-catalog list-view-1 js-list-items")
    if catalog is None:
        # ``find`` returns None when the list is absent; without this guard
        # the chained ``find_all`` raised AttributeError for the whole page.
        return

    for product in catalog.find_all('li', class_="item"):
        try:
            product_randevoyz_all = ast.literal_eval(
                product['data-productinfo'])
        except (KeyError, TypeError, ValueError, SyntaxError):
            # Missing or malformed attribute: skip this product only.
            continue

        try:
            product_randevoyz = dict_cliner(product_randevoyz_all)
            product_url = prod_url(product)
            # Provisional category from the listing data; it is overwritten
            # below by the value taken from the product page.
            product_randevoyz['category'] = prod_category(
                product_randevoyz_all)
            current_product = get_store_randevu(url=product_url)
            product_randevoyz['name'] = product_name(product)
            product_randevoyz['product_store'] = prod_store()
            product_randevoyz['color'] = current_product['color']
            product_randevoyz['category'] = current_product['category']
            product_randevoyz['category_detailed'] = current_product[
                'category_detailed']
            product_randevoyz['product_url'] = product_url
            product_randevoyz['size'] = str(current_product['sizes'])
            product_randevoyz['product_image'] = current_product['image']
            product_randevoyz['product_discount'] = prod_discount(product)
            product_randevoyz['gender'] = product_gender(final_link)
            print(product_randevoyz)
            # Pause between products before saving.
            time.sleep(3.0)
            save_data_product(product_randevoyz)
        except (KeyError, TypeError):
            # Best effort: an incomplete product is skipped.
            continue
def index():
    """
    Render the index page, processing a submitted product link if present.

    Reads ``link`` and ``size`` (plus ``email`` for anonymous visitors)
    from the request form, fetches the product with ``get_info``, saves it
    with ``save_data_product`` and records the visitor's interest with
    ``save_interesting_product``.  If any step fails - for example when the
    form has no ``link`` field - ``info`` is set to ``None`` and the page
    is rendered without product data.

    Returns:
        The rendered ``index.html`` template, given ``info`` and the total
        product count returned by ``page_count``.
    """
    try:
        link = request.form['link']
        info = get_info(link)
        print(info)
        with app.app_context():
            print('______________________________________')
            # Save the product to the common database.
            save_data_product(info)
            # Save the product to the client database.
            size_interesting = request.form['size']
            print('______________________________________')
            if current_user.is_anonymous is True:
                email = request.form['email']
            else:
                email = current_user_mail()
            print(email)
            save_interesting_product(product_dict=info,
                                     email=email,
                                     price_interesting=None,
                                     color_interesting=None,
                                     size_interesting=size_interesting)
            print('_______________________________________')

        img_list = info['product_image']
        # NOTE(review): a value longer than 60 characters is presumably the
        # string form of a list of URLs ("['a', 'b']") - confirm against
        # get_info.  It is split back into a real list for the template.
        if len(img_list) > 60:
            stripped = img_list.strip('[').strip(']')
            info['product_image'] = [
                img.strip("'").strip(" '") for img in stripped.split(',')
            ]
        print(info)

        user = current_user.get_id()
        if user is not None:
            query = User.query.filter_by(id=user).first()
            print(query.username)
    except Exception:
        # Deliberately broad: any failure degrades to rendering the page
        # without product data.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt and SystemExit still propagate.
        info = None

    all_product_count = page_count()
    return render_template('index.html', info=info,
                           count_product_all=all_product_count)
def add_to_products_url_randevu(url):
    """
    Add a Rendez-Vous product, given by its link, to the Product database.

    Creates the application, and inside its application context parses the
    product with ``get_randevu_product`` and stores the result with
    ``save_data_product``.
    """
    application = create_app()
    with application.app_context():
        save_data_product(get_randevu_product(url))
def add_to_products_url_butik(url):
    """
    Add a Butik.ru product, given by its link, to the Product database.

    Creates the application, and inside its application context parses the
    product with ``get_butik_product`` and stores the result with
    ``save_data_product``.
    """
    application = create_app()
    with application.app_context():
        save_data_product(get_butik_product(url))
def butik():
    """
    Refresh the product database from Butik.ru.

    Creates the application and, inside its application context, checks the
    store with ``get_butik_data``.  Unless that check returns ``False`` the
    full crawl ``get_full_butik`` is run; otherwise a message is printed
    and nothing is updated.
    """
    app = create_app()
    with app.app_context():
        butik_status = get_butik_data()
        if butik_status is not False:
            # get_full_butik() saves every product itself and returns
            # nothing.  It used to be called a second time with its (None)
            # result passed to save_data_product, which repeated the whole
            # crawl and then tried to save None.
            get_full_butik()
        else:
            print('Бутик был недоступен, обновление базы не выполнено')
def rand():
    """
    Refresh the product database from Rendez-Vous.

    Creates the application and, inside its application context, checks the
    store with ``get_randevu_data``.  Unless that check returns ``False``
    the full crawl ``get_full_randevu`` is run; otherwise a message is
    printed and nothing is updated.
    """
    app = create_app()
    with app.app_context():
        randezvous_status = get_randevu_data()
        if randezvous_status is not False:
            # get_full_randevu() saves every product itself and returns
            # nothing.  It used to be called a second time with its (None)
            # result passed to save_data_product, which repeated the whole
            # crawl and then tried to save None.
            get_full_randevu()
        else:
            print('Рандеву был недоступен, обновление базы не выполнено')
def ali_product_collection(self, final_link):
    """
    Parse one AliExpress listing page and save every product found on it.

    The listing data is taken from the second-to-last
    ``<script type="text/javascript">`` tag.  Each item is completed with
    details from ``self.parser_product_result`` and stored with
    ``save_data_product``.  When an ``IndexError`` occurs the method waits
    two minutes, increments ``self.count_one`` and calls itself again; it
    gives up once ``self.count_one`` exceeds 100.

    Args:
        final_link: URL of the listing page to parse.

    Returns:
        None.
    """
    html = get_query.get_html_all(url=final_link)
    if not html:
        return

    soup = BeautifulSoup(html, 'html.parser')
    print(soup)
    try:
        script_tags = soup.find_all('script', type="text/javascript")
        raw_data_1 = script_tags[-2].contents[0]
        data_all = self.preparation_json(raw_data_1)
        for item in data_all['items']:
            try:
                # The URL is resolved first because the detail lookup
                # depends on it; the remaining fields follow in key order.
                url = self.product_url(item)
                details = self.parser_product_result(url)
                record = {
                    'product_store': 'Aliexpress',
                    'product_url': url,
                    'name': self.product_name(item),
                    'id': self.product_id_store(item),
                    'price': str(details['price']),
                    'product_discount': str(details['product_discount']),
                    'brand': details['brand'],
                    'delivery': str(details['delivery']),
                    'category': details['category'],
                    'category_detailed': details['category_detailed'],
                    'color': str(details['color']),
                    'size': str(details['size']),
                    'product_image': str(details['product_image']),
                    'other': str(self.product_store_other(item)),
                    'gender': self.product_gender(link = final_link),
                }
                print('------------------------------------------------')
                print(record)
                print('------------------------------------------------')
                save_data_product(product_dict=record)
            except (KeyError, TypeError):
                # Best effort: an incomplete item is skipped.
                continue
    except IndexError:
        # Raised, for instance, when fewer than two matching <script> tags
        # are present; wait and retry the same page.
        time.sleep(120)
        self.count_one += 1
        if self.count_one > 100:
            print('Ошибка парсинга, парсинг остановлен')
            return
        print("повтоная попытка получить страницу")
        self.ali_product_collection(final_link)