Example #1
0
    def butik_product_collection(self, final_link):
        """Scrape one Butik.ru catalogue page and save every product on it.

        The catalogue data is taken from the contents of the last ``<script>``
        tag of the page, passed through ``self.preparation_json`` and walked
        item by item; each item becomes a product dict that is handed to
        ``db_functions.save_data_product``.

        Args:
            final_link: URL of the catalogue page to download.

        Returns:
            ``False`` if the page lists no products; ``None`` otherwise
            (including the case where the page could not be downloaded).
        """
        html = self.get_html(url=final_link)
        if not html:
            return None

        soup = BeautifulSoup(html, 'html.parser')
        raw_data = soup.find_all('script')[-1].contents[0]
        data_all = self.preparation_json(raw_data)
        current_dir = data_all['data']['catalogs'][0]['value']['value']['data']
        # Treat a missing (None) listing like an empty one instead of failing
        # later when trying to iterate over it.
        if not current_dir:
            return False

        for item in current_dir:
            print(item)
            try:
                name = self.product_name(item)
                product_url = self.product_url(item)
                # Details that are not part of the listing come from the
                # product's own page.
                current_product = self.get_parser_url(url_product=product_url)
                butik_product_dict = {
                    'product_store': 'Butik.ru',
                    'name': name,
                    'product_url': product_url,
                    'id': current_product['code'],
                    'price': self.product_prise_full(item),
                    'product_discount': self.product_prise_discount(item),
                    'brand': self.product_brand(item),
                    'category': current_product['category'],
                    'category_detailed': current_product['category_detailed'],
                    'color': current_product['color'],
                    'size': str(self.product_size(item)),
                    'product_image': current_product['image'],
                    'gender': self.product_gender(final_link),
                    'delivery': self.product_delivery(),
                    'other': self.product_other(),
                }
                print(butik_product_dict)
                db_functions.save_data_product(product_dict=butik_product_dict)
            except (KeyError, TypeError) as exc:
                # Best effort: one malformed item must not stop the page, but
                # the skip is reported instead of being silently dropped.
                print(f'Skipped Butik.ru item with incomplete data: {exc!r}')
        return None
Example #2
0
def get_full_butik():
    """Crawl the fixed list of Butik.ru categories and save every product.

    For each category the number of pages is obtained from
    ``pages_in_category``; every page is expanded into product links with
    ``get_items`` and each link is parsed with ``get_butik_product`` and
    stored with ``save_data_product``.  A product that fails to parse or save
    is reported and skipped.

    Returns:
        None.
    """
    full_butik = [
        # 'https://www.butik.ru/catalog/zhenshchinam/odezhda/',  # disabled
        'https://www.butik.ru/catalog/zhenshchinam/sumki/',
        'https://www.butik.ru/catalog/zhenshchinam/obuv/',
        'https://www.butik.ru/catalog/zhenshchinam/aksessuary/',
        'https://www.butik.ru/catalog/muzhchinam/odezhda/',
        'https://www.butik.ru/catalog/muzhchinam/sumki/',
        'https://www.butik.ru/catalog/muzhchinam/obuv/',
        'https://www.butik.ru/catalog/muzhchinam/aksessuary/',
    ]

    for category in full_butik:
        pages = pages_in_category(category)
        for p in range(1, pages + 1):
            final_link = category + '?page=' + str(p) + '&per_page=100'
            print('\nCategory :' + final_link + ' ' + str(p) + '/' +
                  str(pages))
            links = get_items(final_link)
            for link in links:
                try:
                    product = get_butik_product(link)
                    save_data_product(product)
                    print(f" !!!!!!!!!!!!!!!!!!Сохранено {link}")
                except Exception:
                    # Exception, not a bare except: a failing product is
                    # skipped, but Ctrl-C / SystemExit still stop the crawl.
                    print('I cant get data from ' + link)
Example #3
0
def get_full_randevu():
    """Crawl the fixed list of Rendez-Vous categories and save every product.

    For each category the number of pages is obtained from
    ``pages_in_category``; every page is expanded into product links with
    ``get_items`` and each link is parsed with ``get_randevu_product`` and
    stored with ``save_data_product``.  A product that fails to parse or save
    is reported and skipped.

    Returns:
        None.
    """
    # The 'tools' category used to be listed twice, which made the crawler
    # walk it twice; it is kept once, at its first position.
    full_randevu = [
        'https://www.rendez-vous.ru/catalog/female/',
        'https://www.rendez-vous.ru/catalog/bags_female/',
        'https://www.rendez-vous.ru/catalog/zhenskaya_odezhda/',
        'https://www.rendez-vous.ru/catalog/accessories_female/',
        'https://www.rendez-vous.ru/catalog/tools/',
        'https://www.rendez-vous.ru/catalog/male/',
        'https://www.rendez-vous.ru/catalog/bags_male/',
        'https://www.rendez-vous.ru/catalog/muzhskaya_odezhda/',
        'https://www.rendez-vous.ru/catalog/accessories_male/',
        'https://www.rendez-vous.ru/catalog/girls/',
        'https://www.rendez-vous.ru/catalog/boys/',
    ]

    for category in full_randevu:
        pages = pages_in_category(url=category)
        print(pages)
        for p in range(1, pages + 1):
            final_link = category + 'page/' + str(p) + '/'
            print('\nCategory :' + final_link + ' ' + str(p) + '/' + str(pages))
            links = get_items(final_link)
            for link in links:
                try:
                    product = get_randevu_product(link)
                    save_data_product(product)
                    print(f" ++++++Сохранено {link}")
                except Exception:
                    # Exception, not a bare except: a failing product is
                    # skipped, but Ctrl-C / SystemExit still stop the crawl.
                    print('I cant get data from ' + link)
def randevuze_product_collection(final_link):
    """Scrape one Rendez-Vous catalogue page and save every product on it.

    Each ``<li class="item">`` of the catalogue list carries a
    ``data-productinfo`` attribute that is parsed with ``ast.literal_eval``;
    the remaining fields come from the product's own page via
    ``get_store_randevu``.  Products that raise ``KeyError`` or ``TypeError``
    while being assembled or saved are skipped.

    Args:
        final_link: URL of the catalogue page to download.

    Returns:
        None.
    """
    html = get_html(url=final_link)
    if not html:
        return

    soup = BeautifulSoup(html, 'html.parser')
    catalog = soup.find(
        'ul', class_="list-items list-items-catalog list-view-1 js-list-items")
    if catalog is None:
        # find() returns None when the list is absent; without this guard the
        # chained .find_all() raised an uncaught AttributeError.
        return

    for product in catalog.find_all('li', class_="item"):
        try:
            product_randevoyz_all = ast.literal_eval(
                product['data-productinfo'])
            product_randevoyz = dict_cliner(product_randevoyz_all)
            product_url = prod_url(product)
            # NOTE(review): this value is overwritten below by
            # current_product['category']; the call is kept so the original
            # sequence of helper calls is unchanged.
            product_randevoyz['category'] = prod_category(
                product_randevoyz_all)
            current_product = get_store_randevu(url=product_url)
            product_randevoyz['name'] = product_name(product)
            product_randevoyz['product_store'] = prod_store()
            product_randevoyz['color'] = current_product['color']
            product_randevoyz['category'] = current_product['category']
            product_randevoyz['category_detailed'] = current_product[
                'category_detailed']
            product_randevoyz['product_url'] = product_url
            product_randevoyz['size'] = str(current_product['sizes'])
            product_randevoyz['product_image'] = current_product['image']
            product_randevoyz['product_discount'] = prod_discount(product)
            product_randevoyz['gender'] = product_gender(final_link)
            print(product_randevoyz)
            # Pause between products before saving.
            time.sleep(3.0)
            save_data_product(product_randevoyz)
        except (KeyError, TypeError):
            continue
Example #5
0
    def index():
        """Render ``index.html``, optionally tracking a product sent via the form.

        When the request form carries a ``link``, the product is fetched with
        ``get_info``, stored with ``save_data_product`` and registered for the
        user with ``save_interesting_product`` (e-mail taken from the form for
        anonymous users, from ``current_user_mail()`` otherwise).  Any failure
        in that flow results in ``info`` being ``None`` in the template.

        Returns:
            The rendered ``index.html`` template with ``info`` and
            ``count_product_all``.
        """
        try:
            link = request.form['link']
            info = get_info(link)
            print(info)

            with app.app_context():
                print('______________________________________')
                # Save the product to the shared database.
                save_data_product(info)

                # Save the product to the client database.
                size_interesting = request.form['size']
                print('______________________________________')
                if current_user.is_anonymous is True:
                    email = request.form['email']
                else:
                    email = current_user_mail()

                print(email)

                save_interesting_product(product_dict=info,
                                         email=email,
                                         price_interesting=None,
                                         color_interesting=None,
                                         size_interesting=size_interesting)
                print('_______________________________________')

                img_list = info['product_image']
                # NOTE(review): a value longer than 60 characters is presumably
                # a stringified list of image URLs; it is split back into
                # individual URLs here - confirm against get_info().
                if len(img_list) > 60:
                    img_clear_1 = (img_list.strip('[')).strip(']')
                    img_clear_2 = img_clear_1.split(',')
                    all_img = [(img.strip("'")).strip(" '")
                               for img in img_clear_2]

                    info['product_image'] = all_img
                    print(info)

                user = current_user.get_id()
                if user is not None:
                    query = User.query.filter_by(id=user).first()
                    # first() may return None; a missing row must not discard
                    # the product that has already been fetched and saved.
                    if query is not None:
                        print(query.username)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by the view.
            info = None
        all_product_count = page_count()
        return render_template('index.html',
                               info=info,
                               count_product_all=all_product_count)
Example #6
0
def add_to_products_url_randevu(url):
    """Fetch a single Rendez-Vous product by URL and add it to the Product database.

    Args:
        url: link to the product page.
    """
    application = create_app()
    with application.app_context():
        save_data_product(get_randevu_product(url))
Example #7
0
def add_to_products_url_butik(url):
    """Fetch a single Butik.ru product by URL and add it to the Product database.

    Args:
        url: link to the product page.
    """
    application = create_app()
    with application.app_context():
        save_data_product(get_butik_product(url))
Example #8
0
def butik():
    """Refresh the product database from Butik.ru when the shop is reachable.

    ``get_butik_data()`` is used as an availability probe: if it returns
    ``False`` the update is skipped and a message is printed.  Otherwise
    ``get_full_butik()`` is run exactly once (it used to be called twice,
    which repeated the whole crawl).
    """
    app = create_app()
    with app.app_context():
        butik_status = get_butik_data()
        if butik_status is False:
            print('Бутик был недоступен, обновление базы не выполнено')
            return

        products = get_full_butik()
        # Only forward a result if the crawl actually returned one; passing
        # None to save_data_product is never useful.
        if products is not None:
            save_data_product(products)
Example #9
0
def rand():
    """Refresh the product database from Rendez-Vous when the shop is reachable.

    ``get_randevu_data()`` is used as an availability probe: if it returns
    ``False`` the update is skipped and a message is printed.  Otherwise
    ``get_full_randevu()`` is run exactly once (it used to be called twice,
    which repeated the whole crawl).
    """
    app = create_app()
    with app.app_context():
        randezvous_status = get_randevu_data()
        if randezvous_status is False:
            print('Рандеву был недоступен, обновление базы не выполнено')
            return

        products = get_full_randevu()
        # Only forward a result if the crawl actually returned one; passing
        # None to save_data_product is never useful.
        if products is not None:
            save_data_product(products)
Example #10
0
 def ali_product_collection(self, final_link, max_attempts=1000):
     """Scrape one Aliexpress listing page and save every product on it.

     The listing data is taken from the contents of the second-to-last
     ``<script type="text/javascript">`` tag, passed through
     ``self.preparation_json`` and walked item by item; details missing from
     the listing come from ``self.parser_product_result``.  Items that raise
     ``KeyError`` or ``TypeError`` are skipped.

     Args:
         final_link: URL of the listing page to download.
         max_attempts: upper bound on download attempts for the page.  The
             download used to be retried through unbounded self-recursion,
             which ends in ``RecursionError`` when the site stays down; it is
             now a bounded loop.

     Returns:
         None.
     """
     html = get_query.get_html_all(url=final_link)
     failed_attempts = 0
     while not html:
         failed_attempts += 1
         if failed_attempts >= max_attempts:
             print('Ошибка парсинга, парсинг остановлен')
             return
         print("повтоная попытка получить страницу")
         html = get_query.get_html_all(url=final_link)

     soup = BeautifulSoup(html, 'html.parser')
     print(soup)
     try:
         raw_data_1 = soup.find_all('script', type="text/javascript")[-2].contents[0]
         data_all = self.preparation_json(raw_data_1)
         for item in data_all['items']:
             try:
                 product_url = self.product_url(item)
                 current_product = self.parser_product_result(product_url)
                 ali_product_dict = {
                     'product_store': 'Aliexpress',
                     'product_url': product_url,
                     'name': self.product_name(item),
                     'id': self.product_id_store(item),
                     'price': str(current_product['price']),
                     'product_discount': str(current_product['product_discount']),
                     'brand': current_product['brand'],
                     'delivery': str(current_product['delivery']),
                     'category': current_product['category'],
                     'category_detailed': current_product['category_detailed'],
                     'color': str(current_product['color']),
                     'size': str(current_product['size']),
                     'product_image': str(current_product['product_image']),
                     'other': str(self.product_store_other(item)),
                     'gender': self.product_gender(link=final_link),
                 }
                 print('------------------------------------------------')
                 print(ali_product_dict)
                 print('------------------------------------------------')
                 save_data_product(product_dict=ali_product_dict)
             except (KeyError, TypeError):
                 continue
     except IndexError:
         # The expected <script> tag is missing: wait two minutes and count
         # the failure on the instance.
         time.sleep(120)
         self.count_one += 1
         if self.count_one > 100:
             print('Ошибка парсинга, парсинг остановлен')
             return