Esempio n. 1
0
def main():
    """Run the endless product-image processing loop.

    On every pass a product record is requested through
    ``MySQL().get_product_main()``.  When one is returned, its page is
    fetched with ``get_soup``, ``get_images`` is applied to the response and
    any images found are handed to ``send_to_ftp``.  The product is then
    flagged according to the upload result:

    * ``False``      -> ``set_product_main_bad``
    * truthy value   -> ``write_images`` followed by ``set_product_main_ready``
    * anything else  -> ``set_product_main_ready`` only

    The pass counter and the elapsed time are printed on a single,
    carriage-return-refreshed line whether or not a product was available.
    The function never returns.
    """
    started_at = datetime.now()
    passes = 0

    while True:
        product = MySQL().get_product_main()

        if product:
            page = get_soup(url=product['link_product'])
            sku = product['sku']
            id_product = product['id']
            id_category = product['id_category']

            images = get_images(response=page)

            # No images means no upload attempt; None routes to "ready".
            upload = (send_to_ftp(files=images,
                                  sku=sku,
                                  id_category=id_category)
                      if images else None)

            if upload is False:
                # An explicit False from the uploader marks the product bad.
                MySQL().set_product_main_bad(id_product=id_product)
            else:
                if upload:
                    MySQL().write_images(images=upload)
                MySQL().set_product_main_ready(id_product=id_product)

        passes += 1
        print('\rCount', passes, 'Time', datetime.now() - started_at, end='')
Esempio n. 2
0
def main():
    """Run the endless product-specification processing loop.

    Each pass requests a product through ``MySQL().get_product_spec()``.
    When nothing is returned the loop immediately polls again.  Otherwise the
    product link is extended with a ``spec/`` path segment, the page is
    fetched with ``get_soup`` and passed to ``get_spec``.  The result is
    printed, stored with ``write_spec`` when truthy, and the product is
    always flagged through ``set_product_spec_ready``.

    After each processed product the counter and elapsed time are printed
    and the loop pauses for half a second.  The function never returns.
    """
    started_at = datetime.now()
    processed = 0

    while True:
        product = MySQL().get_product_spec()
        if not product:
            continue

        # Append the "spec/" segment without doubling the slash.
        base_link = product['link_product']
        suffix = 'spec/' if base_link.endswith('/') else '/spec/'
        spec_link = base_link + suffix

        page = get_soup(url=spec_link)
        sku = product['sku']
        id_product = product['id']

        spec = get_spec(response=page, sku=sku)
        print(spec)
        if spec:
            MySQL().write_spec(spec=spec)
        MySQL().set_product_spec_ready(id_product=id_product)

        processed += 1
        print('\rCount', processed, 'Time', datetime.now() - started_at,
              end='')
        sleep(0.5)
Esempio n. 3
0
def get_products_from_category(category):
    """Walk the paginated listing of one category and store its products.

    ``category`` is a mapping that provides at least the keys ``'links'``
    (listing URL), ``'categories_5'`` (passed to ``ProductList`` as
    ``id_category``) and ``'id'`` (used when updating the category status).

    Page 1 is the listing URL itself; later pages append ``page-<n>/``.
    For every page a ``ProductList`` is built and inspected:

    * 404 page                    -> category set ready, stop
    * not a product list          -> category set bad, stop
    * product list without items  -> category set ready, stop
    * product list with items     -> ``get_and_write_products()`` and
      continue with the next page

    At most 99999 pages are visited.
    """
    base_link = category['links']
    if not base_link.endswith('/'):
        base_link = base_link + '/'

    for page_number in range(1, 100000):
        # The first page has no "page-N/" suffix.
        link = base_link if page_number <= 1 else base_link + f"page-{page_number}/"
        print('Page', link)
        pl = ProductList(page_link=link, id_category=category['categories_5'])

        if pl.this_is_404_page:
            MySQL().set_category_ready(id_category=category['id'])
            print('Category', category['id'], 'READY 404')
            break

        if not pl.this_is_product_list:
            MySQL().set_category_bad(id_category=category['id'])
            print('Category', category['id'], 'BAD')
            break

        if not pl.contains_product:
            MySQL().set_category_ready(id_category=category['id'])
            print('Category', category['id'], 'READY NOT PRODUCTS')
            break

        pl.get_and_write_products()
Esempio n. 4
0
def main():
    """Run the endless category-dispatch loop.

    Each pass asks ``MySQL().get_categories()`` for the current categories.
    When the result is empty the loop polls again straight away.  Otherwise
    the first category is flagged through ``set_category_in_process``, a
    progress report (elapsed time, category identifier, number of categories
    returned and the category link) is printed, and the category is handed
    to ``get_products_from_category``.  The function never returns.
    """
    started_at = datetime.now()

    while True:
        pending = MySQL().get_categories()
        pending_count = len(pending)

        if pending_count <= 0:
            continue

        # Only the head of the returned list is handled per pass.
        current = pending[0]

        MySQL().set_category_in_process(id_category=current['id'])
        print('----->', datetime.now() - started_at)
        print('Get category', current['categories_5'], 'from',
              pending_count, current['links'])

        get_products_from_category(current)
Esempio n. 5
0
 def write_product(self):
     """Attach a page link to every product and store it in the database.

     For each entry of ``self.products`` the first ``<a>`` element whose
     ``data-product-sku`` attribute equals ``product['id']`` is looked up in
     ``self.soup`` (presumably a BeautifulSoup tree - confirm against the
     class constructor).  Its ``href`` is prefixed with the module-level
     ``domain``, saved under ``product['link']`` and passed to
     ``MySQL().write_product`` together with ``self.id_category``.

     Processing is best-effort: a product whose anchor is missing or whose
     write fails is skipped.  Only ``Exception`` subclasses are suppressed,
     so ``KeyboardInterrupt`` and ``SystemExit`` still stop the run.

     Returns:
         ``self.products``; the entries that were processed successfully
         carry a ``'link'`` key.
     """
     for product in self.products:
         try:
             anchor = self.soup.find(
                 'a', attrs={'data-product-sku': product['id']})
             link_product = domain + anchor['href']
             product['link'] = link_product
             MySQL().write_product(product=product,
                                   link_product=link_product,
                                   id_category=self.id_category)
         except Exception:
             # Deliberate best-effort: one broken product must not abort the
             # rest of the page.
             continue
     return self.products
Esempio n. 6
0
def main():
    """Run the endless product-description processing loop.

    Each pass requests a product through ``MySQL().get_product_main()``.
    When one is returned its page is fetched with ``get_soup`` and passed to
    ``get_description``.  A truthy description is stored with
    ``write_description``; the product is flagged through
    ``set_product_main_ready`` in every case.

    Image handling is disabled in this version of the loop, so no image is
    downloaded or written.

    The pass counter and elapsed time are printed after every pass, followed
    by a half-second pause.  The function never returns.
    """
    started_at = datetime.now()
    passes = 0

    while True:
        product = MySQL().get_product_main()

        if product:
            page = get_soup(url=product['link_product'])
            sku = product['sku']
            id_product = product['id']
            # Still read from the record although nothing below uses it.
            id_category = product['id_category']

            description = get_description(response=page)

            if description:
                MySQL().write_description(sku=sku, description=description)
            # The product is marked ready with or without a description.
            MySQL().set_product_main_ready(id_product=id_product)

        passes += 1
        print('\rCount', passes, 'Time', datetime.now() - started_at, end='')
        sleep(0.5)
Esempio n. 7
0
 def get_and_write_products(self):
     """Resolve the page link of every product, print it and store it.

     Prints the number of entries in ``self.products`` first.  For each
     entry the first ``<a>`` element whose ``data-product-sku`` attribute
     equals ``product['id']`` is looked up in ``self.soup`` (presumably a
     BeautifulSoup tree - confirm against the class constructor).  Its
     ``href`` is prefixed with the module-level ``domain``, printed, saved
     under ``product['link']`` and passed to ``MySQL().write_product``
     together with ``self.id_category``.

     Processing is best-effort: a product whose anchor is missing or whose
     write fails is skipped.  Only ``Exception`` subclasses are suppressed,
     so ``KeyboardInterrupt`` and ``SystemExit`` still stop the run.

     Returns:
         ``self.products``; the entries that were processed successfully
         carry a ``'link'`` key.
     """
     print('Count products', len(self.products))
     for product in self.products:
         try:
             anchor = self.soup.find(
                 'a', attrs={'data-product-sku': product['id']})
             link_product = domain + anchor['href']
             print(link_product)
             product['link'] = link_product
             MySQL().write_product(product=product,
                                   link_product=link_product,
                                   id_category=self.id_category)
         except Exception:
             # Deliberate best-effort: one broken product must not abort the
             # rest of the page.
             continue
     return self.products
Esempio n. 8
0
def main():
    """Process product review pages in headless Chrome, restarting on errors.

    Outer loop: waits five seconds, starts a fresh headless Chrome driver
    and opens a warm-up page.  Inner loop: requests a product through
    ``MySQL().get_product_reviews()``, opens its ``otzyvy/`` sub-page,
    passes the page source to ``get_reviews`` and records the outcome:

    * truthy result  -> ``write_reviews`` + ``set_product_reviews_ready``
    * ``False``      -> ``set_product_reviews_bad``
    * anything else  -> ``set_product_reviews_ready``

    Any ``Exception`` raised while handling a product ends the inner loop,
    which shuts the browser down and starts a new one.  ``KeyboardInterrupt``
    and ``SystemExit`` are not intercepted, so the process can be stopped;
    the browser is still shut down on the way out.  Otherwise the function
    never returns.
    """
    start_time = datetime.now()
    n = 0

    while True:
        sleep(5)
        print("Перезапуск браузера")

        chromeOptions = Options()
        chromeOptions.add_argument('--headless')
        chromeOptions.add_argument('--no-sandbox')
        # NOTE(review): value 2 for this preference presumably blocks image
        # loading - confirm against the Chrome preference documentation.
        prefs = {"profile.managed_default_content_settings.images": 2}
        chromeOptions.add_experimental_option('prefs', prefs)
        # NOTE(review): `chrome_options=` is the legacy keyword; newer
        # Selenium releases expect `options=` - confirm the installed version
        # before switching.
        driver = webdriver.Chrome(chrome_options=chromeOptions)

        try:
            driver.get('https://www.w3.org/People/mimasa/test/')
            while True:
                try:
                    product_ = MySQL().get_product_reviews()

                    if product_:

                        sku = product_['sku']
                        id_product = product_['id']
                        id_category = product_['id_category']

                        # Build the reviews URL without doubling the slash.
                        link_ = product_['link_product']
                        if link_.endswith('/'):
                            link = link_ + 'otzyvy/'
                        else:
                            link = link_ + '/otzyvy/'

                        # Fetch the page and wait for it to finish loading:
                        # keep re-reading the source while it is still
                        # growing.
                        driver.get(link)
                        tmp = ''

                        while True:
                            page_source = driver.page_source
                            if len(page_source) > len(tmp):
                                tmp = page_source
                            else:
                                break

                        reviews = get_reviews(sku=sku,
                                              page_source=page_source)

                        if reviews:
                            MySQL().write_reviews(reviews=reviews)
                            MySQL().set_product_reviews_ready(
                                id_product=id_product)
                        elif reviews is False:
                            MySQL().set_product_reviews_bad(
                                id_product=id_product)
                        else:
                            MySQL().set_product_reviews_ready(
                                id_product=id_product)

                        n += 1
                        print('\rCount',
                              n,
                              'Time',
                              datetime.now() - start_time,
                              end='')
                except Exception as error:
                    # Any failure restarts the browser; say why instead of
                    # failing silently.
                    print('\nBrowser session failed, restarting:',
                          repr(error))
                    break
        finally:
            # Always release the browser, even on Ctrl-C or when the warm-up
            # request itself fails.
            try:
                driver.close()
            except Exception:
                # The window may already be gone after a crash; quit() below
                # must still run to end the driver process.
                pass
            driver.quit()