def main():
    """Poll the database for products and upload their images to FTP.

    Runs forever. Each product returned by ``MySQL().get_product_main()`` has
    its page fetched with ``get_soup``, its images extracted with
    ``get_images`` and, when there are any, sent with ``send_to_ftp``.
    The product is then marked bad when the upload returned ``False`` and
    ready in every other case. A running count and the elapsed time are
    printed on a single, carriage-return-refreshed line.
    """
    started_at = datetime.now()
    processed = 0
    while True:
        product = MySQL().get_product_main()
        if not product:
            continue

        page = get_soup(url=product['link_product'])
        sku = product['sku']
        product_id = product['id']
        category_id = product['id_category']

        images = get_images(response=page)
        uploaded = (
            send_to_ftp(files=images, sku=sku, id_category=category_id)
            if images
            else None
        )

        if uploaded is False:
            # Only an explicit False from send_to_ftp marks the product bad.
            MySQL().set_product_main_bad(id_product=product_id)
        else:
            if uploaded:
                MySQL().write_images(images=uploaded)
            # Reached both after a successful upload and when there was
            # nothing to upload.
            MySQL().set_product_main_ready(id_product=product_id)

        processed += 1
        print('\rCount', processed, 'Time', datetime.now() - started_at, end='')
def main():
    """Poll the database for products and store their specifications.

    Runs forever. For each product returned by
    ``MySQL().get_product_spec()`` the ``spec/`` sub-page of the product link
    is fetched with ``get_soup`` and parsed with ``get_spec``; a non-empty
    result is written with ``MySQL().write_spec``. A running count and the
    elapsed time are printed on a carriage-return-refreshed line.
    """
    started_at = datetime.now()
    processed = 0
    while True:
        product = MySQL().get_product_spec()
        if product:
            base_link = product['link_product']
            # Append the spec sub-page without producing a double slash.
            spec_link = base_link + ('spec/' if base_link.endswith('/') else '/spec/')

            page = get_soup(url=spec_link)
            sku = product['sku']
            product_id = product['id']

            spec = get_spec(response=page, sku=sku)
            print(spec)
            if spec:
                MySQL().write_spec(spec=spec)
            # NOTE(review): the product is marked ready whether or not a spec
            # was found - confirm this matches the intended nesting.
            MySQL().set_product_spec_ready(id_product=product_id)

            processed += 1
            print('\rCount', processed, 'Time', datetime.now() - started_at, end='')
        # NOTE(review): pause applied on every pass of the loop - confirm it
        # was not meant to run only after a processed product.
        sleep(0.5)
def get_products_from_category(category):
    """Walk the paginated listing of one category and store its products.

    The first page is the category link itself (with a trailing slash);
    page ``n > 1`` is that link followed by ``page-<n>/``. Each page is
    loaded through ``ProductList`` and the walk stops at the first page that
    is:

    * a 404 page - the category is marked ready;
    * not a product list - the category is marked bad;
    * a product list without products - the category is marked ready.

    Any other page has its products written via
    ``ProductList.get_and_write_products`` and the walk continues.

    Args:
        category: mapping read for the keys ``links``, ``categories_5`` and
            ``id``.
    """
    base_link = category['links']
    if not base_link.endswith('/'):
        base_link = category['links'] + '/'

    for page_number in range(1, 100000):
        link = base_link if page_number == 1 else base_link + f"page-{page_number}/"
        print('Page', link)
        listing = ProductList(page_link=link, id_category=category['categories_5'])

        if listing.this_is_404_page:
            MySQL().set_category_ready(id_category=category['id'])
            print('Category', category['id'], 'READY 404')
            return

        if not listing.this_is_product_list:
            MySQL().set_category_bad(id_category=category['id'])
            print('Category', category['id'], 'BAD')
            return

        if not listing.contains_product:
            MySQL().set_category_ready(id_category=category['id'])
            print('Category', category['id'], 'READY NOT PRODUCTS')
            return

        listing.get_and_write_products()
def main():
    """Poll the database for categories and crawl them one at a time.

    Runs forever. Whenever ``MySQL().get_categories()`` returns a non-empty
    collection, its first entry is flagged as in process, a short progress
    report is printed and the category is handed to
    ``get_products_from_category``.
    """
    started_at = datetime.now()
    while True:
        pending = MySQL().get_categories()
        remaining = len(pending)
        if remaining <= 0:
            continue

        current = pending[0]
        MySQL().set_category_in_process(id_category=current['id'])
        print('----->', datetime.now() - started_at)
        print('Get category', current['categories_5'], 'from', remaining, current['links'])
        get_products_from_category(current)
def write_product(self):
    """Resolve every product's link from the parsed page and store it.

    For each entry of ``self.products`` the ``<a>`` element whose
    ``data-product-sku`` attribute equals the product's ``id`` is looked up
    in ``self.soup``. Its ``href`` is prefixed with ``domain``, saved under
    the product's ``link`` key and written with ``MySQL().write_product``.

    The operation is best-effort: a product that fails at any step is
    reported and skipped so the remaining products are still processed.

    Returns:
        ``self.products``, with ``link`` set on each product that resolved.
    """
    for product in self.products:
        try:
            link_product = domain + self.soup.find(
                'a', attrs={'data-product-sku': product['id']})['href']
            product['link'] = link_product
            MySQL().write_product(product=product,
                                  link_product=link_product,
                                  id_category=self.id_category)
        except Exception as exc:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the scraper.
            print('Product skipped:', repr(exc))
    return self.products
def main():
    """Poll the database for products and store their descriptions.

    Runs forever. For each product returned by
    ``MySQL().get_product_main()`` the product page is fetched with
    ``get_soup`` and its description extracted with ``get_description``.
    A non-empty description is written with ``MySQL().write_description``;
    the product is marked ready either way. A running count and the elapsed
    time are printed on a carriage-return-refreshed line.

    Image handling is disabled in this worker: only descriptions are
    written.
    """
    start_time = datetime.now()
    n = 0
    while True:
        product_ = MySQL().get_product_main()
        if product_:
            response = get_soup(url=product_['link_product'])
            sku = product_['sku']
            id_product = product_['id']

            description = get_description(response=response)
            if description:
                MySQL().write_description(sku=sku, description=description)
            # A product without a description is still considered done.
            MySQL().set_product_main_ready(id_product=id_product)

            n += 1
            print('\rCount', n, 'Time', datetime.now() - start_time, end='')
        # NOTE(review): pause applied on every pass of the loop - confirm it
        # was not meant to run only after a processed product.
        sleep(0.5)
def get_and_write_products(self):
    """Resolve every product's link from the parsed page and store it.

    Prints the number of products, then for each entry of ``self.products``
    looks up in ``self.soup`` the ``<a>`` element whose ``data-product-sku``
    attribute equals the product's ``id``. Its ``href`` is prefixed with
    ``domain``, printed, saved under the product's ``link`` key and written
    with ``MySQL().write_product``.

    The operation is best-effort: a product that fails at any step is
    reported and skipped so the remaining products are still processed.

    Returns:
        ``self.products``, with ``link`` set on each product that resolved.
    """
    print('Count products', len(self.products))
    for product in self.products:
        try:
            link_product = domain + self.soup.find(
                'a', attrs={'data-product-sku': product['id']})['href']
            print(link_product)
            product['link'] = link_product
            MySQL().write_product(product=product,
                                  link_product=link_product,
                                  id_category=self.id_category)
        except Exception as exc:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the scraper.
            print('Product skipped:', repr(exc))
    return self.products
def _wait_for_stable_page_source(driver):
    """Return ``driver.page_source`` once it has stopped growing.

    The page source is re-read until a read is no longer than the longest
    one seen so far; that last read is returned.
    """
    longest = ''
    while True:
        page_source = driver.page_source
        if len(page_source) <= len(longest):
            return page_source
        longest = page_source


def main():
    """Poll the database for products and store their reviews.

    Runs forever in two nested loops. The outer loop waits five seconds and
    starts a fresh headless Chrome session. The inner loop takes products
    from ``MySQL().get_product_reviews()``, opens the ``otzyvy/`` sub-page of
    each product link, parses it with ``get_reviews`` and then:

    * writes the reviews and marks the product ready when any were found;
    * marks the product bad when ``get_reviews`` returned ``False``;
    * marks the product ready otherwise.

    Any error inside the inner loop is reported and ends the current browser
    session, so the outer loop starts a new one. ``KeyboardInterrupt`` and
    ``SystemExit`` are not intercepted and stop the worker; the browser is
    shut down in every case.
    """
    start_time = datetime.now()
    n = 0
    while True:
        sleep(5)
        print("Перезапуск браузера")
        chromeOptions = Options()
        chromeOptions.add_argument('--headless')
        chromeOptions.add_argument('--no-sandbox')
        # NOTE(review): presumably this preference value disables image
        # loading - confirm against the Chrome preferences in use.
        prefs = {"profile.managed_default_content_settings.images": 2}
        chromeOptions.add_experimental_option('prefs', prefs)
        # NOTE(review): ``chrome_options`` is the legacy keyword; newer
        # Selenium releases expect ``options=`` - confirm the installed
        # version before switching.
        driver = webdriver.Chrome(chrome_options=chromeOptions)
        try:
            driver.get('https://www.w3.org/People/mimasa/test/')
            while True:
                try:
                    product_ = MySQL().get_product_reviews()
                    if product_:
                        sku = product_['sku']
                        id_product = product_['id']

                        # Build the reviews URL without a double slash.
                        link_ = product_['link_product']
                        if link_.endswith('/'):
                            link = link_ + 'otzyvy/'
                        else:
                            link = link_ + '/otzyvy/'

                        # Load the page and wait until it has finished
                        # rendering.
                        driver.get(link)
                        page_source = _wait_for_stable_page_source(driver)

                        reviews = get_reviews(sku=sku, page_source=page_source)
                        if reviews:
                            MySQL().write_reviews(reviews=reviews)
                            MySQL().set_product_reviews_ready(
                                id_product=id_product)
                        elif reviews is False:
                            MySQL().set_product_reviews_bad(
                                id_product=id_product)
                        else:
                            MySQL().set_product_reviews_ready(
                                id_product=id_product)

                        n += 1
                        print('\rCount', n, 'Time',
                              datetime.now() - start_time, end='')
                except Exception as exc:
                    # Not a bare ``except``: Ctrl-C must stop the worker
                    # instead of merely restarting the browser.
                    print('\nBrowser session aborted:', repr(exc))
                    break
        finally:
            # quit() ends the whole session (all windows and the driver
            # process), so it must run even when the session failed.
            driver.quit()