def scrap(given_name: str, given_url, given_model_no=None):
    """
    Scrape product tiles returned by get_links() for the given search keyword.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        data = get_links(given_name, given_url, given_model_no)
    else:
        data = get_links(given_name, given_url)
    if len(data) < 1:
        return []

    data_list = []
    n = 1
    for prd_data in data:
        print(f'Getting data from link {n} of {len(data)}...')
        n += 1
        try:
            t1 = datetime.now()
            # Title and URL are mandatory; skip the tile if either is missing.
            try:
                title = clean_text(prd_data.find('.product--title-link')[0].text)
                url = list(prd_data.absolute_links)[1]
            except IndexError:
                continue
            # Price and merchant are optional; fall back to placeholders.
            try:
                prd_price = clean_price(prd_data.find('.price--dollars')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(prd_data.find('.product--seller')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url
            }
            data_list.append(main)
        except AttributeError:
            pass
    return data_list
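# The scrap() variants in this file all rely on the same module-level imports
# and a few project helpers (get_links, clean_text, clean_price) that are not
# shown here. The imports below are the ones the code actually uses; the two
# helper stubs are only a hedged sketch of the expected behaviour, not the
# project's real implementations.
import re
import time
from time import sleep
from datetime import datetime

from requests_html import HTMLSession          # requests_html-based variants
from selenium import webdriver                 # Selenium-based variants
from selenium.webdriver.common.keys import Keys


def clean_text(text: str) -> str:
    # Assumed behaviour: collapse runs of whitespace and trim the ends.
    return re.sub(r'\s+', ' ', text).strip()


def clean_price(text: str) -> str:
    # Assumed behaviour: keep digits and the decimal point only,
    # e.g. '$1,299.00' -> '1299.00'. The real helper may differ.
    return re.sub(r'[^0-9.]', '', text)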
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Scrape product tiles returned by get_links() for the given search keyword.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        data = get_links(given_name, given_url, given_model_no)
    else:
        data = get_links(given_name, given_url)
    if len(data) < 1:
        return []

    data_list = []
    n = 1
    for prd_data in data:
        print(f'Getting data from link {n} of {len(data)}...')
        n += 1
        try:
            t1 = datetime.now()
            # Title and URL are mandatory; skip the tile if either is missing.
            try:
                title = clean_text(prd_data.find('.product-tile-name')[0].text)
                url = prd_data.find('a.disp-block')[0].attrs['href']
            except IndexError:
                continue
            # SKU, price and merchant are optional; fall back to placeholders.
            try:
                sku = prd_data.find('.product-tile-model')[0].text
            except Exception:
                sku = ''
            try:
                prd_price = clean_price(prd_data.find('.pricepoint-price')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(prd_data.find('#sellerProfileTriggerId')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url,
                'sku': sku,
            }
            data_list.append(main)
        except AttributeError:
            pass
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Fetch each product page returned by get_links() and scrape its details.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        links = get_links(given_name, given_url, given_model_no)
    else:
        links = get_links(given_name, given_url)
    if len(links) < 1:
        return []

    data_list = []
    n = 1
    for link in links:
        print(f'Getting data from link {n} of {len(links)}...')
        n += 1
        try:
            t1 = datetime.now()
            # Retry the request until it succeeds.
            while True:
                try:
                    session = HTMLSession()
                    prd_data = session.get(link)
                    break
                except Exception:
                    print('Error while getting data..\nRetrying in 2 seconds..')
                    time.sleep(2)
            # The title is mandatory; skip the page if it is missing.
            try:
                title = clean_text(
                    prd_data.html.find('#itemTitle')[0].text).replace('Details about ', '')
            except IndexError:
                continue
            # SKU, price and merchant are optional; fall back to placeholders.
            try:
                sku = prd_data.html.find('#descItemNumber')[0].text
            except Exception:
                sku = ''
            try:
                prd_price = clean_price(prd_data.html.find('#prcIsum')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(prd_data.html.find('span.mbg-nw')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': link,
                'sku': sku
            }
            data_list.append(main)
        except AttributeError:
            pass
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Follow each product tile returned by get_links() and scrape its page.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        links = get_links(given_name, given_url, given_model_no)
    else:
        links = get_links(given_name, given_url)
    if len(links) < 1:
        return []

    data_list = []
    number = 1
    for link in links:
        print(f'Getting data from link {number} of {len(links)}...')
        url = link.find('.product-item-link')[0].attrs['href']
        session = HTMLSession()
        r = session.get(url)
        number += 1
        try:
            t1 = datetime.now()
            # The title is mandatory; skip the page if it is missing.
            try:
                title = clean_text(r.html.find('.page-title')[0].text)
            except IndexError:
                continue
            # SKU, price and merchant are optional; fall back to placeholders.
            try:
                sku = r.html.find('.product-info-main>p>strong')[0].text
            except Exception as e:
                print(e)
                sku = ''
            try:
                prd_price = clean_price(r.html.find('.price')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(r.html.find('.product--seller')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url,
                'sku': sku
            }
            data_list.append(main)
        except AttributeError:
            pass
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Fetch each product page returned by get_links() and scrape its details.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        links = get_links(given_name, given_url, given_model_no)
    else:
        links = get_links(given_name, given_url)
    if len(links) < 1:
        return []

    data_list = []
    n = 1
    for link in links:
        print(f'Getting data from link {n} of {len(links)}...')
        n += 1
        try:
            t1 = datetime.now()
            # Retry the request until it succeeds.
            while True:
                try:
                    session = HTMLSession()
                    prd_data = session.get(link)
                    break
                except Exception:
                    print('Error while getting data..\nRetrying in 2 seconds..')
                    time.sleep(2)
            # The title is mandatory; skip the page if it is missing.
            try:
                title = clean_text(prd_data.html.find('#productTitle')[0].text)
            except IndexError:
                continue
            # The model number lives in a details table row; scan for it.
            try:
                table = prd_data.html.find('tr')
                sku = ''
                for row in table:
                    try:
                        th = row.text
                        if 'model' in th.lower() and 'number' in th.lower():
                            sku = row.find('td')[0].text
                            break
                    except IndexError:
                        continue
            except Exception:
                sku = ''
            # Price and merchant are optional; fall back to placeholders.
            try:
                prd_price = clean_price(
                    prd_data.html.find('#price_inside_buybox')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(
                    prd_data.html.find('#sellerProfileTriggerId')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': link,
                'sku': sku,
            }
            data_list.append(main)
        except AttributeError:
            print('Error 0005')
    return data_list
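# Hedged sketch: the two page-fetching variants above both retry a failed
# request forever with a 2-second pause. The helper below is a hypothetical
# refactoring of that pattern with an upper bound on attempts; it is not part
# of the original project.
import time
from requests_html import HTMLSession


def fetch_with_retry(url, attempts=5, delay=2):
    # Return the response for `url`, retrying up to `attempts` times before
    # re-raising the last error.
    last_error = None
    for _ in range(attempts):
        try:
            return HTMLSession().get(url)
        except Exception as e:
            last_error = e
            print('Error while getting data..\nRetrying in 2 seconds..')
            time.sleep(delay)
    raise last_error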
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Load the site's search results with Selenium and scrape the result tiles.

    :param given_name: search keyword
    :param given_url: search URL template with a '{}' placeholder for the keyword
    :param given_model_no: unused in this variant
    :return: list of scraped product dicts
    """
    browser = webdriver.Firefox()
    browser.minimize_window()
    inp_name = given_name.replace(' ', '+').lower()
    search_url = given_url.replace('{}', inp_name)
    browser.get(search_url)
    items = browser.find_elements_by_css_selector('.sc-bdVaJa.Tile-iqbpf7-0.fIkVYO')
    print(f'{len(items)} Results Found for: {given_name}')
    data_list = []
    for prd_data in items:
        try:
            t1 = datetime.now()
            # Title and URL are mandatory; skip the tile if either is missing.
            try:
                title = clean_text(
                    prd_data.find_elements_by_css_selector(
                        '.DefaultProductTile__ProductName-dfe2sm-1.dRgJNf')[0].text)
                url = prd_data.find_elements_by_css_selector(
                    'a')[0].get_attribute('href')
                # print(title)
            except IndexError:
                continue
            # Price: re-read the element if the first read was empty or missing.
            try:
                p = prd_data.find_elements_by_css_selector(
                    '.ProductPrice__Wrapper-sc-1ye3dgu-0.guXOLt')[0].text
                prd_price = clean_price(p)
                if prd_price == '':
                    # Treat an empty price like a missing element.
                    raise IndexError
            except IndexError:
                p = prd_data.find_elements_by_css_selector(
                    '.ProductPrice__Wrapper-sc-1ye3dgu-0.guXOLt')[0].text
                prd_price = clean_price(p)
            except Exception as e:
                print(f'\n{e} price\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(
                    prd_data.find_elements_by_css_selector('.get_merchant')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url,
                'sku': False,
            }
            data_list.append(main)
        except AttributeError:
            pass
        except Exception as e:
            print(e, end=' AT GET DATA')
    # Always close the browser before returning.
    try:
        browser.quit()
    except Exception as e:
        print(e)
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Drive the site's search box with Selenium and scrape the result tiles.

    :param given_name: search keyword
    :param given_url: URL of the page that hosts the search box
    :param given_model_no: unused in this variant
    :return: list of scraped product dicts
    """
    browser = webdriver.Firefox()
    # browser.minimize_window()
    browser.get(given_url)
    # Wait for the search field; the site may show a captcha first.
    while True:
        try:
            input_field = browser.find_element_by_id('product-search-field')
            break
        except Exception:
            input('Solve captcha and press enter 1:')
    # Type the keyword slowly to look less like a bot, then submit.
    input_field.clear()
    for char in given_name:
        input_field.send_keys(char)
        sleep(0.3)
    sleep(1)
    input_field.send_keys(Keys.RETURN)
    sleep(3)
    while True:
        try:
            items = browser.find_elements_by_css_selector('._1umis')
            break
        except Exception:
            input('Solve captcha and press enter 2:')
    print(f'{len(items)} Results Found for: {given_name}')
    data_list = []
    for prd_data in items:
        try:
            t1 = datetime.now()
            try:
                while True:
                    try:
                        title = clean_text(
                            prd_data.find_elements_by_css_selector('._1A_Xq')[0].text)
                        # Note: the link is looked up at page level, so this
                        # returns the first matching anchor on the page.
                        url = browser.find_element_by_css_selector(
                            '._1A_Xq>a').get_attribute('href')
                        break
                    except Exception as e:
                        print(e)
                        input('Solve captcha and press enter 3:')
                # print(title)
            except IndexError:
                continue
            # Price and merchant are optional; fall back to placeholders.
            try:
                # print(prd_data.find_elements_by_css_selector('._2PrMB'))
                p = prd_data.find_elements_by_css_selector('._2PrMB')[0].text
                prd_price = clean_price(p)
            except IndexError:
                p = prd_data.find_elements_by_css_selector('._2PrMB')[0].text
                prd_price = clean_price(p)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(
                    prd_data.find_elements_by_css_selector('.merchant')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url
            }
            data_list.append(main)
        except AttributeError:
            pass
        except Exception as e:
            print(e, end=' AT GET DATA')
    # Always close the browser before returning.
    try:
        browser.quit()
    except Exception:
        print('Clear..')
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Load the site's search results with Selenium and scrape the result tiles.

    :param given_name: search keyword
    :param given_url: search URL prefix; the keyword is appended to it
    :param given_model_no: unused in this variant
    :return: list of scraped product dicts
    """
    browser = webdriver.Firefox()
    browser.minimize_window()
    inp_name = given_name.replace(' ', '+').lower()
    search_url = given_url + inp_name
    browser.get(search_url)
    sleep(2)
    items = browser.find_elements_by_css_selector('.ais-hits--item.ais-hits--item')
    print(f'{len(items)} Results Found for: {given_name}')
    data_list = []
    for prd_data in items:
        try:
            t1 = datetime.now()
            # Title and URL are mandatory; skip the tile if either is missing.
            try:
                title = clean_text(
                    prd_data.find_elements_by_css_selector(
                        '.ais-hit--title.product-tile__title')[0].text)
                # print(title)
                url = prd_data.find_elements_by_css_selector(
                    '.product-tile')[0].get_attribute('href')
            except IndexError:
                continue
            # Prefer the sale price; fall back to the regular price, then '0'.
            try:
                p = prd_data.find_elements_by_css_selector('span.sale')[0].text
                prd_price = clean_price(p)
                if prd_price == '':
                    # Treat an empty sale price like a missing element.
                    raise IndexError
            except IndexError:
                p = prd_data.find_elements_by_css_selector(
                    'span.ais-hit--price.price')[0].text
                prd_price = clean_price(p)
            except Exception as e:
                print(f'\n{e} price\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(
                    prd_data.find_elements_by_css_selector('.merchant')[0].text)
            except Exception:
                # print(f'merchant missing\n{title}\n\n')
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url,
                'sku': False,
            }
            data_list.append(main)
        except AttributeError:
            pass
        except Exception as e:
            print(e, end=' AT GET DATA')
    # Always close the browser before returning.
    try:
        browser.quit()
    except Exception:
        pass
    return data_list
def scrap(given_name: str, given_url, given_model_no=None):
    """
    Follow each product tile returned by get_links() and scrape its page.

    :param given_name: search keyword
    :param given_url: search/listing URL for the target site
    :param given_model_no: optional model number passed through to get_links()
    :return: list of scraped product dicts
    """
    if given_model_no is not None:
        links = get_links(given_name, given_url, given_model_no)
    else:
        links = get_links(given_name, given_url)
    if len(links) < 1:
        return []

    data_list = []
    number = 1
    for link in links:
        print(f'Getting data from link {number} of {len(links)}...')
        url = link.find('.name.fn.l_mgn-tb-sm.l_dsp-blc')[0].attrs['href']
        session = HTMLSession()
        r = session.get(url)
        number += 1
        try:
            t1 = datetime.now()
            # Title and SKU are mandatory; skip the page if either is missing.
            try:
                title = clean_text(r.html.find('.product-name')[0].text)
                sku = r.html.find('.product-id.meta.quiet.p_txt-sm')[-1].text
            except Exception:
                continue
            # Price and merchant are optional; fall back to placeholders.
            try:
                # The price is embedded in a <script> tag inside the price block.
                prd_price = clean_price(r.html.find('.price-device>script')[0].text)
            except Exception:
                # print(f'price missing\n{title}\n\n')
                prd_price = '0'
            try:
                merchant = clean_text(r.html.find('#sellerProfile')[0].text)
            except Exception:
                merchant = 'NA'
            timestamp = datetime.now()
            main = {
                'name': title,
                'price': prd_price,
                'timestamp': timestamp,
                'merchant': merchant,
                'time': (datetime.now() - t1).total_seconds(),
                'url': url,
                'sku': sku,
            }
            data_list.append(main)
        except AttributeError:
            pass
    return data_list
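# Hedged usage sketch: how one of the scrap() variants above might be driven.
# The keyword and URL below are made-up placeholders, not values from the
# original project; real search URLs come from the project's own configuration.
if __name__ == '__main__':
    results = scrap('wireless mouse', 'https://www.example.com/search?q={}')
    for item in results:
        print(f"{item['name']} | {item['price']} | {item['merchant']} | {item['url']}")
    print(f'{len(results)} products scraped')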