def click_element(self, keyword):
    # Click the element mapped to `keyword`; silently skip it if absent.
    try:
        self.crawler.find_element_by_css_selector(
            self.selectors[keyword]).click()
    except NoSuchElementException:
        pass
    rand_delay(1, 3)

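# The methods in this section rely on a few module-level names that are not
# defined here. A minimal sketch of the assumed setup follows: the exception
# imports are the standard Selenium ones, and `rand_delay` is presumed to
# sleep for a random interval to mimic human pacing; its real definition may
# differ.
import random
import time

from selenium.common.exceptions import (
    ElementClickInterceptedException,
    NoSuchElementException,
)


def rand_delay(low, high):
    # Sleep for a random number of seconds in [low, high].
    time.sleep(random.uniform(low, high))
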
def parse_sub_category(self, category_url):
    # Recursively resolve a category URL into a list of product-listing URLs.
    self.crawler.get(category_url)
    cur_url = self.crawler.current_url
    rand_delay(2, 3)
    try:
        print('Processing', cur_url)
        min_qty = self.crawler.find_element_by_css_selector(
            '[data-atag="tr-minQty"] > span > div:last-child').text
        print('min_qty', min_qty)
        if min_qty == 'Non-Stock':
            # Skip categories that are not stocked.
            return []
    except NoSuchElementException:
        pass
    final_urls = []
    if 'filter' in cur_url:
        # Leaf listing page: keep the base URL without its query string.
        final_urls += cur_url.split('?')[0:1]
        return final_urls
    elif 'products/detail' in cur_url:
        # Single product detail page: nothing to paginate.
        return []
    else:
        # Category hub page: recurse into each subcategory link.
        a_elems = self.crawler.find_elements_by_css_selector(
            '[data-testid="subcategories-items"]')
        subcategory_urls = self.join_urls(a_elems)
        for url in subcategory_urls:
            urls = self.parse_sub_category(url)
            final_urls += urls
        return final_urls

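# `join_urls` is called above and in `crawl` below but is not shown in this
# section. A minimal sketch, assuming it simply collects the href attribute
# of each anchor element; the real helper may also resolve relative URLs or
# de-duplicate.
def join_urls(self, a_elems):
    # Extract the target URL from each subcategory link element.
    return [a.get_attribute('href') for a in a_elems]
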
def dkpn_sort_asc(self):
    # Scroll the sort control into view, then sort by Digi-Key part number
    # ascending so results come back in a stable order.
    rand_delay(1, 3)
    pos_offset = 900
    self.crawler.execute_script(f"window.scrollTo(0, {pos_offset});")
    sort_asc = self.element_to_be_clickable(
        self.selectors['dkpn-sort-asc'])
    sort_asc.click()

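# `element_to_be_clickable` (also used in `click_download` below) is not
# shown in this section. A plausible sketch, assuming it wraps Selenium's
# explicit-wait API; the 10-second timeout is an assumption, not taken from
# the original code.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def element_to_be_clickable(self, css_selector, timeout=10):
    # Block until the element is present and clickable, then return it.
    return WebDriverWait(self.crawler, timeout).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)))
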
def click_download(self):
    # Open the download popup, then trigger the results-table download.
    popup_trigger = self.element_to_be_clickable(
        self.selectors['popup-trigger'])
    popup_trigger.click()
    download_table_button = self.element_to_be_clickable(
        self.selectors['download'])
    download_table_button.click()
    rand_delay(8, 9)  # give the download time to finish

def crawl(self):
    # Entry point: load the start page and collect top-level subcategory URLs.
    self.crawler.get(self.start_url)
    rand_delay(5, 10)
    self.scroll_to_bottom()
    a_elems = self.crawler.find_elements_by_css_selector(
        '[data-testid="subcategories-items"]')
    subcategory_urls = self.join_urls(a_elems)
    rand_delay(2, 5)
    return subcategory_urls

def click_next_page(self):
    # Two selectors cover different page layouts; click the first candidate
    # button that accepts the click.
    btn_next_pages = self.crawler.find_elements_by_css_selector(
        self.selectors['next-page'])
    btn_next_pages += self.crawler.find_elements_by_css_selector(
        self.selectors['next-page-alt'])
    for btn_next_page in btn_next_pages:
        try:
            btn_next_page.click()
            break
        except ElementClickInterceptedException:
            pass
    rand_delay(5, 8)

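# The selector strings live in `self.selectors`, which is defined outside
# this section. The keys below are exactly the ones used by these methods;
# the CSS values are hypothetical placeholders, not the real mapping.
EXAMPLE_SELECTORS = {
    'dkpn-sort-asc': '[data-testid="sort-asc"]',         # placeholder
    'popup-trigger': '[data-testid="download-popup"]',   # placeholder
    'download': '[data-testid="download-table"]',        # placeholder
    'next-page': '[data-testid="btn-next-page"]',        # placeholder
    'next-page-alt': '[data-testid="pagination-next"]',  # placeholder
    'max-page': '[data-testid="max-page"]',              # placeholder
    'cur-page': '[data-testid="cur-page-input"]',        # placeholder
}
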
def scroll_to_bottom(self):
    # Scroll down in 900px steps until the page stops moving, so that any
    # lazy-loaded content below the fold gets rendered.
    offset = 0
    while True:
        old_y_offset = self.crawler.execute_script(
            "return window.pageYOffset;")
        offset += 900
        self.crawler.execute_script(f"window.scrollTo(0, {offset});")
        rand_delay(2, 4)
        new_y_offset = self.crawler.execute_script(
            "return window.pageYOffset;")
        if old_y_offset == new_y_offset:
            break

def get_max_page(self):
    # The paginator renders "current/total"; take the total.
    rand_delay(15, 20)
    max_page = self.crawler.find_element_by_css_selector(
        self.selectors['max-page']).text
    max_page = int(max_page.split('/')[-1])
    return max_page

def scroll_up_down(self):
    # Small scroll jiggle to nudge lazy-loaded elements into rendering.
    pos_offset = 200
    neg_offset = -200
    for offset in [pos_offset, neg_offset]:
        self.crawler.execute_script(f"window.scrollTo(0, {offset});")
        rand_delay(0, 1)

def get_cur_page(self):
    rand_delay(1, 2)
    cur_page = int(
        self.crawler.find_element_by_css_selector(
            self.selectors['cur-page']).get_attribute('value'))
    return cur_page

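# A hypothetical driver loop showing how the methods above might be combined.
# None of this appears in the original section: `DigikeyCrawler` is an
# assumed class name, and the overall flow (sort, then download each page in
# turn) is a sketch, not the author's confirmed entry point.
if __name__ == '__main__':
    scraper = DigikeyCrawler()  # hypothetical constructor
    for url in scraper.crawl():
        for listing_url in scraper.parse_sub_category(url):
            scraper.crawler.get(listing_url)
            scraper.dkpn_sort_asc()  # stable ordering before export
            for _ in range(scraper.get_max_page()):
                scraper.click_download()
                scraper.click_next_page()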