Example no. 1
0
 def click_element(self, keyword):
     """Click the element mapped to *keyword* in the selector table.

     A missing element is tolerated silently (best-effort click); a short
     randomized pause follows either way to mimic human pacing.
     """
     try:
         target = self.crawler.find_element_by_css_selector(
             self.selectors[keyword])
         target.click()
     except NoSuchElementException:
         # Element absent on this page — deliberately skip the click.
         pass
     rand_delay(1, 3)
    def parse_sub_category(self, category_url):
        """Recursively collect filtered product-listing URLs under *category_url*.

        Navigates to the page, skips it if it is marked 'Non-Stock', returns
        the URL (query string stripped) when it is already a filtered listing,
        returns nothing for single product-detail pages, and otherwise recurses
        into every subcategory link found on the page.
        """
        self.crawler.get(category_url)
        landing_url = self.crawler.current_url
        rand_delay(2, 3)

        try:
            print('Processing', landing_url)
            min_qty = self.crawler.find_element_by_css_selector(
                '[data-atag="tr-minQty"] > span > div:last-child').text
            print('min_qty', min_qty)
            if min_qty == 'Non-Stock':
                # Non-stocked category: nothing worth crawling here.
                return []
        except NoSuchElementException:
            # No minimum-quantity widget — not a listing page; keep going.
            pass

        if 'filter' in landing_url:
            # Already a filtered listing page: keep it, minus the query string.
            return landing_url.split('?')[0:1]
        if 'products/detail' in landing_url:
            # Single product page — nothing to enumerate.
            return []

        # Intermediate category page: recurse into each child subcategory.
        anchors = self.crawler.find_elements_by_css_selector(
            '[data-testid="subcategories-items"]')
        collected = []
        for child_url in self.join_urls(anchors):
            collected.extend(self.parse_sub_category(child_url))
        return collected
Example no. 3
0
 def dkpn_sort_asc(self):
     """Sort the results table by part number, ascending.

     Scrolls the sort control into view first, then waits for it to become
     clickable before clicking.
     """
     rand_delay(1, 3)
     # Bring the sort header into the viewport before interacting with it.
     pos_offset = 900
     self.crawler.execute_script(f"window.scrollTo(0, {pos_offset});")
     self.element_to_be_clickable(self.selectors['dkpn-sort-asc']).click()
Example no. 4
0
 def click_download(self):
     """Open the export popup and trigger the table download.

     The download button only becomes available after the popup trigger has
     been clicked, so the two clicks happen in that order; a long randomized
     pause afterwards gives the download time to complete.
     """
     for key in ('popup-trigger', 'download'):
         self.element_to_be_clickable(self.selectors[key]).click()
     rand_delay(8, 9)
 def crawl(self):
     """Load the start page and return its top-level subcategory URLs."""
     self.crawler.get(self.start_url)
     rand_delay(5, 10)
     # Lazily rendered items only appear once the page is fully scrolled.
     self.scroll_to_bottom()
     anchors = self.crawler.find_elements_by_css_selector(
         '[data-testid="subcategories-items"]')
     urls = self.join_urls(anchors)
     rand_delay(2, 5)
     return urls
Example no. 6
0
 def click_next_page(self):
     """Advance to the next results page via whichever pager button works.

     Buttons matching both pager selectors are gathered; the first one that
     accepts a click wins. Buttons obscured by an overlay raise
     ElementClickInterceptedException and are skipped on purpose.
     """
     candidates = []
     for key in ('next-page', 'next-page-alt'):
         candidates.extend(
             self.crawler.find_elements_by_css_selector(self.selectors[key]))
     for button in candidates:
         try:
             button.click()
         except ElementClickInterceptedException:
             continue
         break
     rand_delay(5, 8)
Example no. 7
0
 def scroll_to_bottom(self):
     """Scroll down in 900px steps until the page stops moving.

     Each step pauses a few seconds so lazily loaded content can extend the
     page; the loop ends once a scroll no longer changes window.pageYOffset.
     """
     offset = 0
     while True:
         before = self.crawler.execute_script(
             "return window.pageYOffset;")
         offset += 900
         self.crawler.execute_script(f"window.scrollTo(0, {offset});")
         rand_delay(2, 4)
         after = self.crawler.execute_script(
             "return window.pageYOffset;")
         if before == after:
             # The viewport did not move: we have reached the bottom.
             break
Example no. 8
0
 def get_max_page(self):
     """Return the total number of result pages as an int.

     Waits generously first because the pager label renders late.
     """
     rand_delay(15, 20)
     label = self.crawler.find_element_by_css_selector(
         self.selectors['max-page']).text
     # Pager text looks like "current/total"; the part after the last '/'
     # is the page count.
     return int(label.split('/')[-1])
Example no. 9
0
 def scroll_up_down(self):
     """Nudge the viewport down and then back up to mimic human scrolling."""
     for offset in (200, -200):
         self.crawler.execute_script(f"window.scrollTo(0, {offset});")
         rand_delay(0, 1)
Example no. 10
0
 def get_cur_page(self):
     """Return the currently displayed page number as an int.

     Reads the 'value' attribute of the page-number input field.
     """
     rand_delay(1, 2)
     field = self.crawler.find_element_by_css_selector(
         self.selectors['cur-page'])
     return int(field.get_attribute('value'))