def test_header_about_submenu_company_redirect(self):
    """ABOUT > COMPANY submenu entry must redirect to the configured path."""
    about_link = self.getAboutLink()
    chains(self.driver).move_to_element(about_link).perform()
    submenu = self.getSubmenuAbout()
    landing_path = self.clickEvent(submenu[0])
    expected = self.CONST.get('SUBMENU_COMPANY')
    assert landing_path == expected, 'submenu COMPANY link is not working'
def test_header_about_submenu_testimonial_redirect(self):
    """ABOUT > TESTIMONIALS submenu entry must redirect to the configured path."""
    about_link = self.getAboutLink()
    chains(self.driver).move_to_element(about_link).perform()
    submenu = self.getSubmenuAbout()
    landing_path = self.clickEvent(submenu[2])
    expected = self.CONST.get('SUBMENU_TESTIMONIALS')
    assert landing_path == expected, 'submenu TESTIMONIALS link is not working'
def test_header_resources_submenu_agileTools_redirect(self):
    """RESOURCES > AGILE TOOLS submenu entry must redirect to the configured path."""
    resources_link = self.getResourcesLink()
    chains(self.driver).move_to_element(resources_link).perform()
    submenu = self.getSubmenuResources()
    landing_path = self.clickEvent(submenu[3])
    expected = self.CONST.get('SUBMENU_AGILE_TOOLS')
    assert landing_path == expected, 'submenu AGILE TOOLS link is not working'
def test_header_about_submenu_careers_redirect(self):
    """ABOUT > CAREERS submenu entry must redirect to the configured path."""
    about_link = self.getAboutLink()
    chains(self.driver).move_to_element(about_link).perform()
    submenu = self.getSubmenuAbout()
    landing_path = self.clickEvent(submenu[3])
    expected = self.CONST.get('SUBMENU_CAREERS')
    assert landing_path == expected, 'submenu CAREERS link is not working'
def test_header_training_submenu_agileFramework_redirect(self):
    """TRAINING > AGILE FRAMEWORK submenu entry must redirect to the configured path."""
    training_link = self.getTrainingLink()
    chains(self.driver).move_to_element(training_link).perform()
    submenu = self.getSubmenuTraining()
    landing_path = self.clickEvent(submenu[0])
    expected = self.CONST.get('SUBMENU_AGILEFRAMEWORK')
    assert landing_path == expected, 'submenu AGILE FRAMEWORK link is not working'
def test_header_training_submenu_agileCulture_redirect(self):
    """TRAINING > AGILE CULTURE submenu entry must redirect to the configured path."""
    training_link = self.getTrainingLink()
    chains(self.driver).move_to_element(training_link).perform()
    submenu = self.getSubmenuTraining()
    landing_path = self.clickEvent(submenu[2])
    expected = self.CONST.get('SUBMENU_AGILECULTURE')
    assert landing_path == expected, 'submenu AGILE CULTURE link is not working'
def test_header_training_submenu_agilepractices_redirect(self):
    """TRAINING > AGILE PRACTICES AND TOOLS submenu entry must redirect correctly."""
    training_link = self.getTrainingLink()
    chains(self.driver).move_to_element(training_link).perform()
    submenu = self.getSubmenuTraining()
    landing_path = self.clickEvent(submenu[1])
    expected = self.CONST.get('SUBMENU_AGILEPRACTICES_TOOLS')
    assert landing_path == expected, 'submenu AGILE PRACTICES and tools link is not working'
def test_header_resources_submenu_scrumEvents_redirect(self):
    """RESOURCES > SCRUM EVENTS PLAYBOOK submenu entry must redirect correctly."""
    resources_link = self.getResourcesLink()
    chains(self.driver).move_to_element(resources_link).perform()
    submenu = self.getSubmenuResources()
    landing_path = self.clickEvent(submenu[0])
    expected = self.CONST.get('SUBMENU_SCRUM_EVENTS')
    assert landing_path == expected, 'submenu SCRUM EVENTS PLAYBOOK link is not working'
def test_header_resources_submenu_agilePresentation_redirect(self):
    """RESOURCES > AGILE PRESENTATIONS submenu entry must redirect correctly."""
    resources_link = self.getResourcesLink()
    chains(self.driver).move_to_element(resources_link).perform()
    submenu = self.getSubmenuResources()
    landing_path = self.clickEvent(submenu[2])
    expected = self.CONST.get('SUBMENU_AGILE_PRESENTATIONS')
    assert landing_path == expected, 'submenu AGILE PRESENTATIONS link is not working'
def buscar_categoria(self, categoria, subcategoria):
    """Navigate the categories mega-menu: hover the menu button, hover the
    entry matching `categoria`, then click the entry matching `subcategoria`.

    Both names are matched by link text via XPath `contains(text(), ...)`.
    """
    driver = self.driver
    actions = chains(driver)
    menu_button = driver.find_element_by_class_name("nav-menu-categories-link")
    actions.move_to_element(menu_button).perform()
    # NOTE(review): the names are interpolated into the XPath; a double quote
    # inside a category name would break the expression.
    categoria_link = driver.find_element_by_xpath('//a[contains(text(), "' +
                                                  categoria + '")]')
    actions.move_to_element(categoria_link).perform()
    driver.find_element_by_xpath('//a[contains(text(), "' + subcategoria +
                                 '")]').click()
def test_header_trianing_submenu(self):
    """Hovering TRAINING must reveal all three of its submenu entries."""
    training_link = self.getTrainingLink()
    chains(self.driver).move_to_element(training_link).perform()
    framework, practices, culture = self.getSubmenuTraining()
    checks = [
        (framework, 'submenu AGILE FRAMEWORK is not being displayed'),
        (practices, 'submenu AGILE PRACTICES is not being displayed'),
        (culture, 'submenu AGILE CULTURE is not being displayed'),
    ]
    for link, message in checks:
        assert link.is_displayed(), message
def test_header_about_submenu(self):
    """Hovering ABOUT must reveal all four of its submenu entries."""
    about_link = self.getAboutLink()
    chains(self.driver).move_to_element(about_link).perform()
    company, partners, testimonials, careers = self.getSubmenuAbout()
    checks = [
        (company, 'submenu COMPANY is not being displayed'),
        (partners, 'submenu PARTNERS is not being displayed'),
        (testimonials, 'submenu TESTIMONIALS is not being displayed'),
        (careers, 'submenu CAREERS is not being displayed'),
    ]
    for link, message in checks:
        assert link.is_displayed(), message
def test_header_resources_submenu(self):
    """Hovering RESOURCES must reveal all four of its submenu entries.

    Note: the submenu elements are located before the hover is performed,
    matching the original test's call order.
    """
    resources_link = self.getResourcesLink()
    scrum_events, agile_videos, agile_presentations, agile_tools = self.getSubmenuResources()
    chains(self.driver).move_to_element(resources_link).perform()
    checks = [
        (scrum_events, 'submenu SCRUM EVENTS PLAYBOOK is not being displayed'),
        (agile_videos, 'submenu AGILE VIDEOS is not being displayed'),
        (agile_presentations, 'submenu AGILE PRESENTATIONS is not being displayed'),
        (agile_tools, 'submenu AGILE TOOLS is not being displayed'),
    ]
    for link, message in checks:
        assert link.is_displayed(), message
def fill_from(object, strAction):
    """Best-effort auto-fill of a web form, then trigger the given action button.

    NOTE(review): `object` shadows the builtin and is actually the WebDriver
    instance; `strAction` is the visible text of the action button (e.g.
    'Submit').  Returns the list of values that were entered, or the string
    'error' when the final confirmation modal could not be clicked.
    """
    print("fill form")
    browser = object
    listinput = []  # values typed into the form, returned to the caller
    # Open the clearing-house dropdown and pick its first entry (best effort;
    # the dropdown may not exist on every form).
    try:
        dropdownbox = browser.find_element_by_css_selector(
            "#clearing-house-dropdown-menu-button")
        dropdownbox.click()
        sleep(5)
        dropdownboxitems = browser.find_element_by_css_selector(
            "#participant-info-section > div:nth-child(1) > div > div.clearing-house.selector.show.open > div > a:nth-child(1)"
        )
        dropdownboxitems.click()
    except:  # deliberate best-effort: any failure here is non-fatal
        print('dropdownbox error')
    # First pass over <select> elements: pick option index 1 in each.
    selects = browser.find_elements_by_tag_name("select")
    print('len(selects)')
    print(len(selects))
    for arg in selects[0:]:
        try:
            Select(arg).select_by_index(1)
        except:
            print('Select error')
    # Click every <label> (toggles any checkbox/radio bound to the label).
    labels = browser.find_elements_by_tag_name("label")
    print('len(labels)')
    print(len(labels))
    for arg in labels[0:]:
        try:
            arg.click()
        except:
            print('labels')
    # Fill every <input>, dispatching on its placeholder / aria attributes.
    es = browser.find_elements_by_tag_name("input")
    print('len(input)')
    print(len(es))
    for arg in es[0:]:
        try:
            # Skip inputs that already hold a value.
            if (arg.get_attribute('value') != ''):
                print(arg.get_attribute('value'))
                continue
        except Exception as argx:
            print(argx)
        if ((arg.get_attribute('placeholder') == 'Type your answer here...')
                and ((arg.get_attribute('aria-label') == 'Email Address') or
                     (arg.get_attribute('type') == 'email'))):
            # Email field.
            try:
                arg.send_keys('*****@*****.**')
                listinput.append('*****@*****.**')
            except:
                print(arg)
        elif (arg.get_attribute('placeholder') == 'Type your answer here...'
              ) and (arg.get_attribute('maxlength') == '50'):
            # 50-char free-text field: filled but deliberately not recorded.
            try:
                arg.send_keys('*****@*****.**')
                #listinput.append('*****@*****.**')
            except:
                print(arg)
        elif ((arg.get_attribute('placeholder') == 'Type your answer here...')
              or (arg.get_attribute('placeholder') ==
                  'Type telephone number here...')) and (
                      (arg.get_attribute('aria-label') == 'Telephone Number')
                      or (arg.get_attribute('aria-labelledby') ==
                          'guideContainer-rootPanel-contactInformation-contactPersons-contact-phone___guideFieldShortDescription'
                          )):
            # Telephone field.
            try:
                arg.send_keys('123-1234-1234')
                listinput.append('123-1234-1234')
            except:
                print(arg)
        elif (arg.get_attribute('placeholder') == 'Type your answer here...'
              ) and (arg.get_attribute(
                  'aria-labelledby'
              ) ==
                     'guideContainer-rootPanel-contactinformationse-contactInformationFragment-confirmationDetailsWrapper-loginuserid___guideFieldShortDescription'
                     ):
            # Login user-id field.
            try:
                arg.send_keys('BO_C88888')
                listinput.append('BO_C88888')
            except:
                print(arg)
        elif (arg.get_attribute('placeholder') == 'dd-mmm-yyyy') or (
                arg.get_attribute('placeholder') == 'dd-mmm-yy'):
            # Date field: one week from now, formatted dd-mmm-YYYY.
            # NOTE(review): this branch also matches 'dd-mmm-yy', which makes
            # the dedicated 'dd-mmm-yy' branch below unreachable -- confirm
            # whether the two-digit-year format was intended for those fields.
            try:
                arg.send_keys(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y"))
                listinput.append(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y").lower())
            except:
                print(arg)
        elif (arg.get_attribute('placeholder') == 'dd-mmm-yy'):
            # Unreachable: shadowed by the branch above (see NOTE).
            try:
                arg.send_keys(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%y"))
                listinput.append(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y").lower())
            except:
                print(arg)
        elif (arg.get_attribute('aria-label') == 'Stock Code'):
            # Stock code: type 600, then click the table cell that appears
            # (presumably the autocomplete suggestion -- confirm).
            try:
                arg.send_keys(600)
                sleep(3)
                menu = browser.find_element_by_xpath(
                    "//*[@id=\"guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-stockCode__\"]/table/tbody/tr/td[2]"
                )
                actions = chains(browser)
                #actions.move_to_element(menu)
                #sleep(3)
                actions.move_to_element(menu).click().perform()
                listinput.append(600)
            except Exception as e:
                print(e)
        elif (arg.get_attribute('aria-label') == 'Stock Name'):
            # Stock name is not typed (send_keys is commented out); only the
            # expected value is recorded.
            try:
                #arg.send_keys('CHINA INFRASTRUCTURE INVESTMENT LIMITED')
                listinput.append('CHINA INFRASTRUCTURE INVESTMENT LIMITED')
            except:
                print(arg)
        elif (arg.get_attribute(
                'aria-labelledby'
        ) ==
              'guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-settlementPosNo___guideFieldShortDescription'
              ):
            # Settlement position number.
            try:
                arg.send_keys('P12345678')
                listinput.append('P12345678')
            except:
                print(arg)
        elif (arg.get_attribute(
                'aria-labelledby'
        ) ==
              'guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-shareQuantityApplied___guideFieldShortDescription'
              ):
            # Share quantity applied.
            try:
                arg.send_keys('12345678')
                listinput.append('12345678')
            except:
                print(arg)
        elif (arg.get_attribute('placeholder') == 'Type your answer here...'):
            # Generic free-text fallback.
            try:
                #numberrx =random.randint(12345678901234567890,123456789012345678901234567890)
                arg.send_keys("12345678")
                listinput.append(arg.get_attribute('value'))
            except Exception as exa:
                print(exa)
        else:
            # Anything else: try a literal '1'.
            try:
                arg.send_keys('1')
            except:
                print(arg)
    # Fill every <textarea> with a 110-character filler string.
    textareas = browser.find_elements_by_tag_name("textarea")
    print('len(textareas)')
    print(len(textareas))
    for arg in textareas[0:]:
        try:
            arg.send_keys(
                'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij'
            )
            listinput.append(arg.get_attribute('value'))
        except:
            print('textareas error')
    # Second pass over <select>s after the inputs were filled.
    selects = browser.find_elements_by_tag_name("select")
    print('len(selects)')
    print(len(selects))
    for arg in selects[0:]:
        try:
            Select(arg).select_by_index(1)
        except:
            print('Select error')
    # Record which of the inputs ended up aria-selected (radio/checkbox state).
    for arg in es[0:]:
        try:
            print(arg.get_attribute('aria-selected'))
            if arg.get_attribute('aria-selected') == 'true':
                listinput.append(arg.get_attribute('aria-label').lower())
                print(arg.get_attribute('value'))
        except:
            print('radio')
    print(listinput)
    # Step through up to three preview screens.
    try:
        browser.find_element_by_id("preview-btn").click()
        sleep(3)
        browser.find_element_by_id("preview-btn").click()
        sleep(3)
        browser.find_element_by_id("preview-btn").click()
        sleep(5)
    except:
        print('preview error')
    # Click the button labelled with the capitalized action, twice
    # (presumably request + confirmation screens -- confirm).
    try:
        buttons = browser.find_element_by_xpath("//button[text()='" +
                                                strAction.capitalize() + "']")
        buttons.click()
        sleep(3)
        vbuttons = browser.find_element_by_xpath("//button[text()='" +
                                                 strAction.capitalize() +
                                                 "']")
        vbuttons.click()
        sleep(3)
    except:
        print('action error ' + strAction)
    # Switch back to the first window and fill the confirmation modal.
    handles = browser.window_handles
    browser.switch_to.window(handles[0])
    try:
        browser.find_element_by_id("message-text").send_keys(strAction)
        browser.find_element_by_css_selector(
            "div.modal-footer > button:nth-child(2)").click()
    except:
        print("cant click")
        return 'error'
    sleep(15)
    return listinput
def close_tab(self):
    """Close the active browser tab by sending the Ctrl+W hotkey."""
    hotkey = chains(self.driver)
    hotkey.key_down(Keys.CONTROL)
    hotkey.send_keys('w')
    hotkey.key_up(Keys.CONTROL)
    hotkey.perform()
def parse(self, response):
    """Scrapy/Selenium hybrid crawler for fashion.sivillage.com.

    Walks every left-menu category (skipping the first entry), then every
    product on every listing page.  Each product's brand/name/price/description
    and per-color sizes and image URLs are persisted through the module-level
    __record_progress__/__record_data__ helpers.  Products whose number is
    already in `self.progress` are skipped.
    """
    self.driver.get(response.url)
    # Spoof plugins / languages / WebGL vendor so the site's headless-browser
    # detection sees a regular desktop browser.
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
    )
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
    )
    self.driver.execute_script(
        "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
    )
    self.sel = scrapy.Selector(text=self.driver.page_source)
    cat_list = self.sel.xpath(
        '//*[@id="leftMenu"]/div/div/div/ul/li/a/text()').extract()
    for ci, c in enumerate(cat_list):
        cat_list[ci] = c.strip()
    for catidx, category in enumerate(cat_list):
        if catidx < 1:  # skip the first menu entry
            continue
        xpath = '//*[@id="leftMenu"]/div/div/div/ul/li[{}]/a'.format(
            str(catidx + 1))
        # click category (hover first so the menu link is interactable)
        try:
            actions = chains(self.driver)
            xpath = '//*[@id="leftMenu"]/div/div/div/ul/li[{}]/a'.format(
                str(catidx + 1))
            cat = self.driver.find_element_by_xpath(xpath)
            actions.move_to_element(cat).perform()
            to_be_clicked = WebDriverWait(self.driver, 120).until(
                EC.element_to_be_clickable((By.XPATH, xpath)))
        except TimeoutException:
            # NOTE(review): message says 'color' but this is the category
            # click -- looks copy-pasted from the swatch loop below.
            print("\n\nERROR WHILE CLICKING color")
            continue
        self.driver.execute_script('arguments[0].click()', to_be_clicked)
        __delay_time__()
        self.sel = scrapy.Selector(text=self.driver.page_source)
        print('\n\n {} clicked\n\n'.format(category))
        curr_page_num = 1
        while True:
            # crawl items on the current listing page
            xpath = '//*[@id="productListDiv"]/div/div/ul/li'
            item_lists = self.sel.xpath(xpath).extract()
            for li_idx in range(len(item_lists)):
                curr_url = self.driver.current_url
                # the item link's href embeds productNo/infwPathTp/infwPathNo
                # as quoted JS arguments; split on single quotes to pull them
                xpath = '//*[@id="productListDiv"]/div/div/ul/li[{}]/div/div/a/@href'.format(
                    str(li_idx + 1))
                script_res = self.sel.xpath(xpath).extract_first()
                script_res = script_res.split("'")
                productNo = script_res[1]
                if productNo in self.progress:
                    # already crawled: reload the listing and move on
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    continue
                infwPathTp = script_res[3]
                infwPathNo = script_res[5]
                brand = self.sel.xpath(
                    '//*[@id="productListDiv"]/div/div/ul/li[{}]/div[2]/div[1]/span/text()'
                    .format(str(li_idx + 1))).extract_first()
                url_string = "productNo={}&infwPathTp={}&infwPathNo={}".format(
                    productNo, infwPathTp, infwPathNo)
                url = 'http://fashion.sivillage.com/product/productDetail?' + url_string
                self.driver.get(url)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                # scrape the product detail page
                name = self.sel.xpath(
                    '//*[@id="content"]/div[1]/div[2]/div[1]/div[4]/text()'
                ).extract_first()
                if name is None:
                    # detail page did not render: back to the listing
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    continue
                else:
                    name = name.strip()
                prod_num = self.sel.xpath(
                    '//*[@id="content"]/div[1]/div[2]/div[1]/div[2]/text()'
                ).extract_first().strip()
                print('\n\n\n\n\n{}\n\n\n\n\n'.format(prod_num))
                if prod_num in self.progress:
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    continue
                price = self.sel.xpath(
                    '//*[@id="content"]/div[1]/div[2]/div[1]/div[5]/span/text()'
                ).extract_first()
                prod_desc = self.sel.xpath(
                    '//*[@id="content"]/div[1]/div[2]/div[6]/dl[1]/dd/div/div[1]/span[2]/text()'
                ).extract()
                if prod_desc is None:
                    prod_desc = ''
                else:
                    prod_desc = ' '.join(prod_desc)
                colors = self.sel.xpath(
                    '//*[@id="content"]/div[1]/div[2]/div[4]/div/dl[1]/dd/ul/li/a/img/@alt'
                ).extract()
                color = {}
                for c_idx, c in enumerate(colors):
                    if c not in color:
                        color[c] = {'img_url': [], 'size': [], 'rel': []}
                    else:
                        continue  # duplicate swatch name: skip
                    # click the color swatch to load its sizes/images
                    try:
                        actions = chains(self.driver)
                        xpath = '//*[@id="content"]/div[1]/div[2]/div[4]/div/dl[1]/dd/ul/li[{}]/a'.format(
                            str(c_idx + 1))
                        col = self.driver.find_element_by_xpath(xpath)
                        actions.move_to_element(col).perform()
                        to_be_clicked = WebDriverWait(self.driver, 120).until(
                            EC.element_to_be_clickable((By.XPATH, xpath)))
                    except TimeoutException:
                        print("\n\nERROR WHILE CLICKING color")
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    size = self.sel.xpath(
                        '//ul[@class="size"]/li/a[not(contains(@class,"disabled"))]/text()'
                    ).extract()
                    color[c]['size'] = size
                    imgs = self.sel.xpath(
                        '//*[@id="mImgDiv"]/div/a/img/@src').extract()
                    color[c]['img_url'] = imgs
                # write progress
                __record_progress__(productNo)
                # write data
                __record_data__(url, category, brand, name, price, prod_num,
                                prod_desc, color)
                # go back to main page
                self.driver.get(curr_url)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
            # click next page: prefer an explicit page number, else 'Next';
            # a successful page change is detected by the first item's
            # data-prod-no changing
            page_list = self.sel.xpath(
                '//div[@class="paging"]/a/text()').extract()
            print(page_list)
            next_page_num = curr_page_num + 1
            if str(next_page_num) not in page_list:
                if 'Next' in page_list:
                    # click next button
                    xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                    prev_page_item = self.sel.xpath(xpath).extract_first()
                    while True:
                        xpath = '//div[@class="paging"]/a[contains(text(), "Next")]'
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING Next Button")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                        curr_item = self.sel.xpath(xpath).extract_first()
                        if prev_page_item != curr_item:
                            print("\n\nNEXT BUTTON CLICKED: page now {}\n\n".
                                  format(str(curr_page_num + 1)))
                            curr_page_num += 1
                            break
                else:
                    # final page
                    break
            else:
                # click the numbered next-page link
                xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                prev_page_item = self.sel.xpath(xpath).extract_first()
                while True:
                    xpath = '//div[@class="paging"]/a[contains(text(), "{}")]'.format(
                        str(next_page_num))
                    try:
                        to_be_clicked = WebDriverWait(self.driver, 120).until(
                            EC.element_to_be_clickable((By.XPATH, xpath)))
                    except TimeoutException:
                        print("\n\nERROR WHILE CILCKING next page")
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                    curr_item = self.sel.xpath(xpath).extract_first()
                    if prev_page_item != curr_item:
                        print("\n\nNEXT PAGE CLICKED: page now {}\n\n".format(
                            str(curr_page_num + 1)))
                        curr_page_num += 1
                        break
def parse(self, response):
    """Scrapy/Selenium hybrid crawler for www.thehandsome.com (men's menu).

    For each upper class (menu li 3..7) and each of its sub-categories, pages
    through the product list.  Each product gets brand/name/price/description,
    per-color sizes and images, and any 'related' items, persisted through the
    module-level __record_progress__/__record_data__ helpers.  Products whose
    number is already in `self.progress` are skipped.
    """
    self.driver.get(response.url)
    # Spoof plugins / languages / WebGL vendor to defeat headless detection.
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
    )
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
    )
    self.driver.execute_script(
        "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
    )
    upper_class_index = [3, 4, 5, 6, 7]
    for upidx in upper_class_index:
        subclass_xpath = '//*[@id="cate_m_main"]/li[3]/div/div/ul/li[{}]/ul/li/a/text()'.format(
            str(upidx))
        subclass_lists = response.xpath(subclass_xpath).extract()
        for subidx, subclass in enumerate(subclass_lists):
            xpath = '//*[@id="cate_m_main"]/li[3]/div/div/ul/li[{}]/ul/li[{}]/a'.format(
                str(upidx), str(subidx + 1))
            # click sub category; retry until the breadcrumb confirms it
            while True:
                # click sub category
                try:
                    actions = chains(self.driver)
                    man = self.driver.find_element_by_xpath(
                        '//*[@id="cate_m_main"]/li[3]/a[@href="/ko/item/me"]'
                    )
                    actions.move_to_element(man).perform()
                    to_be_clicked = WebDriverWait(self.driver, 120).until(
                        EC.element_to_be_clickable((By.XPATH, xpath)))
                except TimeoutException:
                    print("\n\nERROR WHILE CLICKING {}".format(subclass))
                    continue
                self.driver.execute_script('arguments[0].click()',
                                           to_be_clicked)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                # NOTE(review): `xpath` is reassigned here, so if the
                # breadcrumb check below fails, the retry iteration waits on
                # the breadcrumb text node instead of the menu link -- confirm
                # this is intended.
                xpath = '//*[@id="bodyWrap"]/h3/span/a[3]/text()'
                check = self.sel.xpath(xpath).extract_first()
                if check == subclass:
                    break
            print(subclass + ' clicked\n')
            last_page = False
            urls = []  # NOTE(review): never used
            while True:
                xpath = '//*[@id="listBody"]/li'
                lists = self.sel.xpath(xpath).extract()
                for li_idx in range(len(lists)):
                    curr_url = self.driver.current_url
                    xpath = '//*[@id="listBody"]/li[{}]/div/a[1]/@href'.format(
                        str(li_idx + 1))
                    rel_url = self.sel.xpath(xpath).extract_first()
                    url = urllib.parse.urljoin('http://www.thehandsome.com/',
                                               rel_url)
                    self.driver.get(url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    # check page_source changes
                    # scrape page:
                    # category, brand, name, price, prod_num, prod_desc, color, size, img, url
                    category = subclass
                    brand = self.sel.xpath(
                        '//*[@id="contentDiv"]/div[1]/div[1]/h4/a/span/span/text()'
                    ).extract_first()
                    name = self.sel.xpath(
                        '//*[@id="contentDiv"]/div[1]/div[1]/h4/span/text()'
                    ).extract_first().strip()
                    price = self.sel.xpath(
                        '//*[@id="contentDiv"]/div[1]/div[1]/p[1]/span/text()'
                    ).extract_first()[1:]  # drop the leading character
                    prod_num = self.sel.xpath(
                        '//*[@id="contentDiv"]/div[1]/div[1]/p[2]/text()'
                    ).extract_first().strip()
                    if prod_num in self.progress:
                        # already crawled: back to the listing
                        self.driver.get(curr_url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        continue
                    xpath = '//*[@id="contentDiv"]/div[1]/div[1]/p[3]/text()'
                    prod_desc = self.sel.xpath(xpath).extract_first()
                    if prod_desc is None:
                        prod_desc = ''
                    else:
                        prod_desc = prod_desc.strip()
                    # color names are embedded in the swatches' onmouseover JS
                    colors = self.sel.xpath(
                        '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[1]/div/ul/li/a/@onmouseover'
                    ).extract()
                    color = {}
                    # {'brand', 'name', 'product_num'}
                    for c_idx, c in enumerate(colors):
                        c = c.split("'")[1]
                        if c not in color:
                            color[c] = {
                                'img_url': [],
                                'size': [],
                                'rel': []
                            }
                        else:
                            continue  # duplicate color name: skip
                        # click color swatch
                        try:
                            actions = chains(self.driver)
                            xpath = '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[1]/div/ul/li[{}]/a'.format(
                                str(c_idx + 1))
                            col = self.driver.find_element_by_xpath(xpath)
                            actions.move_to_element(col).perform()
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CLICKING color")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        # size and img by each color
                        size = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[2]/span[2]/ul/li/a[not(@class)]/text()'
                        ).extract()
                        color[c]['size'] = size
                        imgs = self.sel.xpath(
                            '//*[@id="imageDiv"]/ul/li/img/@src').extract()
                        color[c]['img_url'] = imgs
                        # check existence of a 'related items' block
                        try:
                            xpath = '//div[@class="related_evt"]'
                            self.driver.find_element_by_xpath(xpath)
                        except NoSuchElementException:
                            # continue to next color
                            continue
                        # visit each related item
                        item_li = self.sel.xpath(
                            '//*[@id="referencesListContent"]/ul').extract()
                        for itemidx in range(len(item_li)):
                            # NOTE(review): only NoSuchElementException is
                            # caught here; a TimeoutException from the wait
                            # would propagate -- confirm intent.
                            try:
                                actions = chains(self.driver)
                                xpath = '//*[@id="referencesListContent"]/ul/li[{}]/a'.format(
                                    str(itemidx + 1))
                                item = self.driver.find_element_by_xpath(
                                    xpath)
                                actions.move_to_element(item).perform()
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except NoSuchElementException:
                                print(
                                    "\n\nERROR WHILE CLICKING relevant item"
                                )
                                continue
                            self.driver.execute_script(
                                'arguments[0].click()', to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            # get brand, name, product_num of the related item
                            rel_brand = self.sel.xpath(
                                '//*[@id="contentDiv"]/div[1]/div[1]/h4/a/span/span/text()'
                            ).extract_first()
                            rel_name = self.sel.xpath(
                                '//*[@id="contentDiv"]/div[1]/div[1]/h4/span/text()'
                            ).extract_first().strip()
                            rel_prod_num = self.sel.xpath(
                                '//*[@id="contentDiv"]/div[1]/div[1]/p[2]/text()'
                            ).extract_first().strip()
                            color[c]['rel'].append({
                                'rel_brand': rel_brand,
                                'rel_name': rel_name,
                                'rel_prod_num': rel_prod_num
                            })
                            # go back to original item
                            self.driver.get(url)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                    # write progress
                    __record_progress__(prod_num)
                    # write data
                    __record_data__(url, category, brand, name, price,
                                    prod_num, prod_desc, color)
                    # go back to main page
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                # click next page; a page change is detected via the first
                # item's image URL changing
                xpath = '//*[@id="listBody"]/li[1]/div/a[1]/span[1]/img/@src'
                prev_page = self.sel.xpath(xpath).extract_first()
                cnt = 0
                while True:
                    xpath = '//*[@id="bodyWrap"]/div[1]/div[2]/a[3]'
                    try:
                        to_be_clicked = WebDriverWait(self.driver, 120).until(
                            EC.element_to_be_clickable((By.XPATH, xpath)))
                    except TimeoutException:
                        print("\n\nERROR WHILE CILCKING next page")
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    xpath = '//*[@id="listBody"]/li[1]/div/a[1]/span[1]/img/@src'
                    check = self.sel.xpath(xpath).extract_first()
                    if prev_page != check:
                        break
                    cnt += 1
                    if cnt > 4:
                        # listing never changed after 5 clicks: final page
                        last_page = True
                        break
                print("NEXT PAGE CLICKED\n\n")
                if last_page == True:
                    break
def hover_to_click_element(self, locator1, locator2):
    """Hover over the element at `locator1`, then click `locator2` once visible."""
    wait = WDW(self.driver, 10)
    hover_target = wait.until(EC.presence_of_element_located(locator1))
    chains(self.driver).move_to_element(hover_target).perform()
    click_target = wait.until(EC.visibility_of_element_located(locator2))
    click_target.click()
def open_tab(self):
    """Open a new browser tab by sending the Ctrl+T hotkey."""
    hotkey = chains(self.driver)
    hotkey.key_down(Keys.CONTROL)
    hotkey.send_keys('t')
    hotkey.key_up(Keys.CONTROL)
    hotkey.perform()
def hover_to_click(self, locator):
    """Wait for the element at `locator`, then hover over it and click it."""
    target = WDW(self.driver, 10).until(
        EC.presence_of_element_located(locator))
    actions = chains(self.driver)
    actions.move_to_element(target)
    actions.click()
    actions.perform()
Keys.ARROW_DOWN) driver.find_element_by_xpath("//input[@id='custCity']").send_keys(Keys.RETURN) time.sleep(3) driver.find_element_by_xpath("//input[@id='custAddress']").click() driver.find_element_by_xpath("//input[@id='custAddress']").send_keys("Madhav") time.sleep(3) driver.find_element_by_xpath("//input[@id='custAddress']").send_keys( Keys.ARROW_DOWN) driver.find_element_by_xpath("//input[@id='custAddress']").send_keys( Keys.RETURN) time.sleep(3) driver.find_element_by_xpath("//input[@id='buildOrderBtn']").click() time.sleep(3) #MouseOver Action Code actions = chains(driver) val1 = driver.find_element_by_xpath("//a[@id='customize_pizza_1']") actions.move_to_element(val1).perform() time.sleep(2) val2 = wait(driver, 10).until( EC.element_to_be_clickable((By.XPATH, "//a[@id='quick_add_1']"))) val2.click() #MouseOver Action End #Add Product to Cart End time.sleep(3) driver.find_element_by_xpath("//a[@id='checkout']").click() time.sleep(5) driver.find_element_by_xpath("//a[@id='crossButtonUpselling']").click() time.sleep(5) driver.find_element_by_xpath(
def parse(self, response):
    """Scrapy/Selenium hybrid crawler for www.ssense.com.

    Clicks one upper category (index hard-wired to i == 0, i.e. li[4]) and one
    sub-category (catidx == 6, labelled 'sweater'), then pages through the
    product grid.  Each product gets brand/name/price/description, images and
    sizes, plus any 'Styled with' related products, persisted through the
    module-level __record_progress__/__record_data__ helpers.  Product URLs
    already in `self.progress` are skipped.
    """
    self.driver.get(response.url)
    # Spoof plugins / languages / WebGL vendor to defeat headless detection.
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
    )
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
    )
    self.driver.execute_script(
        "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
    )
    self.sel = scrapy.Selector(text=self.driver.page_source)
    for i in range(2):
        if i != 0:  # only the first upper category is crawled
            continue
        target_idx = str(i + 4)
        # click the upper category (hover first so it is interactable)
        try:
            actions = chains(self.driver)
            xpath = '//*[@id="category-list"]/li[{}]/a'.format(target_idx)
            upper_cat = self.driver.find_element_by_xpath(xpath)
            actions.move_to_element(upper_cat).perform()
            to_be_clicked = WebDriverWait(self.driver, 120).until(
                EC.element_to_be_clickable((By.XPATH, xpath)))
        except TimeoutException:
            print("\n\nTimeoutExceptoin while clicking upper category {}".
                  format(target_idx))
            continue
        self.driver.execute_script('arguments[0].click()', to_be_clicked)
        __delay_time__()
        self.sel = scrapy.Selector(text=self.driver.page_source)
        print('\n\n upper category {} clicked\n\n'.format(target_idx))
        cat_list = self.sel.xpath(
            '//*[@id="category-list"]/li[{}]/ul/li/a/text()'.format(
                target_idx)).extract()
        cat_list = [cat.strip() for cat in cat_list]
        for catidx, cat in enumerate(cat_list):
            if catidx != 6:
                # sweater
                continue
            # click the sub-category
            try:
                actions = chains(self.driver)
                xpath = '//*[@id="category-list"]/li[{}]/ul/li[{}]/a'.format(
                    target_idx, str(catidx + 1))
                category = self.driver.find_element_by_xpath(xpath)
                actions.move_to_element(category).perform()
                to_be_clicked = WebDriverWait(self.driver, 120).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            except TimeoutException:
                print("\n\nTimeoutExceptoin while clicking category {}".
                      format(cat))
                continue
            self.driver.execute_script('arguments[0].click()', to_be_clicked)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)
            print('\n\n category {} clicked\n\n'.format(cat))
            curr_page_num = 1
            while True:
                # crawl items on the current grid page
                prod_list = self.sel.xpath(
                    '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                ).extract()
                main_prod_page_url = self.driver.current_url
                for prod in prod_list:
                    url = urljoin('https://www.ssense.com/', prod)
                    if url in self.progress:
                        print("\n\nProduct already in progress {}\n\n".
                              format(url))
                        continue
                    self.driver.get(url)
                    __delay_time__()
                    self.sel = scrapy.Selector(
                        text=self.driver.page_source)
                    # scrape the product page
                    prod_num = self.sel.xpath(
                        '//span[@class="product-sku"]/text()'
                    ).extract_first()
                    if prod_num is None:
                        prod_num = ''
                    brand = self.sel.xpath(
                        '//h1[@class="product-brand"]/a/text()'
                    ).extract_first()
                    name = self.sel.xpath(
                        '//h2[@class="product-name"]/text()'
                    ).extract_first()
                    price = self.sel.xpath(
                        '//span[@class="price"]/text()').extract_first()
                    prod_desc = self.sel.xpath(
                        '//p[@class="vspace1 product-description-text"]/span/text()'
                    ).extract()
                    prod_desc = ' '.join(prod_desc)
                    # strip quotes so the description is safe to record
                    prod_desc = prod_desc.replace("'", "")
                    prod_desc = prod_desc.replace('"', '')
                    # ssense has no color swatches: everything is stored
                    # under a single empty-string color key
                    color = {}
                    c = ''
                    imgs = self.sel.xpath(
                        '//div[@class="product-images-container"]/div/div/div'
                    ).extract()
                    imgs = [im.split('"')[5] for im in imgs]
                    size = self.sel.xpath(
                        '//*[@id="size"]/option/text()').extract()
                    size = [s.strip() for s in size][1:]  # drop placeholder
                    size = [s.split()[0] for s in size]
                    color[c] = {'img_url': imgs, 'size': size, 'rel': []}
                    # related ('Styled with') products, if the tab exists
                    indicator = self.sel.xpath(
                        '//div[@class="related-product-tab inline-block smartphone-portrait-narrow-full-width"]/a/span[1]/text()'
                    ).extract_first()
                    if indicator == 'Styled with':
                        rels = self.sel.xpath(
                            '//div[@class="related-product-container tab-container"]/div[2]/div/div/div/div/a/@href'
                        ).extract()
                        for rel_url in rels:
                            rel_result = {}
                            rel_url = urljoin('https://www.ssense.com/',
                                              rel_url)
                            self.driver.get(rel_url)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            # scrape the related product page
                            rel_result['rel_brand'] = self.sel.xpath(
                                '//h1[@class="product-brand"]/a/text()'
                            ).extract_first()
                            rel_result['rel_name'] = self.sel.xpath(
                                '//h2[@class="product-name"]/text()'
                            ).extract_first()
                            rel_prod_num = self.sel.xpath(
                                '//span[@class="product-sku"]/text()'
                            ).extract_first()
                            if rel_prod_num is None:
                                rel_prod_num = ''
                            rel_result['rel_prod_num'] = rel_prod_num
                            rel_prod_desc = self.sel.xpath(
                                '//p[@class="vspace1 product-description-text"]/span/text()'
                            ).extract()
                            rel_prod_desc = ' '.join(rel_prod_desc)
                            rel_prod_desc = rel_prod_desc.replace('"', '')
                            rel_prod_desc = rel_prod_desc.replace("'", "")
                            rel_result['rel_prod_desc'] = rel_prod_desc
                            color[c]['rel'].append(rel_result)
                    # write progress
                    __record_progress__(url)
                    # write data
                    __record_data__(url, cat, brand, name, price, prod_num,
                                    prod_desc, color)
                    # go back to main page
                    self.driver.get(main_prod_page_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                # click next page; a successful change is detected by the
                # first figure link differing from the previous page's
                page_list = self.sel.xpath(
                    '//div[@class="span16 text-center"]/nav/ul/li/a/text()'
                ).extract()
                next_page_num = curr_page_num + 1
                if str(next_page_num) not in page_list:
                    # final page
                    print("\n\nFinal Page\n\n")
                    break
                else:
                    # click next page
                    prev_page_item = self.sel.xpath(
                        '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                    ).extract_first()
                    break_item = False
                    while True:
                        xpath = '//div[@class="span16 text-center"]/nav/ul/li/a[contains(text(), "{}")]'.format(
                            str(next_page_num))
                        try:
                            element_visible = True
                            actions = chains(self.driver)
                            visible_cnt = 0
                            # poll (up to 4 waits) until the page link exists
                            while True:
                                try:
                                    clickable = self.driver.find_element_by_xpath(
                                        xpath)
                                    break
                                except NoSuchElementException:
                                    try:
                                        WebDriverWait(
                                            self.driver, 120
                                        ).until(
                                            EC.presence_of_element_located(
                                                (By.XPATH, xpath)))
                                        continue
                                    except TimeoutException:
                                        print(
                                            "\n\nElement Not Visible.\n\n")
                                        if visible_cnt < 3:
                                            visible_cnt += 1
                                            continue
                                        else:
                                            element_visible = False
                                            break
                            if not element_visible:
                                # give up on this category page entirely
                                break_item = True
                                print("\n\nMove on to Next Item...\n\n")
                                break
                            actions.move_to_element(clickable).perform()
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING next page")
                            continue
                        # retry the JS click while the element goes stale
                        while True:
                            try:
                                self.driver.execute_script(
                                    'arguments[0].click()', to_be_clicked)
                                break
                            except StaleElementReferenceException:
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                                continue
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        curr_item = self.sel.xpath(
                            '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                        ).extract_first()
                        print('\n\n{}\n\n'.format(curr_item))
                        if prev_page_item != curr_item:
                            print("\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                  format(str(next_page_num)))
                            curr_page_num += 1
                            break
                    if break_item:
                        break
def click_out(self):
    """Click 50px down-right of the current mouse position.

    Used to dismiss any open submenu/overlay by clicking away from it.
    """
    dismiss = chains(self.driver)
    dismiss.move_by_offset(50, 50)
    dismiss.click()
    dismiss.perform()
def parse(self, response):
    """Crawl ssfshop.com category by category.

    For each category past index 9, opens the category listing, visits every
    product detail page, scrapes brand/name/price/description/size/images,
    and persists results via the module-level __record_progress__ /
    __record_data__ helpers, then pages through the listing until no next
    page or next-button remains.

    FIX: in the "next button" branch the page-change probe (`curr_item`)
    previously read the paging-bar link text while `prev_page_item` held the
    first item's image src — the two could never be equal, so the
    "page advanced" check passed unconditionally even when the click failed.
    It now probes the same first-item image src as the numbered-page branch.
    """
    self.driver.get(response.url)
    # Spoof navigator/WebGL properties so the headless browser looks like a
    # regular desktop Chrome (basic bot-detection evasion).
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
    )
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
    )
    self.driver.execute_script(
        "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
    )
    self.sel = scrapy.Selector(text=self.driver.page_source)
    cat_list = self.sel.xpath(
        '/html/body/div[3]/nav/ul[1]/li[2]/ul/li/a/text()').extract()
    for catidx, category in enumerate(cat_list):
        if catidx < 9:  # skip 'All' — presumably a resume point; TODO confirm
            continue
        # click category
        try:
            actions = chains(self.driver)
            xpath = '/html/body/div[3]/nav/ul[1]/li[2]/ul/li[{}]/a'.format(
                str(catidx + 1))
            cat = self.driver.find_element_by_xpath(xpath)
            actions.move_to_element(cat).perform()
            to_be_clicked = WebDriverWait(self.driver, 120).until(
                EC.element_to_be_clickable((By.XPATH, xpath)))
        except TimeoutException:
            print("\n\nTimeoutExceptoin while clicking category {}".format(
                category))
            continue
        self.driver.execute_script('arguments[0].click()', to_be_clicked)
        __delay_time__()
        self.sel = scrapy.Selector(text=self.driver.page_source)
        print('\n\n {} clicked\n\n'.format(category))
        curr_page_num = 1
        while True:
            # crawl items on the current listing page
            item_lists = self.sel.xpath(
                '//*[@id="dspGood"]/li/@data-prdno').extract()
            for li_idx, prod_num in enumerate(item_lists):
                curr_url = self.driver.current_url
                # Brand and product number are the 2nd and 4th single-quoted
                # tokens inside the item's onclick handler.
                xpath = '//*[@id="dspGood"]/li[{}]/a/@onclick'.format(
                    str(li_idx + 1))
                script_res = self.sel.xpath(xpath).extract_first().split(
                    "'")
                brand = script_res[1]
                prod_num = script_res[3]  # overwrites the loop variable
                if prod_num in self.progress:  # already scraped on a prior run
                    continue
                url = "http://www.ssfshop.com/{}/{}/good?dspCtgryNo=&brandShopNo=&brndShopId=".format(
                    brand, prod_num)
                print('\n\n\n\n{}\n\n\n\n'.format(url))
                # Retry the product page on timeout, falling back to the
                # listing page if the product page keeps timing out.
                while True:
                    try:
                        self.driver.get(url)
                        break
                    except TimeoutException:
                        print(curr_url)
                        try:
                            self.driver.get(curr_url)
                            break
                        except TimeoutException:
                            continue
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                # scrape page
                brand = self.sel.xpath(
                    '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/h3/a/text()'
                ).extract_first().strip().split('\xa0')[0]
                name = self.sel.xpath(
                    '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/h1/text()'
                ).extract()[-1].strip()
                price = self.sel.xpath(
                    '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/div[1]/em/text()'
                ).extract_first()
                if '\xa0' in price:
                    price = price.strip('\xa0')
                prod_desc = self.sel.xpath(
                    '//*[@id="about"]/div/text()').extract()
                # NOTE(review): extract() returns a list, never None; an
                # empty description joins to '' either way.
                if prod_desc is None:
                    prod_desc = ''
                else:
                    prod_desc = (' '.join(prod_desc)).strip()
                # Single unnamed "color" bucket holding images and sizes.
                color = {}
                c = ''
                color[c] = {'img_url': [], 'size': [], 'rel': []}
                size_list = self.sel.xpath(
                    '//div[@class="option"]/ul/li/a/em/text()').extract()
                size = []
                for idx, s in enumerate(size_list):
                    # skip parenthesized entries (annotations, not sizes) —
                    # presumably; TODO confirm against live markup
                    if '(' in s.strip() or ')' in s.strip():
                        continue
                    size.append(s.strip().strip('/').strip())
                # Image URLs are embedded in inline scripts as escaped quotes.
                img_script_list = self.sel.xpath(
                    '/html/body/div[3]/div[1]/section[2]/div[1]/div[1]/script/text()'
                ).extract()
                imgs = []
                for img_script in img_script_list:
                    temp = img_script.split('\\"')
                    if len(temp) == 1:
                        continue
                    else:
                        imgs.append(temp[1])
                color[c]['img_url'] = imgs
                color[c]['size'] = size
                # write progress
                __record_progress__(prod_num)
                # write data
                __record_data__(url, category, brand, name, price, prod_num,
                                prod_desc, color)
                # go back to main page
                self.driver.get(curr_url)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
            # click next page
            page_list = self.sel.xpath(
                '//div[@id="pagingArea"]/a/text()').extract()
            next_page_num = curr_page_num + 1
            if str(next_page_num) not in page_list:
                next_btns_list = self.sel.xpath(
                    '//div[@id="pagingArea"]/a/@alt').extract()
                if '다음페이지' in next_btns_list:
                    # click next button (arrow to the next page block);
                    # remember the first item to detect the page change
                    prev_page_item = self.sel.xpath(
                        '//*[@id="dspGood"]/li[1]/a/img/@src').extract_first()
                    while True:
                        xpath = '//div[@id="pagingArea"]/a[contains(@alt, "{}")]'.format(
                            '다음페이지')
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING Next Button")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        # FIX: probe the same first-item image src as
                        # prev_page_item (was the paging-bar link text, which
                        # never matched, so the check always "succeeded").
                        curr_item = self.sel.xpath(
                            '//*[@id="dspGood"]/li[1]/a/img/@src'
                        ).extract_first()
                        if prev_page_item != curr_item:
                            print(
                                "\n\n\n\n\nNEXT BUTTON CLICKED: page now {}\n\n"
                                .format(str(curr_page_num + 1)))
                            curr_page_num += 1
                            break
                else:
                    # final page
                    break
            else:
                # click next page (numbered link)
                prev_page_item = self.sel.xpath(
                    '//*[@id="dspGood"]/li[1]/a/img/@src').extract_first()
                while True:
                    xpath = '//div[@id="pagingArea"]/a[contains(text(), "{}")]'.format(
                        str(next_page_num))
                    try:
                        to_be_clicked = WebDriverWait(
                            self.driver, 120).until(
                                EC.element_to_be_clickable(
                                    (By.XPATH, xpath)))
                    except TimeoutException:
                        print("\n\nERROR WHILE CILCKING next page")
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(
                        text=self.driver.page_source)
                    curr_item = self.sel.xpath(
                        '//*[@id="dspGood"]/li[1]/a/img/@src'
                    ).extract_first()
                    if prev_page_item != curr_item:
                        print(
                            "\n\n\n\n\nNEXT PAGE CLICKED: page now {}\n\n".
                            format(str(next_page_num)))
                        curr_page_num += 1
                        break
def parse(self, response):
    """Crawl thehyundai.com category by category, brand by brand.

    Opens each category URL from the side menu, expands the brand list
    (clicking '더보기' / "load more" when present), clicks each brand, and
    scrapes name/image/price straight off the listing pages, persisting via
    the module-level __record_progress__ / __record_data__ helpers. Pages
    through results using numbered links or the "next" arrow button.

    FIX: the flag initializer was misspelled `elment_exists` — a dead
    assignment to the wrong name; corrected to `element_exists`. The
    `== True` comparisons on this always-bool flag were also replaced with
    plain truthiness (behavior unchanged).
    """
    self.driver.get(response.url)
    # Spoof navigator/WebGL properties so the headless browser looks like a
    # regular desktop Chrome (basic bot-detection evasion).
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
    )
    self.driver.execute_script(
        "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
    )
    self.driver.execute_script(
        "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
    )
    self.sel = scrapy.Selector(text=self.driver.page_source)
    caturls = self.sel.xpath(
        '//ul[@class="cmenu"]/li/ul/li/a/@href').extract()
    catlist = self.sel.xpath(
        '//ul[@class="cmenu"]/li/ul/li/a/text()').extract()
    for catidx, cat in enumerate(catlist):
        category = cat.strip()
        url = urljoin('http://www.thehyundai.com/', caturls[catidx])
        self.driver.get(url)
        __delay_time__()
        self.sel = scrapy.Selector(text=self.driver.page_source)
        print("\n\n{} clicked\n\n".format(category))
        # brands
        brand_lists = self.sel.xpath(
            '//ul[@class="brand-list-wrap"]/li/a/text()').extract()
        for brandidx, brand in enumerate(brand_lists):
            brand = brand.strip().split("(")[0].strip()
            # check if 'load more' element exists
            element_exists = False  # FIX: was `elment_exists` (typo, dead assignment)
            try:
                xpath = '//a[@class="btn-more-tog"]'
                self.driver.find_element_by_xpath(xpath)
                element_exists = True
            except NoSuchElementException:
                element_exists = False
            if element_exists:
                # Only a toggle currently reading '더보기' ("load more")
                # counts — otherwise the list is already expanded.
                load_more_text = self.sel.xpath(
                    '//a[@class="btn-more-tog"]/text()').extract_first()
                if load_more_text == '더보기':
                    element_exists = True
                else:
                    element_exists = False
            if element_exists:
                # click load-more so the full brand list is in the DOM
                try:
                    actions = chains(self.driver)
                    xpath = '//a[@class="btn-more-tog"]'
                    load_more = self.driver.find_element_by_xpath(xpath)
                    actions.move_to_element(load_more).perform()
                    to_be_clicked = WebDriverWait(self.driver, 120).until(
                        EC.element_to_be_clickable((By.XPATH, xpath)))
                except TimeoutException:
                    print(
                        "\n\nTimeoutExceptoin while clicking {} load more".
                        format(category))
                    continue
                self.driver.execute_script('arguments[0].click()',
                                           to_be_clicked)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                print('\n\n {} load more clicked\n\n'.format(category))
            # click brand (expanded list carries the extra 'open' class)
            try:
                actions = chains(self.driver)
                if element_exists:
                    xpath = '//ul[@class="brand-list-wrap open"]/li[{}]/a'.format(
                        str(brandidx + 1))
                else:
                    xpath = '//ul[@class="brand-list-wrap"]/li[{}]/a'.format(
                        str(brandidx + 1))
                brand_click = self.driver.find_element_by_xpath(xpath)
                actions.move_to_element(brand_click).perform()
                to_be_clicked = WebDriverWait(self.driver, 120).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            except TimeoutException:
                print("\n\nTimeoutExceptoin while clicking {} ".format(
                    brand))
                continue
            self.driver.execute_script('arguments[0].click()', to_be_clicked)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)
            print('\n\n {} clicked\n\n'.format(brand))
            curr_page_num = 1
            while True:
                # fetch product data straight from the listing page
                # (no detail page is visited for this site)
                prod_urls = self.sel.xpath(
                    '//ul[@class="product-list type1"]/li/div/div[1]/a/@href'
                ).extract()
                names = self.sel.xpath(
                    '//ul[@class="product-list type1"]/li/div/div[1]/a/img/@alt'
                ).extract()
                imgs = self.sel.xpath(
                    '//ul[@class="product-list type1"]/li/div/div[1]/a/img/@src'
                ).extract()
                prices = self.sel.xpath(
                    '//ul[@class="product-list type1"]/li/div/div[2]/div/span[1]/text()'
                ).extract()
                for prod_idx, produ in enumerate(prod_urls):
                    prod_url = urljoin('https://www.thehyundai.com/', produ)
                    name = names[prod_idx]
                    img = imgs[prod_idx]
                    prod_desc = ''  # not available on listing pages
                    prod_num = ''
                    price = prices[prod_idx]
                    color = {}
                    c = ''
                    color[c] = {'img_url': img, 'size': '', 'rel': []}
                    # write progress
                    __record_progress__(prod_url)
                    # write data
                    __record_data__(prod_url, category, brand, name, price,
                                    prod_num, prod_desc, color)
                # next page
                page_list = self.sel.xpath(
                    '//*[@id="container"]/div[2]/div/div[2]/ul/li/a/text()'
                ).extract()
                next_page_num = curr_page_num + 1
                if str(next_page_num) not in page_list:
                    next_button_exists = False
                    try:
                        xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[@class="direction next_1"]'
                        self.driver.find_element_by_xpath(xpath)
                        next_button_exists = True
                    except NoSuchElementException:
                        next_button_exists = False
                    if next_button_exists:
                        # click next button; remember the first product to
                        # detect that the page actually changed
                        prev_page_item = self.sel.xpath(
                            '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                        ).extract_first()
                        while True:
                            xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[@class="direction next_1"]'
                            try:
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print(
                                    "\n\nERROR WHILE CILCKING Next Button after {}"
                                    .format(str(curr_page_num)))
                                continue
                            self.driver.execute_script(
                                'arguments[0].click()', to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            curr_item = self.sel.xpath(
                                '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                            ).extract_first()
                            if prev_page_item != curr_item:
                                print(
                                    "\n\nNEXT BUTTON CLICKED: page now {}\n\n"
                                    .format(str(curr_page_num + 1)))
                                curr_page_num += 1
                                break
                    else:
                        # final page
                        break
                else:
                    # click next page (numbered link)
                    prev_page_item = self.sel.xpath(
                        '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                    ).extract_first()
                    while True:
                        xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[contains(text(), "{}")]'.format(
                            str(next_page_num))
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING next page")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        curr_item = self.sel.xpath(
                            '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                        ).extract_first()
                        if prev_page_item != curr_item:
                            print("\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                  format(str(next_page_num)))
                            curr_page_num += 1
                            break