def load_more(url):
    """Repeatedly click the 'load more' button on a Fashion ID page.

    Opens *url* in a fresh Crawler session and clicks the `ctf-more`
    anchor in an infinite loop, printing a running click count.  The
    loop only ends when an exception is raised (typically when the
    button disappears or becomes stale); the handler doubles as the
    normal exit path, closing the browser and reporting elapsed time.

    :param url: page URL; its title must contain
        "Influencer Love | Fashion ID" or the assert fires.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Influencer Love | Fashion ID" in crawler.title, "TITLE INCORRECT"
    try:
        times_clicked = 0
        start = int(time())  # wall-clock start, whole seconds
        while True:
            # delete tab if we accidentally trip a twitter tab open
            # (clicking embedded tweets can spawn a Twitter tab; close it
            # with Cmd+W — macOS key chord — then tidy any leftover tabs)
            if "Twitter" in crawler.getTitle():
                crawler.driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 'w')
                any_button = crawler.findElementsByXPath("//a")[0]
                any_button.send_keys(Keys.COMMAND + 'w')
                crawler.closeExtraTabs()
            # find load more button, flash it for visibility, click it
            load_more_button = crawler.findElementByXPath("//a[@id='ctf-more']")
            crawler.highlight("//a[@id='ctf-more']")
            crawler.click(load_more_button)
            times_clicked += 1
            print('%s CLICKS' % times_clicked)
            crawler.closeExtraTabs()
    except Exception as e:
        # Expected terminus of the while-True loop: report, clean up,
        # and print timing stats.
        print('EXCEPTION', e)
        crawler.close()
        end = int(time())
        print(start)
        print(end)
        print('TOTAL TIME ELAPSED: %s' % (end - start))
def random_page(url):
    """Fetch one random Urban Dictionary entry as a JSON string.

    Opens *url*, clicks the site's "random" button, and scrapes the
    first word and its meaning from the resulting page.

    :param url: Urban Dictionary URL; its title must contain
        "Urban Dictionary" or the assert fires.
    :return: JSON string ``{"word": ..., "meaning": ...}``, or ``None``
        if scraping failed.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Urban Dictionary" in crawler.title, "TITLE INCORRECT"
    try:
        # find random page button, flash it, click it
        random_button = crawler.findElementByXPath(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.highlight(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.click(random_button)
        # extract content: first word on the page and its first meaning
        content = {}
        content["word"] = crawler.findElementByXPath(
            "(//a[@class='word'])[1]").text
        crawler.highlight("(//a[@class='word'])[1]")
        content["meaning"] = crawler.findElementByXPath(
            "(//div[@class='meaning'])[1]").text
        crawler.highlight("(//div[@class='meaning'])[1]")
        content_dict = dumps(content)
        return content_dict
    except Exception as e:
        # FIX: was a bare `except:` whose handler printed the unbound
        # name `e`, raising NameError and masking the original error.
        print('MISSING', e)
    finally:
        # FIX: the success path returned before close() ran, leaking the
        # browser session; `finally` closes it on every path.
        crawler.close()
class CrawlerTest(unittest.TestCase):
    """Smoke test: navigate to a page and enumerate its links."""

    def setUp(self):
        # Fresh browser session per test.
        self.crawler = Crawler()

    def testNavigateAndRetrieveLinks(self):
        crawler = self.crawler
        crawler.navigate("http://www.google.ca")
        # findNext(".*") — presumably returns all links matching the
        # regex; TODO confirm against the Crawler implementation.
        ret = crawler.findNext(".*")
        for r in ret:
            # FIX: `print r` is Python 2 statement syntax — a
            # SyntaxError under Python 3, which the rest of this file
            # (f-strings) requires.
            print(r)

    def tearDown(self):
        self.crawler.close()
def get_reviews(url):
    """Scrape photo reviews from an AliExpress-style product page.

    Dismisses the entry banner, scrolls to the product-detail tabs,
    opens the feedback tab inside its iframe, filters to reviews with
    photos, then collects each review's text and image (downloading the
    image to ``{i}_review.png`` in the working directory).

    :param url: product page URL; its title must contain "Sunglasses"
        or the assert fires.
    :return: list of dicts with keys ``text``, ``src`` (image URL) and
        ``file`` (local filename); whatever was gathered before any
        exception is still returned.
    """
    posts = []
    crawler = Crawler()
    crawler.get(url)
    assert "Sunglasses" in crawler.title, "TITLE INCORRECT"
    try:
        # dismiss the promotional dialog that covers the page on load
        close_banner = crawler.findElementByXPath(
            "//a[@class='next-dialog-close']")
        crawler.click(close_banner)
        # scroll down to the product-detail section so its tabs render
        product_details = crawler.findElementByXPath(
            "//div[@id='product-detail']")
        crawler.scrollTo(product_details)
        # open the feedback (reviews) tab
        reviews_tab = crawler.findElementByXPath(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.scrollTo(reviews_tab)
        crawler.highlight(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.click(reviews_tab)
        # reviews live inside their own iframe — switch context into it
        crawler.switchFrameByXPath("//iframe[@id='product-evaluation']")
        # restrict listing to reviews that include photos
        photo_filter = crawler.findElementByXPath(
            "//label[text()[contains(.,'Photo')]]")
        crawler.click(photo_filter)
        crawler.highlight("//label[text()[contains(.,'Photo')]]")
        reviews = crawler.findElementsByXPath(
            "//div[@class='feedback-item clearfix']")
        for count, review in enumerate(reviews):
            post = {}
            # review text and first image, located relative to this item
            post["text"] = review.find_element_by_xpath(".//dt/span").text
            print(post["text"])
            review_pic = review.find_element_by_xpath(".//img")
            post["src"] = review_pic.get_attribute("src")
            print(post["src"])
            # download the photo locally, named by its position
            post["file"] = f"{count}_review.png"
            urllib.request.urlretrieve(post["src"], post["file"])
            posts.append(post)
    except Exception as e:
        # best-effort scrape: log and fall through with what we have
        print('ERROR', e)
    crawler.close()
    return posts
def write_reviews(url, reviews):
    """Submit each scraped review through a Loox review widget.

    For every review dict (keys ``file`` and ``text``, as produced by
    ``get_reviews``) this opens the Loox dialog on the product page,
    gives a "Love it" rating, uploads the review photo, types the
    review text, and fills in a randomly generated reviewer identity.
    Failures on one review are logged and the loop moves on.

    :param url: product page URL; its title must contain
        "Fall Animal Costumes" or the assert fires.
    :param reviews: iterable of dicts with ``file`` (local image path
        relative to the CWD) and ``text`` keys.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Fall Animal Costumes" in crawler.title, "TITLE INCORRECT"
    for review in reviews:
        try:
            # the "write a review" button lives in the Loox reviews iframe
            crawler.switchFrameByXPath("//iframe[@id='looxReviewsFrame']")
            write_review_button = crawler.findElementByXPath(
                "//button[@id='write']")
            crawler.scrollTo(write_review_button)
            crawler.highlight("//button[@id='write']")
            crawler.click(write_review_button)
            # clicking spawns a separate dialog iframe at the top level,
            # so hop back to the parent before switching into it
            crawler.switchToParentFrame()
            crawler.switchFrameByXPath("//iframe[@id='looxDialog']")
            # rating review — choose the "Love it" option
            love_it_button = crawler.findElementByXPath(
                "//div/span[contains(text(), 'Love')]/..")
            crawler.highlight("//div/span[contains(text(), 'Love')]/..")
            crawler.click(love_it_button)
            # photo review — the real <input type=file> is hidden; send
            # the absolute file path to it to trigger the upload
            upload_button = crawler.findPresentElementByXPath(
                "//input[@id='imageupload']")
            crawler.highlight("//label[contains(text(), 'Choose')]")
            crawler.click(upload_button)
            cwd = Path.cwd()
            file_name = f"{cwd}/{review['file']}"
            print(file_name)
            upload_button.send_keys(file_name)
            # generate a random reviewer identity (coin-flip gender)
            first_name = get_first_name(
                gender='male' if random() > .5 else 'female')
            last_name = get_last_name()
            email = f"{first_name}.{last_name}@gmail.com"
            # written review
            text_field = crawler.findElementByXPath("//textarea")
            crawler.highlight("//textarea")
            crawler.click(text_field)
            text_field.send_keys(review["text"])
            next_button = crawler.findElementByXPath(
                "//div[contains(text(),'Next')]")
            crawler.highlight("//div[contains(text(),'Next')]")
            crawler.click(next_button)
            # reviewer details page: first name, last name, email
            first_name_field = crawler.findElementByXPath(
                "//input[@id='first_name']")
            crawler.highlight("//input[@id='first_name']")
            crawler.click(first_name_field)
            first_name_field.send_keys(first_name)
            print("FIRST NAME: ", first_name)
            last_name_field = crawler.findElementByXPath(
                "//input[@id='last_name']")
            crawler.highlight("//input[@id='last_name']")
            crawler.click(last_name_field)
            last_name_field.send_keys(last_name)
            print("LAST NAME: ", last_name)
            email_field = crawler.findElementByXPath("//input[@id='email']")
            crawler.highlight("//input[@id='email']")
            crawler.click(email_field)
            email_field.send_keys(email)
            # exit review — confirm, then close the dialog and return to
            # the top-level document for the next iteration
            done_button = crawler.findElementByXPath(
                "//button[contains(text(),'Done')]")
            crawler.highlight("//button[contains(text(),'Done')]")
            crawler.click(done_button)
            exit_button = crawler.findElementByXPath("//a[@id='close-elem']")
            crawler.highlight("//a[@id='close-elem']")
            crawler.click(exit_button)
            crawler.switchToParentFrame()
        except Exception as e:
            # best-effort: log and continue with the next review
            print('MISSING', e)
    # NOTE(review): original indentation was lost; close() is placed
    # after the loop so one browser serves all reviews — confirm intent.
    crawler.close()