def load_more(url):
    """Repeatedly click the 'load more' button on a Fashion ID page.

    Opens *url* in a fresh Crawler session and clicks the `ctf-more`
    anchor in an infinite loop, printing a running click count.  The
    loop only ends when an exception is raised (typically when the
    button disappears or becomes stale); the handler doubles as the
    normal exit path, closing the browser and reporting elapsed time.

    :param url: page URL; its title must contain
        "Influencer Love | Fashion ID" or the assert fires.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Influencer Love | Fashion ID" in crawler.title, "TITLE INCORRECT"
    try:
        times_clicked = 0
        start = int(time())  # wall-clock start, whole seconds
        while True:
            # delete tab if we accidentally trip a twitter tab open
            # (clicking embedded tweets can spawn a Twitter tab; close it
            # with Cmd+W — macOS key chord — then tidy any leftover tabs)
            if "Twitter" in crawler.getTitle():
                crawler.driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 'w')
                any_button = crawler.findElementsByXPath("//a")[0]
                any_button.send_keys(Keys.COMMAND + 'w')
                crawler.closeExtraTabs()
            # find load more button, flash it for visibility, click it
            load_more_button = crawler.findElementByXPath("//a[@id='ctf-more']")
            crawler.highlight("//a[@id='ctf-more']")
            crawler.click(load_more_button)
            times_clicked += 1
            print('%s CLICKS' % times_clicked)
            crawler.closeExtraTabs()
    except Exception as e:
        # Expected terminus of the while-True loop: report, clean up,
        # and print timing stats.
        print('EXCEPTION', e)
        crawler.close()
        end = int(time())
        print(start)
        print(end)
        print('TOTAL TIME ELAPSED: %s' % (end - start))
def random_page(url):
    """Fetch one random Urban Dictionary entry as a JSON string.

    Opens *url*, clicks the site's "random" button, and scrapes the
    first word and its meaning from the resulting page.

    :param url: Urban Dictionary URL; its title must contain
        "Urban Dictionary" or the assert fires.
    :return: JSON string ``{"word": ..., "meaning": ...}``, or ``None``
        if scraping failed.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Urban Dictionary" in crawler.title, "TITLE INCORRECT"
    try:
        # find random page button, flash it, click it
        random_button = crawler.findElementByXPath(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.highlight(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.click(random_button)
        # extract content: first word on the page and its first meaning
        content = {}
        content["word"] = crawler.findElementByXPath(
            "(//a[@class='word'])[1]").text
        crawler.highlight("(//a[@class='word'])[1]")
        content["meaning"] = crawler.findElementByXPath(
            "(//div[@class='meaning'])[1]").text
        crawler.highlight("(//div[@class='meaning'])[1]")
        content_dict = dumps(content)
        return content_dict
    except Exception as e:
        # FIX: was a bare `except:` whose handler printed the unbound
        # name `e`, raising NameError and masking the original error.
        print('MISSING', e)
    finally:
        # FIX: the success path returned before close() ran, leaking the
        # browser session; `finally` closes it on every path.
        crawler.close()
class CrawlerTest(unittest.TestCase):
    """Smoke test: navigate to a page and enumerate its links."""

    def setUp(self):
        # Fresh browser session per test.
        self.crawler = Crawler()

    def testNavigateAndRetrieveLinks(self):
        crawler = self.crawler
        crawler.navigate("http://www.google.ca")
        # findNext(".*") — presumably returns all links matching the
        # regex; TODO confirm against the Crawler implementation.
        ret = crawler.findNext(".*")
        for r in ret:
            # FIX: `print r` is Python 2 statement syntax — a
            # SyntaxError under Python 3, which the rest of this file
            # (f-strings) requires.
            print(r)

    def tearDown(self):
        self.crawler.close()
def get_reviews(url):
    """Scrape photo reviews from an AliExpress-style product page.

    Dismisses the entry banner, scrolls to the product-detail tabs,
    opens the feedback tab inside its iframe, filters to reviews with
    photos, then collects each review's text and image (downloading the
    image to ``{i}_review.png`` in the working directory).

    :param url: product page URL; its title must contain "Sunglasses"
        or the assert fires.
    :return: list of dicts with keys ``text``, ``src`` (image URL) and
        ``file`` (local filename); whatever was gathered before any
        exception is still returned.
    """
    posts = []
    crawler = Crawler()
    crawler.get(url)
    assert "Sunglasses" in crawler.title, "TITLE INCORRECT"
    try:
        # dismiss the promotional dialog that covers the page on load
        close_banner = crawler.findElementByXPath(
            "//a[@class='next-dialog-close']")
        crawler.click(close_banner)
        # scroll down to the product-detail section so its tabs render
        product_details = crawler.findElementByXPath(
            "//div[@id='product-detail']")
        crawler.scrollTo(product_details)
        # open the feedback (reviews) tab
        reviews_tab = crawler.findElementByXPath(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.scrollTo(reviews_tab)
        crawler.highlight(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.click(reviews_tab)
        # reviews live inside their own iframe — switch context into it
        crawler.switchFrameByXPath("//iframe[@id='product-evaluation']")
        # restrict listing to reviews that include photos
        photo_filter = crawler.findElementByXPath(
            "//label[text()[contains(.,'Photo')]]")
        crawler.click(photo_filter)
        crawler.highlight("//label[text()[contains(.,'Photo')]]")
        reviews = crawler.findElementsByXPath(
            "//div[@class='feedback-item clearfix']")
        for count, review in enumerate(reviews):
            post = {}
            # review text and first image, located relative to this item
            post["text"] = review.find_element_by_xpath(".//dt/span").text
            print(post["text"])
            review_pic = review.find_element_by_xpath(".//img")
            post["src"] = review_pic.get_attribute("src")
            print(post["src"])
            # download the photo locally, named by its position
            post["file"] = f"{count}_review.png"
            urllib.request.urlretrieve(post["src"], post["file"])
            posts.append(post)
    except Exception as e:
        # best-effort scrape: log and fall through with what we have
        print('ERROR', e)
    crawler.close()
    return posts
def write_reviews(url, reviews):
    """Submit each scraped review through a Loox review widget.

    For every review dict (keys ``file`` and ``text``, as produced by
    ``get_reviews``) this opens the Loox dialog on the product page,
    gives a "Love it" rating, uploads the review photo, types the
    review text, and fills in a randomly generated reviewer identity.
    Failures on one review are logged and the loop moves on.

    :param url: product page URL; its title must contain
        "Fall Animal Costumes" or the assert fires.
    :param reviews: iterable of dicts with ``file`` (local image path
        relative to the CWD) and ``text`` keys.
    """
    crawler = Crawler()
    crawler.get(url)
    assert "Fall Animal Costumes" in crawler.title, "TITLE INCORRECT"
    for review in reviews:
        try:
            # the "write a review" button lives in the Loox reviews iframe
            crawler.switchFrameByXPath("//iframe[@id='looxReviewsFrame']")
            write_review_button = crawler.findElementByXPath(
                "//button[@id='write']")
            crawler.scrollTo(write_review_button)
            crawler.highlight("//button[@id='write']")
            crawler.click(write_review_button)
            # clicking spawns a separate dialog iframe at the top level,
            # so hop back to the parent before switching into it
            crawler.switchToParentFrame()
            crawler.switchFrameByXPath("//iframe[@id='looxDialog']")
            # rating review — choose the "Love it" option
            love_it_button = crawler.findElementByXPath(
                "//div/span[contains(text(), 'Love')]/..")
            crawler.highlight("//div/span[contains(text(), 'Love')]/..")
            crawler.click(love_it_button)
            # photo review — the real <input type=file> is hidden; send
            # the absolute file path to it to trigger the upload
            upload_button = crawler.findPresentElementByXPath(
                "//input[@id='imageupload']")
            crawler.highlight("//label[contains(text(), 'Choose')]")
            crawler.click(upload_button)
            cwd = Path.cwd()
            file_name = f"{cwd}/{review['file']}"
            print(file_name)
            upload_button.send_keys(file_name)
            # generate a random reviewer identity (coin-flip gender)
            first_name = get_first_name(
                gender='male' if random() > .5 else 'female')
            last_name = get_last_name()
            email = f"{first_name}.{last_name}@gmail.com"
            # written review
            text_field = crawler.findElementByXPath("//textarea")
            crawler.highlight("//textarea")
            crawler.click(text_field)
            text_field.send_keys(review["text"])
            next_button = crawler.findElementByXPath(
                "//div[contains(text(),'Next')]")
            crawler.highlight("//div[contains(text(),'Next')]")
            crawler.click(next_button)
            # reviewer details page: first name, last name, email
            first_name_field = crawler.findElementByXPath(
                "//input[@id='first_name']")
            crawler.highlight("//input[@id='first_name']")
            crawler.click(first_name_field)
            first_name_field.send_keys(first_name)
            print("FIRST NAME: ", first_name)
            last_name_field = crawler.findElementByXPath(
                "//input[@id='last_name']")
            crawler.highlight("//input[@id='last_name']")
            crawler.click(last_name_field)
            last_name_field.send_keys(last_name)
            print("LAST NAME: ", last_name)
            email_field = crawler.findElementByXPath("//input[@id='email']")
            crawler.highlight("//input[@id='email']")
            crawler.click(email_field)
            email_field.send_keys(email)
            # exit review — confirm, then close the dialog and return to
            # the top-level document for the next iteration
            done_button = crawler.findElementByXPath(
                "//button[contains(text(),'Done')]")
            crawler.highlight("//button[contains(text(),'Done')]")
            crawler.click(done_button)
            exit_button = crawler.findElementByXPath("//a[@id='close-elem']")
            crawler.highlight("//a[@id='close-elem']")
            crawler.click(exit_button)
            crawler.switchToParentFrame()
        except Exception as e:
            # best-effort: log and continue with the next review
            print('MISSING', e)
    # NOTE(review): original indentation was lost; close() is placed
    # after the loop so one browser serves all reviews — confirm intent.
    crawler.close()