                    if self.cache_manager:
                        self.cache_manager.cache_results(scrape.parser, scrape.query, scrape.search_engine_name,
                                                         scrape.scrape_method, scrape.page_number)

                    # Build a SERP object from the parsed results and persist it.
                    if scrape.parser:
                        serp = parse_serp(self.config, parser=scrape.parser, scraper=scrape, query=scrape.query)

                        if self.scraper_search:
                            self.scraper_search.serps.append(serp)

                        if self.session:
                            self.session.add(serp)
                            self.session.commit()

                        store_serp_result(serp, self.config)
        print("----------------------------end def run(self):-----------------------------")


# Demo driver: scrape one keyword on Bing with the asynchronous HTTP scraper.
if __name__ == '__main__':
    from GoogleScraper.config import get_config
    from GoogleScraper.scrape_jobs import default_scrape_jobs_for_keywords
    from GoogleScraper.utils import get_some_words

    some_words = get_some_words(n=1)

    cfg = get_config()
    scrape_jobs = list(default_scrape_jobs_for_keywords(some_words, ['bing'], 'http-async', 1))

    manager = AsyncScrapeScheduler(cfg, scrape_jobs)
    manager.run()
Example #2
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import random
import pprint
from GoogleScraper.utils import get_some_words


# Fetch a sample of words and pretty-print it.
words = get_some_words(n=100)
pprint.pprint(words)


def random_word():
    return random.choice(words)
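
# Example usage: draw one random word from the sample.
print(random_word())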
Example #3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from GoogleScraper import scrape_with_config, GoogleSearchError
from GoogleScraper.utils import get_some_words

keywords = get_some_words(10)
with open("keywords.txt", "wt") as f:
    for word in keywords:
        f.write(word + "\n")

# See the config.cfg file for possible values.
config = {
    "use_own_ip": True,
    "keyword_file": "keywords.txt",
    "search_engines": ["bing", "duckduckgo"],
    "num_pages_for_keyword": 2,
    "scrape_method": "http-async",
    "do_caching": True,
    "output_filename": "out.csv",
}

try:
    search = scrape_with_config(config)
except GoogleSearchError as e:
    # Without a valid search object there is nothing to inspect below.
    print(e)
    raise SystemExit(1)

# Let's inspect what we got: print each SERP and its links.
for serp in search.serps:
    print(serp)
    for link in serp.links:
        print(link)
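
# A minimal sketch (an assumption, not part of the original example) for
# re-reading the CSV output: the config above sets output_filename to out.csv.
# Column names depend on GoogleScraper's CSV writer, so the reader stays
# generic instead of assuming specific fields.
import csv

with open("out.csv", newline="") as f:
    for row in csv.DictReader(f):
        print(row)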