Example #1
    def setup_crawler(self, supermarket, reactor_control):
        """Set up the Scrapy crawler. 
        See http://doc.scrapy.org/en/latest/topics/practices.html#run-scrapy-from-a-script.
        
        Keyword arguments:
        supermarket -- the supermarket whose crawler should be set up
        """

        cachefile = supermarket_filename(supermarket)
        if isfile(cachefile):
            remove(cachefile)

        settings = get_project_settings()

        url = supermarket_url(supermarket)
        settings.set('FEED_URI', cachefile)

        spider = MySupermarketSpider(url)
        crawler = Crawler(settings)
        crawler.signals.connect(reactor_control.remove_crawler,
                                signal=signals.spider_closed)
        crawler.configure()
        crawler.crawl(spider)
        crawler.start()
        reactor_control.add_crawler()
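
The reactor_control argument is not defined in this snippet. Following the multi-spider pattern from the Scrapy documentation linked in the docstring, a minimal sketch of such a helper could look like the following (the class name and counter attribute are assumptions, not part of the original project):

from twisted.internet import reactor


class ReactorControl(object):
    """Track running crawlers and stop the Twisted reactor when the last one closes."""

    def __init__(self):
        self.crawlers_running = 0

    def add_crawler(self):
        # Called once per crawler after crawler.start().
        self.crawlers_running += 1

    def remove_crawler(self):
        # Connected to the spider_closed signal in setup_crawler above.
        self.crawlers_running -= 1
        if self.crawlers_running == 0:
            reactor.stop()

With such a helper, the caller sets up every crawler first and then calls reactor.run() once; the reactor stops automatically when the last spider closes.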
Example #2
    def cache_exists(self, supermarket):
        """Check whether a JSON file already exists for data scraped from
        the given supermarket, and if so, whether it was created today.
        Note that 'created today' is not the same as 'age < 24 hours'. Prices
        are assumed to change overnight so a cachefile created at 9pm
        yesterday is considered out of date at 9am today (but a cachefile
        created at 9am is not out of date at 9pm).

        Keyword arguments:
        supermarket -- the supermarket whose cachefile should be checked
        """
        cachefile = supermarket_filename(supermarket)
        if not isfile(cachefile):
            return False

        mtime = datetime.fromtimestamp(getmtime(cachefile))
        now = datetime.now()
        return mtime.date() == now.date()
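
cache_exists relies on a supermarket_filename helper that is not part of the snippet; the snippet itself also assumes from os.path import isfile, getmtime and from datetime import datetime at module level. A minimal sketch of the helper, assuming one JSON cachefile per supermarket in the working directory (the directory and naming scheme are assumptions):

from os.path import join

CACHE_DIR = '.'  # assumed location of the scraped JSON cachefiles


def supermarket_filename(supermarket):
    # Hypothetical: one JSON file per supermarket, named after it.
    return join(CACHE_DIR, '%s.json' % supermarket)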
Example #3
def run():
    """Main method.
    Check which supermarkets were requested, create a scraper, then search the 
    scraped data.
    """
    (options, args) = parse_args()
    if options.all:
        supermarkets = supermarket_names()
    else:
        supermarkets = [options.supermarket]

    scraper = CachingScraper(supermarkets, options.force_refresh)
    log.start()
    scraper.get_data()

    search_phrases = []
    for line in fileinput.input(args):
        search_phrases.append(line.split())

    for supermarket in supermarkets:
        log.msg("*** Savvy buys in %s ***" % supermarket.upper())
        search_file(search_phrases, supermarket_filename(supermarket))
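
parse_args is not shown in this example. Judging from the attributes run() reads (options.all, options.supermarket, options.force_refresh) and from the positional arguments handed to fileinput.input(), a sketch using optparse could look like this (flag names, short options and defaults are assumptions):

from optparse import OptionParser


def parse_args():
    """Parse command-line options; leftover arguments are files of search phrases."""
    parser = OptionParser(usage='usage: %prog [options] [search-phrase-file ...]')
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='scrape every known supermarket')
    parser.add_option('-s', '--supermarket', default=None,
                      help='scrape a single named supermarket')
    parser.add_option('-f', '--force-refresh', dest='force_refresh',
                      action='store_true', default=False,
                      help='ignore existing cachefiles and re-scrape')
    return parser.parse_args()

run() splits each input line on whitespace, so every line of the search-phrase files is treated as a list of words before being passed to search_file.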