Esempio n. 1
0
    def testCrawlJobsScheduledChecked(self):
        """
        Exercise the mark_job_scheduled and mark_job_checked logic.

        Each url is first marked as scheduled, then the number of records
        returned by querying scheduler.CrawlJobModel is compared with the
        number of urls.  Finally each url is marked as checked with a fresh
        etag and last-modified timestamp, and the returned record must
        carry those same values.
        """
        urls = [u"http://feeds.feedburner.com/43folders",
                u"http://advocacy.python.org/podcasts/littlebit.rss",
                u"http://friendfeed.com/alawrence?format=atom",
                u"http://feeds.feedburner.com/antiwar"]

        # indicate each url is scheduled to be crawled
        with transaction.manager:
            for url in urls:
                rec = scheduler.mark_job_scheduled(url)
                self.assertTrue(rec, "no rec for url %s" % url)

        recs = meta.Session().query(scheduler.CrawlJobModel).all()
        self.assertEqual(len(recs), len(urls))

        # pretend we crawled each url and update its record; iterate
        # explicitly instead of relying on the loop variable leaked from
        # the scheduling loop, which only ever covered the last url
        with transaction.manager:
            for url in urls:
                etag = str(uuid.uuid4())
                last_modified = datetime.datetime.now()
                rec = scheduler.mark_job_checked(url,
                                                 etag=etag,
                                                 last_modified=last_modified)
                self.assertTrue(rec, "no rec for url %s" % url)
                self.assertEqual(etag, rec.etag)
                self.assertEqual(last_modified, rec.last_modified)
Esempio n. 2
0
 def testAssumeChecked(self):
     """
     Coverage for marking an unscheduled url as checked.

     If a url comes in that hasn't been scheduled, mark_job_checked is
     expected to assume it was checked and hand back a record, which
     this test then adds to the session.
     """
     with transaction.manager:
         newrec = scheduler.mark_job_checked("http://gooogle.com")
         # verify a record came back before handing it to the session,
         # so a missing record fails here rather than inside add()
         self.assertTrue(newrec)
         meta.Session().add(newrec)