class TestIssueCache(unittest.TestCase):
    """Tests for the mapping-style interface and validation of IssueCache."""

    def setUp(self):
        """Build an IssueCache over the fixture directories."""
        self.cache = IssueCache('tests/fixtures/tmp', 'tests/fixtures/images')

    def tearDown(self):
        """Delete the `tests/fixtures/tmp` tree used by the cache."""
        shutil.rmtree('tests/fixtures/tmp')

    def test_cache(self):
        """Check item assignment, length, lookup, iteration and deletion."""
        self.cache['url1'] = [{'images': [{'path': 'width-large.jpg'}]}]
        self.cache['url2'] = [
            {'images': [{'path': 'width-large.jpg'}]},
            {'images': [{'path': 'width-small.jpg'}]},
            # Represent a missing image
            {'images': []}
        ]
        self.cache['url3'] = [{'images': [{'path': 'width-small.jpg'}]}]

        self.assertEqual(len(self.cache), 3)

        # A lookup returns a sequence whose first element is the stored
        # list of items.
        self.assertEqual(self.cache['url1'][0],
                         [{'images': [{'path': 'width-large.jpg'}]}])

        for key in self.cache:
            self.assertIn(key, ('url1', 'url2', 'url3'))
            v = self.cache[key]
            if key in ('url1', 'url3'):
                self.assertEqual(len(v[0]), 1)
            else:
                self.assertEqual(len(v[0]), 3)

        del self.cache['url1']
        self.assertEqual(len(self.cache), 2)
        self.assertNotIn('url1', self.cache)
        self.assertIn('url2', self.cache)
        self.assertIn('url3', self.cache)

    def test_is_valid(self):
        """Check `is_valid` for a good, a broken and an unknown entry."""
        self.cache['url1'] = [
            {'images': [{'path': 'width-large.jpg'}]},
            {'images': []}
        ]

        # Create a temporary image so 'url2' can be stored, and make
        # sure it is removed again even if storing the entry fails.
        missing_image = 'tests/fixtures/images/missing-image.jpg'
        open(missing_image, 'a').close()
        try:
            self.cache['url2'] = [{'images': [{'path': 'missing-image.jpg'}]}]
        finally:
            # Remove the temporary image
            os.unlink(missing_image)

        self.assertTrue(self.cache.is_valid('url1'))
        # The image referenced by 'url2' no longer exists on disk.
        self.assertFalse(self.cache.is_valid('url2'))
        # 'url3' was never stored in the cache.
        self.assertFalse(self.cache.is_valid('url3'))
def _create_mobi(issue, result=None):
    """RQ job that builds the MOBI document of a single issue.

    The images of the issue are looked up by ``issue.url`` in the issue
    cache.  When the entry is missing or is not valid, the error is
    logged and `result` (if provided) is marked as failed; a non-valid
    entry is also removed from the cache.  Otherwise the MOBI is
    created from the cached images.
    """
    url = issue.url
    store = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)

    # Guard: nothing cached for this issue.
    if url not in store:
        logger.error('Issue not found in issue cache (%s)' % issue)
        if result:
            result.set_status(Result.FAILED)
        return

    # Guard: the cached entry is unusable, so drop it from the cache.
    if not store.is_valid(url):
        logger.error('Issue in issue cache is not valid (%s)' % issue)
        if result:
            result.set_status(Result.FAILED)
        del store[url]
        return

    # Only the first element of the cached entry is needed here.
    cached_images, _ = store[url]
    MobiCtl(issue, cached_images, settings.IMAGES_STORE).create_mobi()
def collect(self, spider):
    """Copy every collected ``url -> images`` pair into the issue cache.

    When `self.items` is empty nothing is stored and an error is
    logged instead.
    """
    if self.items:
        issue_cache = IssueCache(self.issues_store, self.images_store)
        for url, images in self.items.items():
            issue_cache[url] = images
    else:
        # Empty items are reported as an error: there is probably a
        # hidden bug in the spider.
        logger.error('Items are empty, please check [%s]' % spider)
def scrape(self, issues, dry_run=False):
    """Create and run one crawler per issue that is not cached yet.

    Issues whose URL is already present in the issue cache are
    skipped.
    """
    cache = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)

    crawlers = []
    for issue in issues:
        if issue.url in cache:
            continue
        crawler = self._create_crawler(
            issue.manga.source.name.lower(),
            issue.manga.name,
            issue.number,
            issue.url,
            dry_run,
        )
        crawlers.append(crawler)

    ProcessControl(crawlers, self.process).run()
def handle(self, *args, **options):
    """Run the clean-up selected by ``options['command']``.

    Options read: ``command``, ``force``, ``list``, ``remove``,
    ``days``, ``hours``, ``spiders`` and ``loglevel``.

    Raises:
        CommandError: if none of the `force`, `list` or `remove`
            actions is set, if neither `days` nor `hours` is given
            for a command other than ``cover``, or if the command is
            not one of the known values.
    """
    command = options['command']

    actions = ('force', 'list', 'remove')
    if not any(options[i] for i in actions):
        msg = 'Please, provide one action: %s' % '|'.join(actions)
        raise CommandError(msg)

    days_opt = options['days']
    hours_opt = options['hours']

    # Always bind `hours`.  Previously it was left unassigned when an
    # option was given but evaluated to false (e.g. ``days=0``), and
    # the calls below failed with UnboundLocalError.
    hours = None
    if days_opt is None and hours_opt is None:
        # `cover` command do not use the `days` nor `hours` parameter
        if command not in ('cover',):
            raise CommandError('Provide some days/hours to find old objects.')
    else:
        hours = 24 * int(days_opt if days_opt else 0)
        hours += int(hours_opt if hours_opt else 0)

    sources = self._get_sources(options['spiders'])

    remove = options['remove']
    list_ = options['list']
    force = options['force']
    # Without `force` the list flag is always passed as true.
    list_ = list_ or not force

    loglevel = options['loglevel']
    logger.setLevel(loglevel)

    if command == 'manga':
        self._clean_manga(hours, sources, list_)
    elif command == 'user':
        self._clean_user(hours, remove, list_)
    elif command == 'image-cache':
        cache = os.path.join(settings.IMAGES_STORE, 'full')
        self._clean_image_cache(hours, cache, list_)
    elif command == 'mobi-cache':
        cache = MobiCache(settings.MOBI_STORE)
        self._clean_cache(hours, cache, list_)
    elif command == 'issue-cache':
        cache = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)
        self._clean_cache(hours, cache, list_)
        # After the age-based pass, run the broken-entry pass, which
        # also needs the MOBI cache.
        mobi_cache = MobiCache(settings.MOBI_STORE)
        self._clean_broken_issue_cache(cache, mobi_cache, list_)
    elif command == 'cover':
        self._clean_cover(sources, list_)
    elif command == 'result-processing':
        self._clean_result(hours, Result.PROCESSING, list_)
    elif command == 'result-failed':
        self._clean_result(hours, Result.FAILED, list_)
    else:
        raise CommandError('Not valid command value.')
def setUp(self):
    """Build the IssueCache under test over the fixture directories."""
    issues_dir = 'tests/fixtures/tmp'
    images_dir = 'tests/fixtures/images'
    self.cache = IssueCache(issues_dir, images_dir)