def test_export(self):
    # Export every crawled document into the index collection, then verify
    # that a write method was invoked on the collection and that each
    # crawled document can be looked up again by its '_id'.
    self.setup_project()
    master = indexing.MasterCrawler(root=self._tmp_dir.name)
    master.tags = {'test1'}
    collection = self.get_index_collection()

    signac.export(master.crawl(), collection)

    # Either write method having been called is accepted.
    wrote = collection.replace_one.called or collection.bulk_write.called
    self.assertTrue(wrote)

    # Crawl a second time; every document must now be found in the index.
    for entry in master.crawl():
        query = {'_id': entry['_id']}
        self.assertIsNotNone(collection.find_one(query))
def test_master_crawler(self):
    # Crawl with the 'test1' tag and check each yielded document against
    # the file it points to: the file exists on disk, its JSON content
    # agrees with the document on key 'a', and signac.fetch() accepts the
    # document.
    self.setup_project()
    master = indexing.MasterCrawler(root=self._tmp_dir.name)
    master.tags = {'test1'}

    nothing_crawled = True
    for entry in master.crawl():
        nothing_crawled = False

        path = os.path.join(entry['root'], entry['filename'])
        self.assertTrue(os.path.isfile(path))

        with open(path) as stream:
            on_disk = json.load(stream)
        self.assertEqual(on_disk['a'], entry['a'])

        # Only checks that the fetched object can be entered and exited.
        with signac.fetch(entry):
            pass

    # Guard against the loop body never having run.
    self.assertFalse(nothing_crawled)
def test_fetch(self):
    # Exercise signac.fetch() and indexing.fetched():
    #   * fetch(None) must raise ValueError and fetch({}) must raise
    #     errors.FetchError;
    #   * crawling with the 'test1' tag is expected to yield two documents,
    #     each of which can be fetched;
    #   * fetched() pairs each document with a file whose JSON content
    #     matches the document on key 'a'.
    with self.assertRaises(ValueError):
        signac.fetch(None)
    with self.assertRaises(errors.FetchError):
        signac.fetch(dict())

    self.setup_project()
    crawler = indexing.MasterCrawler(root=self._tmp_dir.name)
    crawler.tags = {'test1'}
    docs = list(crawler.crawl())
    self.assertEqual(len(docs), 2)

    # Only checks that each fetched object can be entered and exited.
    for doc in docs:
        with signac.fetch(doc):
            pass

    for doc, file in indexing.fetched(docs):
        # Close the handle even when parsing or the comparison fails, so a
        # failing run does not leak open files.
        try:
            doc2 = json.load(file)
            self.assertEqual(doc['a'], doc2['a'])
        finally:
            file.close()
def test_export_to_mirror(self):
    # Check signac.export_to_mirror() in three phases:
    #   1. a document stripped of its 'file_id' is rejected with
    #      errors.ExportError;
    #   2. intact documents are exported to both the index and the mirror;
    #   3. every document is then present in the index and its file can be
    #      obtained from the mirror.
    self.setup_project()
    master = indexing.MasterCrawler(root=self._tmp_dir.name)
    master.tags = {'test1'}
    collection = self.get_index_collection()
    mirror_fs = TestFS()

    # Phase 1: only the first crawled document is used, hence the break.
    for entry in master.crawl():
        self.assertIn('file_id', entry)
        entry.pop('file_id')
        with self.assertRaises(errors.ExportError):
            signac.export_to_mirror(entry, mirror_fs)
        break

    # Phase 2: export each document to the index and to the mirror.
    for entry in master.crawl():
        self.assertIn('file_id', entry)
        signac.export_one(entry, collection)
        signac.export_to_mirror(entry, mirror_fs)
    self.assertTrue(collection.replace_one.called)

    # Phase 3: look every document up again, in the index and the mirror.
    for entry in master.crawl():
        found = collection.find_one({'_id': entry['_id']})
        self.assertIsNotNone(found)
        with mirror_fs.get(entry['file_id']):
            pass
def test_master_crawler_tags(self):
    # Verify how MasterCrawler.tags filters the crawl. The expected counts
    # below encode the contract under test: with tags left untouched, set
    # to None, set to an empty collection, or set to a non-matching tag,
    # nothing is yielded; as soon as the tags contain 'test1' or 'test2',
    # two documents are yielded, regardless of additional unknown tags.
    self.setup_project()
    crawler = indexing.MasterCrawler(root=self._tmp_dir.name)

    # Freshly constructed crawler, tags not assigned yet.
    self.assertEqual(
        0, len(list(crawler.crawl())), msg='tags left at default')

    # (tags to assign, expected number of crawled documents).
    # Note: {} is an empty dict literal, not an empty set.
    cases = [
        (None, 0),
        ({}, 0),
        ({'nomatch'}, 0),
        ({'test1'}, 2),
        ({'test2'}, 2),
        ({'test1', 'test2'}, 2),
        ({'test1', 'bs'}, 2),
        ({'test2', 'bs'}, 2),
        ({'test1', 'test2', 'bs'}, 2),
    ]
    for tags, expected in cases:
        crawler.tags = tags
        # The message names the tag set, since the line number alone no
        # longer identifies which case failed.
        self.assertEqual(
            expected, len(list(crawler.crawl())),
            msg='tags={!r}'.format(tags))