def _test_parse_file(self, file, n_articles, min_article_length=20, expected_articles=()):
    """
    Run the BZK script on a file and check the articles it produces.

    NOTE(review): the previous docstring said the file uses the "scrape_2
    format"; that cannot be confirmed from this helper alone -- TODO confirm.

    @param file The file object handed to the BZK script.
    @param n_articles The exact number of articles the resulting set must contain.
    @param min_article_length Minimum length of the text of every expected article.
    @param expected_articles A sequence of {field: value} dicts, each with at least a 'title' field.
    """
    field_map = self._create_id_field_map("title", "text", "medium", "date")
    script = BZK(field_map=field_map, file=file, project=self.project.id, encoding="UTF-8")
    result_set = script.run()
    self.assertIsInstance(result_set, ArticleSet)

    articles = list(result_set.articles.all())
    self.assertEqual(len(articles), n_articles)

    # Index the parsed articles by title so expectations can be looked up directly.
    article_map = {a.title: a for a in articles}
    for fields in expected_articles:
        title = fields['title']
        # Fail with a readable assertion instead of a bare KeyError when an
        # expected article is absent from the parsed result.
        self.assertIn(
            title, article_map,
            "Expected an article titled '{}' in the parsed result".format(title))
        article = article_map[title]
        self.assertGreaterEqual(len(article.text), min_article_length)
        for field, value in fields.items():
            self.assertEqual(
                article.get_property(field), value,
                "Expected value '{}' for article field '{}'".format(value, field))
class TestBZK(amcattest.AmCATTestCase):
    """
    Tests for the BZK script, run against test_files/bzk/test.html.

    NOTE(review): a second class named TestBZK is defined later in this file
    and rebinds the name; one of the two definitions is probably redundant.
    """

    def setUp(self):
        """Run BZK on the test file and keep the resulting articles in self.result."""
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        html_file = open(os.path.join(self.dir, 'test.html'))
        # Make sure the handle is released once the test finishes, even when
        # the rest of setUp raises; otherwise every test leaks an open file.
        self.addCleanup(html_file.close)

        self.bzk = BZK(project=amcattest.create_test_project().id,
                       file=File(html_file),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(id=self.bzk.run()[0]).articles.all()

    # The 'todo_' prefix means the default unittest loader (which looks for
    # names starting with 'test') does not collect the two methods below.
    def todo_test_scrape_unit(self):
        """The script should have produced at least one article."""
        self.assertTrue(self.result)

    def todo_test_scrape_file(self):
        """Every article should have a truthy value for each required property."""
        must_props = ('headline', 'text', 'medium', 'date')
        values_per_prop = [[getattr(a, prop) for a in self.result] for prop in must_props]
        for proplist in values_per_prop:
            self.assertTrue(all(proplist))
class TestBZK(amcattest.AmCATTestCase):
    """
    Tests for the BZK script, run against test_files/bzk/test.html.

    NOTE(review): a class with the same name is already defined earlier in
    this file; this later definition rebinds the name. One of the two is
    probably redundant.
    """

    def setUp(self):
        """Run BZK on the test file and keep the resulting articles in self.result."""
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        html_file = open(os.path.join(self.dir, 'test.html'))
        # Make sure the handle is released once the test finishes, even when
        # the rest of setUp raises; otherwise every test leaks an open file.
        self.addCleanup(html_file.close)

        self.bzk = BZK(project=amcattest.create_test_project().id,
                       file=File(html_file),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(
            id=self.bzk.run()[0]).articles.all()

    # The 'todo_' prefix means the default unittest loader (which looks for
    # names starting with 'test') does not collect the two methods below.
    def todo_test_scrape_unit(self):
        """The script should have produced at least one article."""
        self.assertTrue(self.result)

    def todo_test_scrape_file(self):
        """Every article should have a truthy value for each required property."""
        must_props = ('headline', 'text', 'medium', 'date')
        values_per_prop = [[getattr(a, prop) for a in self.result]
                           for prop in must_props]
        for proplist in values_per_prop:
            self.assertTrue(all(proplist))