def setUp(self):
    super().setUp()
    self.active_dataset = DatasetFactory.create(name="active test", is_active=True)
    created_time = make_aware(datetime.now())
    for version_number in range(0, 29, 7):
        created_time -= timedelta(days=version_number)
        include_current = version_number == 0
        create_dataset_version(
            self.active_dataset, f"0.0.{28 - version_number}", created_time,
            include_current=include_current
        )
    self.inactive_dataset = DatasetFactory.create(name="inactive test", is_active=False)
    created_time = make_aware(datetime.now())
    for version_number in range(21, 43, 7):
        created_time -= timedelta(days=version_number)
        create_dataset_version(
            self.inactive_dataset, f"0.0.{42 - version_number}", created_time,
            include_current=False
        )
    self.new_extension = Extension.objects.create(id="new", is_addition=True, properties={})
    self.old_extension = Extension.objects.create(
        id="old", is_addition=True, properties={},
        deleted_at=make_aware(datetime(year=1970, month=1, day=1))
    )
def setUp(self):
    super().setUp()
    self.client.force_login(self.user)
    created_time = make_aware(datetime.now())
    self.active_dataset = DatasetFactory.create(name="active test", is_active=True, is_latest=True)
    create_dataset_version(self.active_dataset, "0.0.1", created_time, include_current=True)
    self.inactive_dataset = DatasetFactory.create(name="inactive test", is_active=False, is_latest=False)
    create_dataset_version(self.inactive_dataset, "0.0.1", created_time, docs=1, include_current=False)
def setUp(self):
    super().setUp()
    now = make_aware(datetime.now())
    self.dataset = DatasetFactory()
    create_dataset_version(self.dataset, "0.0.1", created_at=now, include_current=True, copies=2, docs=22)
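# The setups above and below rely on a project-internal create_dataset_version helper.
# What follows is a minimal sketch of what such a helper could look like, not the actual
# implementation: the factory names (DatasetVersionFactory, CollectionFactory, DocumentFactory)
# and the behaviour of the copies/docs/include_current parameters are assumptions inferred
# from how the helper is called in these tests.
def create_dataset_version(dataset, version, created_at, include_current=False, copies=1, docs=0):
    dataset_versions = []
    for copy in range(copies):
        # Assumed: only the first copy gets promoted when include_current is set, which
        # would explain the is_current=True/False filters used throughout the tests.
        dataset_version = DatasetVersionFactory.create(
            dataset=dataset, version=version, created_at=created_at,
            is_current=include_current and copy == 0
        )
        collection = CollectionFactory.create(dataset_version=dataset_version)
        for _ in range(docs):
            DocumentFactory.create(dataset_version=dataset_version, collection=collection)
        dataset_versions.append(dataset_version)
    return dataset_versions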
def setUp(self):
    super().setUp()
    datasets = {
        "inactive": DatasetFactory.create(name="inactive", is_active=False),
        "secondary": DatasetFactory.create(name="secondary"),
        "primary": DatasetFactory.create(name="primary"),
    }
    self.pushed_ats = {}
    for dataset_type, dataset in datasets.items():
        dataset_versions = create_dataset_data(dataset)
        for dataset_version in dataset_versions:
            pushed_at = create_dataset_version_indices(dataset_version)
            self.pushed_ats[dataset_version.id] = pushed_at
    sleep(3)
def setUp(self):
    super().setUp()
    self.latest_update_at = make_aware(
        datetime(year=2020, month=2, day=10, hour=13, minute=8, second=39, microsecond=315000)
    )
    sources = {
        "sharekit": HarvestSourceFactory(spec="edusources", repository=Repositories.SHAREKIT),
        "sharekit_private": HarvestSourceFactory(spec="edusourcesprivate", repository=Repositories.SHAREKIT),
        "wikiwijs": HarvestSourceFactory(spec="wikiwijsmaken", repository=Repositories.EDUREP)
    }
    datasets = {
        "primary": DatasetFactory.create(name="primary"),
        "inactive": DatasetFactory.create(name="inactive", is_active=False)
    }
    for dataset_type, dataset in datasets.items():
        create_dataset_data(dataset, include_current=dataset_type == "primary")
        create_dataset_harvests(dataset_type, dataset, sources, self.latest_update_at)
    SharekitMetadataHarvestFactory.create(is_initial=False, number=0, is_restricted=False, is_extracted=True)
    SharekitMetadataHarvestFactory.create(is_initial=False, number=1, is_restricted=False)
    sleep(1)  # makes sure created_at and modified_at will differ by at least 1 second when asserting
def setUp(self): super().setUp() self.dataset = DatasetFactory.create(name="test") dataset_version = DatasetVersionFactory.create(dataset=self.dataset) self.harvest = HarvestFactory.create(dataset=self.dataset, stage=HarvestStages.PREVIEW) # Documents that will actually get processed DocumentFactory.create(dataset_version=dataset_version, mime_type="text/html", from_youtube=True) DocumentFactory.create(dataset_version=dataset_version, mime_type="application/pdf") # Other Documents that get ignored due to various reasons DocumentFactory.create(dataset_version=dataset_version, mime_type="text/html", analysis_allowed=False, from_youtube=True) DocumentFactory.create(dataset_version=dataset_version, mime_type="application/pdf", analysis_allowed=False) DocumentFactory.create(dataset_version=dataset_version, mime_type="foo/bar")
def setUpClass(cls): super().setUpClass() cls.dataset = DatasetFactory(name="test") now = make_aware(datetime.now()) cls.docs_count = 22 create_dataset_version(cls.dataset, "0.0.3", now, include_current=True, copies=2, docs=cls.docs_count) cls.original_version = cls.dataset.versions.filter( is_current=True).last() cls.new_version = cls.dataset.versions.filter(is_current=False).last() for doc in cls.new_version.document_set.all()[:3]: doc.delete()
@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.dataset = DatasetFactory()
    cls.sharekit = HarvestSourceFactory(
        spec="edusources", repository=Repositories.SHAREKIT, delete_policy=DeletePolicies.NO
    )
    cls.wikiwijs = HarvestSourceFactory(name="Wikiwijs Maken", spec="wikiwijsmaken", repository=Repositories.EDUREP)
    cls.edurep_delen = HarvestSourceFactory(name="Edurep Delen", spec="edurep_delen", repository=Repositories.EDUREP)
class TestDataset(TestCase):

    def setUp(self):
        super().setUp()
        now = make_aware(datetime.now())
        self.dataset = DatasetFactory()
        create_dataset_version(self.dataset, "0.0.1", created_at=now, include_current=True, copies=2, docs=22)

    def test_evaluate_dataset_version_pass(self):
        test_version = self.dataset.versions.filter(is_current=False).last()
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(fallback_collections, [], "Expected identical versions to pass evaluation")
        test_version.document_set.last().delete()
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(
            fallback_collections, [],
            "Expected versions with less than 5% difference to pass evaluation"
        )
        extra_documents = [
            DocumentFactory.create(dataset_version=test_version, collection=test_version.collection_set.last())
            for ix in range(10)
        ]
        test_version.document_set.add(*extra_documents)
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(
            fallback_collections, [],
            "Expected versions with an increase of more than 5% to pass evaluation"
        )
        self.dataset.versions.update(is_current=False)
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(
            fallback_collections, [],
            "Expected no fallbacks when no promoted previous versions exist"
        )

    def test_evaluate_dataset_version_fail(self):
        fallback_collections = self.dataset.evaluate_dataset_version(DatasetVersionFactory.create(is_current=False))
        self.assertEqual(len(fallback_collections), 1, "Expected empty dataset version to generate fallback")
        self.assertIsInstance(fallback_collections[0], Collection)
        self.assertEqual(fallback_collections[0].name, "test")
        test_version = self.dataset.versions.filter(is_current=False).last()
        for doc in test_version.document_set.all()[:3]:
            doc.delete()
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(
            len(fallback_collections), 1,
            "Expected dataset version with insufficient docs to generate fallback"
        )
        self.assertIsInstance(fallback_collections[0], Collection)
        self.assertEqual(fallback_collections[0].name, "test")

    def test_evaluate_dataset_version_old_corrupt_collection(self):
        # First we corrupt a collection by setting dataset_version to None on all of its documents.
        # Then we copy a healthy collection to the test dataset version.
        # This state may get created by the index_dataset_version command.
        test_version = self.dataset.versions.filter(is_current=False).last()
        test_version.document_set.update(dataset_version=None)
        current_version = self.dataset.versions.filter(is_current=True).last()
        current_collection = current_version.collection_set.last()
        test_version.copy_collection(current_collection)
        # In this state the healthy collection should not be seen as erroneous
        # and the corrupt collection should get ignored.
        fallback_collections = self.dataset.evaluate_dataset_version(test_version)
        self.assertEqual(
            fallback_collections, [],
            "Expected the old corrupt collection to get ignored. "
            "No fallback needed with the new healthy collection in place."
        )
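# A hedged sketch of the evaluation logic the tests above pin down; the project's actual
# Dataset.evaluate_dataset_version may well be implemented differently. The model relations
# (versions, collection_set, document_set) are taken from the tests themselves and the 5%
# threshold from the assertion messages; everything else is an assumption. Intended as a
# method on the Dataset model.
def evaluate_dataset_version(self, dataset_version):
    current_version = self.versions.filter(is_current=True).last()
    if current_version is None:
        return []  # no promoted version to compare against, so nothing can fail
    fallback_collections = []
    for current_collection in current_version.collection_set.all():
        candidate = dataset_version.collection_set.filter(name=current_collection.name).last()
        # Count only documents still linked to the new version; this skips corrupt
        # collections whose documents lost their dataset_version reference.
        candidate_count = (
            candidate.document_set.filter(dataset_version=dataset_version).count() if candidate else 0
        )
        # Shrinking by more than 5% triggers a fallback to the promoted collection;
        # any growth passes.
        if candidate_count < current_collection.document_set.count() * 0.95:
            fallback_collections.append(current_collection)
    return fallback_collections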