def test_dataset_multiple_elements_content_retrieval(self):
    """
    Factory can fetch the content of several elements in a single request.

    Three elements are stored in one dataset (two of them referencing the
    same file). Requesting all three at once must raise
    RequestedRangeNotSatisfiable, while requesting two of them yields a
    mapping indexed by element id holding the stored bytes.
    """
    token = TokenDAO("normal user privileged with link", 1, 1, "user1",
                     privileges=Privileges.RO_WATCH_DATASET)
    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    self.session.flush()

    token = token.link_dataset(dataset)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    elem_a = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    elem_b = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    elem_c = DatasetElementDAO("example3", "none", second_file, dataset=dataset)
    self.session.flush()

    dataset = dataset.update()

    # NOTE(review): three ids presumably exceed the configured page size -- confirm.
    rejected_request = [elem_a._id, elem_b._id, elem_c._id]
    with self.assertRaises(RequestedRangeNotSatisfiable):
        DatasetElementFactory(token, dataset).get_elements_content(rejected_request)

    accepted_request = [elem_a._id, elem_c._id]
    contents = DatasetElementFactory(token, dataset).get_elements_content(accepted_request)

    for stored_element, payload in ((elem_a, b"content1"), (elem_c, b"content2")):
        self.assertEqual(contents[stored_element._id], payload)
def test_dataset_fork(self):
    """
    Factory can fork a dataset.

    A token holding CREATE_DATASET forks a dataset that is linked to another
    token, either overriding title/description/reference/tags or inheriting
    them from the source when they are omitted. A token holding only
    RO_WATCH_DATASET must get Unauthorized when it attempts the fork.
    :return:
    """
    viewer = TokenDAO("normal user only view dataset", 2, 10, "viewer",
                      privileges=Privileges.RO_WATCH_DATASET)
    creator = TokenDAO("normal user privileged", 2, 10, "creator",
                       privileges=Privileges.CREATE_DATASET)
    source = DatasetDAO("viewer/dataset", "dataset", "description for dataset", "none", ["d1", "d2"])
    self.session.flush()

    first = DatasetElementDAO("a", "a", None, "noneaa", ["taga"], dataset=source)
    second = DatasetElementDAO("b", "b", None, "nonebb", ["tagb"], dataset=source)
    self.session.flush()

    source = source.update()
    self.assertEqual(len(source.elements), 2)

    viewer = viewer.link_dataset(source)
    # The refreshed object is intentionally not rebound here.
    source.update()

    # Creator can clone it from viewer.
    fork = DatasetFactory(creator).fork_dataset(
        source.url_prefix, viewer,
        title="dataset_cloned", url_prefix="dataset", description="desc",
        reference="none", tags=["d2", "d1"],
    )

    expected_fields = (
        ("url_prefix", "creator/dataset"),
        ("title", "dataset_cloned"),
        ("description", "desc"),
        ("tags", ["d2", "d1"]),
    )
    for field, value in expected_fields:
        self.assertEqual(getattr(fork, field), value)
    self.assertEqual(len(fork.elements), 2)

    source_titles = [first.title, second.title]
    for position in (0, 1):
        self.assertIn(fork.elements[position].title, source_titles)

    # viewer can NOT clone it from creator.
    with self.assertRaises(Unauthorized):
        DatasetFactory(viewer).fork_dataset(
            fork.url_prefix, creator,
            title="dataset_cloned", url_prefix="dataset", description="desc",
            reference="none", tags=["d2", "d1"],
        )

    # Dataset can be forked omitting some options
    inherited_fork = DatasetFactory(creator).fork_dataset(source.url_prefix, viewer, url_prefix="dataset2")

    for field in ("title", "description", "tags", "reference"):
        self.assertEqual(getattr(inherited_fork, field), getattr(source, field))
def test_clone_element(self):
    """
    Factory can clone elements.

    Cloning into a dataset the token is not linked to must raise
    Unauthorized. Once linked, the clone keeps the id, file reference,
    title, description and tags of the original, and the element ends up
    referenced by both datasets.
    """
    token = TokenDAO("normal user privileged with link", 2, 1, "user1",
                     privileges=Privileges.RO_WATCH_DATASET + Privileges.EDIT_DATASET + Privileges.ADD_ELEMENTS)
    origin = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                        tags=["example", "0"])
    target = DatasetDAO("user1/dataset2", "example_dataset2", "dataset2 for testing purposes", "none",
                        tags=["example", "1"])
    self.session.flush()

    token = token.link_dataset(origin)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    elem_a = DatasetElementDAO("example1", "none", first_file, dataset=origin)
    elem_b = DatasetElementDAO("example2", "none", first_file, dataset=origin)
    elem_c = DatasetElementDAO("example3", "none", second_file, dataset=origin)
    self.session.flush()

    origin = origin.update()
    target = target.update()

    self.assertEqual(len(origin.elements), 3)
    self.assertEqual(len(target.elements), 0)

    # The token is not linked to the target dataset yet.
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(token, origin).clone_element(elem_a._id, target.url_prefix)

    token = token.link_dataset(target)

    self.assertEqual(len(elem_a.dataset_id), 1)

    clone = DatasetElementFactory(token, origin).clone_element(elem_a._id, target.url_prefix)

    target = target.update()
    self.assertEqual(len(target.elements), 1)

    for field in ("file_ref_id", "title", "description", "tags", "_id"):
        self.assertEqual(getattr(clone, field), getattr(elem_a, field))

    self.assertEqual(len(clone.dataset_id), 2)
    self.assertEqual(len(elem_a.dataset_id), 2)

    # NOTE(review): a second clone is expected to be refused; the limiting rule
    # is not visible in this test -- presumably a token quota, confirm.
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(token, origin).clone_element(elem_b._id, target.url_prefix)
def test_dataset_elements_info_by_pages(self):
    """
    Factory can retrieve multiple elements at once by pages.

    Five elements are created and every page (using the global page size)
    is requested; each retrieved element must carry one of the known titles.
    """
    token = TokenDAO("normal user privileged with link", 1, 5, "user1",
                     privileges=Privileges.RO_WATCH_DATASET)
    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    self.session.flush()

    token = token.link_dataset(dataset)

    # Only the titles are kept; the elements themselves live in the session.
    titles = []
    for index in range(5):
        created = DatasetElementDAO("example{}".format(index), "none", None, dataset=dataset)
        titles.append(created.title)
    self.session.flush()

    dataset = dataset.update()

    page_size = global_config.get_page_size()

    # Ceiling division: a trailing partial page counts as one more page.
    full_pages, remainder = divmod(len(titles), page_size)
    total_pages = full_pages + int(remainder > 0)

    for page_number in range(total_pages):
        page_elements = DatasetElementFactory(token, dataset).get_elements_info(page_number)

        for info in page_elements:
            self.assertIn(info.title, titles)
def test_dataset_elements_removal(self):
    """
    Factory can remove multiple elements from a dataset at once.

    Destroying three elements in one request must raise
    RequestedRangeNotSatisfiable; destroying two succeeds and leaves only
    the third one. Finally destroy_elements() is invoked without arguments
    on a repopulated dataset.
    """
    token = TokenDAO("normal user privileged with link", 1, 1, "user1",
                     privileges=Privileges.DESTROY_DATASET + Privileges.DESTROY_ELEMENTS)
    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    self.session.flush()

    token = token.link_dataset(dataset)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    elem_a = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    elem_b = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    elem_c = DatasetElementDAO("example3", "none", second_file, dataset=dataset)
    self.session.flush()

    dataset = dataset.update()
    self.assertEqual(len(dataset.elements), 3)

    # NOTE(review): three ids presumably exceed the configured page size -- confirm.
    rejected_batch = [elem_a._id, elem_b._id, elem_c._id]
    with self.assertRaises(RequestedRangeNotSatisfiable):
        DatasetElementFactory(token, dataset).destroy_elements(rejected_batch)

    accepted_batch = [elem_a._id, elem_b._id]
    DatasetElementFactory(token, dataset).destroy_elements(accepted_batch)

    dataset = dataset.update()
    self.assertEqual(len(dataset.elements), 1)
    self.assertEqual(dataset.elements[0]._id, elem_c._id)

    # Repopulate the dataset and call the removal without an explicit id list.
    elem_a = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    elem_b = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    self.session.flush()

    dataset = dataset.update()

    DatasetElementFactory(token, dataset).destroy_elements()
def test_dataset_specific_elements_info(self):
    """
    Factory can retrieve multiple elements at once by specific sets.

    Requesting all five ids must raise RequestedRangeNotSatisfiable. Two
    overlapping requests of two ids each (ids[0:2] and ids[1:3]) are then
    compared to check that the results follow the order of the request.
    """
    token = TokenDAO("normal user privileged with link", 1, 1, "user1",
                     privileges=Privileges.RO_WATCH_DATASET)
    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    self.session.flush()

    token = token.link_dataset(dataset)

    created = [DatasetElementDAO("example{}".format(index), "none", None, dataset=dataset)
               for index in range(5)]
    titles = [item.title for item in created]
    ids = [item._id for item in created]
    self.session.flush()

    dataset = dataset.update()

    def fetch(requested_ids):
        # Materialise the factory response for the requested ids.
        return list(DatasetElementFactory(token, dataset).get_specific_elements_info(requested_ids))

    # Can't retrieve more elements than the page size at once.
    with self.assertRaises(RequestedRangeNotSatisfiable):
        fetch(ids)

    first_batch = fetch(ids[0:2])
    self.assertEqual(len(first_batch), 2)
    for position in (0, 1):
        self.assertIn(first_batch[position].title, titles)

    second_batch = fetch(ids[1:3])
    self.assertEqual(len(second_batch), 2)
    for position in (0, 1):
        self.assertIn(second_batch[position].title, titles)

    # The batches overlap on exactly one id: the last of the first request
    # is the first of the second request.
    for position in (0, 1):
        self.assertNotEqual(first_batch[position].title, second_batch[position].title)
    self.assertEqual(first_batch[1].title, second_batch[0].title)
def test_dataset_elements_info_by_different_pages_size(self):
    """
    Factory can retrieve multiple elements with different page sizes.

    The global page size is set to 10 for the duration of the test and is
    always restored afterwards (even when an assertion fails), so the
    modified configuration cannot leak into other tests.

    For every page size from 1 to 5, each page is compared id by id with
    the matching slice of a reference listing that was retrieved in a
    single request. Requesting a page size above the global one must raise
    Conflict.
    """
    initial_page_size = global_config.get_page_size()
    global_config.set_page_size(10)

    try:
        editor = TokenDAO("normal user privileged with link", 1, 5, "user1",
                          privileges=Privileges.RO_WATCH_DATASET)
        dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                             tags=["example", "0"])
        self.session.flush()

        editor = editor.link_dataset(dataset)

        elements = [DatasetElementDAO("example{}".format(x), "none", None, dataset=dataset).title
                    for x in range(5)]
        self.session.flush()

        dataset = dataset.update()

        # We need to know the order of the elements
        ordered_elements = list(
            DatasetElementFactory(editor, dataset).get_elements_info(page_size=len(elements))
        )

        for page_size in (1, 2, 3, 4, 5):
            # Ceiling division: a trailing partial page counts as a page.
            num_pages = len(elements) // page_size + int(len(elements) % page_size > 0)

            for page in range(num_pages):
                retrieved_elements = list(
                    DatasetElementFactory(editor, dataset).get_elements_info(page, page_size=page_size)
                )
                expected_elements = ordered_elements[page * page_size:(page + 1) * page_size]

                for retrieved_element, ordered_element in zip(retrieved_elements, expected_elements):
                    self.assertEqual(retrieved_element._id, ordered_element._id)

        # A page size bigger than the global one is rejected.
        with self.assertRaises(Conflict):
            DatasetElementFactory(editor, dataset).get_elements_info(
                page_size=global_config.get_page_size() + 1
            )
    finally:
        # Restore the shared configuration no matter how the test ended.
        global_config.set_page_size(initial_page_size)
def test_dataset_elements_edit(self):
    """
    Factory can edit multiple elements of a dataset in one call.

    Two of the three stored elements are modified through a single
    edit_elements() request (title + content for one, description + content
    for the other) and the persisted values are verified afterwards.
    """
    token = TokenDAO("normal user privileged with link", 1, 1, "user1",
                     privileges=Privileges.EDIT_DATASET + Privileges.EDIT_ELEMENTS)
    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    self.session.flush()

    token = token.link_dataset(dataset)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    elem_a = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    elem_b = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    elem_c = DatasetElementDAO("example3", "none", second_file, dataset=dataset)
    self.session.flush()

    dataset = dataset.update()
    self.assertEqual(len(dataset.elements), 3)

    # Mapping of element id -> fields to overwrite.
    changes = {}
    changes[elem_a._id] = {"title": "asd6", "content": b"content4"}
    changes[elem_c._id] = {"description": "ffff", "content": b"New Content!"}

    DatasetElementFactory(token, dataset).edit_elements(changes)
    self.session.flush()

    dataset = dataset.update()
    elem_a = elem_a.update()
    elem_c = elem_c.update()

    self.assertEqual(elem_a.title, "asd6")
    stored_a = storage.get_file(elem_a.file_ref_id)
    self.assertEqual(stored_a.content, b"content4")

    self.assertEqual(elem_c.description, "ffff")
    stored_c = storage.get_file(elem_c.file_ref_id)
    self.assertEqual(stored_c.content, b"New Content!")
def test_dataset_element_content_retrieval(self):
    """
    Factory can retrieve the content of a single element.

    Exercises three tokens: one linked to the first dataset with
    RO_WATCH_DATASET, one linked to the second dataset without privileges,
    and an admin. Every denied combination must raise Unauthorized.
    """
    editor = TokenDAO("normal user privileged with link", 1, 1, "user1",
                      privileges=Privileges.RO_WATCH_DATASET)
    editor2 = TokenDAO("normal user unprivileged", 1, 1, "user1",
                       privileges=0)
    admin = TokenDAO("admin user", 1, 1, "admin",
                     privileges=Privileges.ADMIN_CREATE_TOKEN + Privileges.ADMIN_EDIT_TOKEN +
                     Privileges.ADMIN_DESTROY_TOKEN)

    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    dataset2 = DatasetDAO("user1/dataset2", "example_dataset2", "dataset2 for testing purposes", "none",
                          tags=["example", "1"])
    self.session.flush()

    editor = editor.link_dataset(dataset)
    editor2 = editor2.link_dataset(dataset2)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    element = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    element2 = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    element3 = DatasetElementDAO("example3", "none", second_file, dataset=dataset2)
    self.session.flush()

    dataset = dataset.update()
    dataset2 = dataset2.update()

    self.assertEqual(len(dataset.elements), 2)
    self.assertEqual(len(dataset2.elements), 1)

    def read_content(token, owner_dataset, target):
        # Ask a fresh factory for the raw content of the target element.
        return DatasetElementFactory(token, owner_dataset).get_element_content(target._id)

    # editor can see elements from his dataset
    self.assertEqual(read_content(editor, dataset, element), b"content1")

    denied_requests = (
        # editor can not see elements from other's datasets
        (editor, dataset2, element3),
        # editor can not see external elements within his dataset
        (editor, dataset, element3),
        # editor2 is not privileged and can not see any elements of his own dataset
        (editor2, dataset2, element3),
        # Or external elements
        (editor2, dataset2, element2),
        # Or other datasets
        (editor2, dataset, element2),
    )
    for token, owner_dataset, target in denied_requests:
        with self.assertRaises(Unauthorized):
            read_content(token, owner_dataset, target)

    # Admin can do anything
    self.assertEqual(read_content(admin, dataset, element), b"content1")

    # But not this: dataset2 does not have element
    with self.assertRaises(Unauthorized):
        read_content(admin, dataset2, element)

    self.assertEqual(read_content(admin, dataset2, element3), b"content2")
def test_dataset_element_edit(self):
    """
    Factory can edit single elements of a dataset.

    Covers the denied cases (dataset not linked, element outside the
    dataset, unknown element id, change of file reference by a non-admin)
    and the allowed ones (title and content edits by the linked editor,
    any edit by the admin).
    """
    editor = TokenDAO("normal user privileged with link", 1, 1, "user1",
                      privileges=Privileges.EDIT_DATASET + Privileges.EDIT_ELEMENTS)
    editor2 = TokenDAO("normal user unprivileged", 1, 1, "user1",
                       privileges=Privileges.EDIT_DATASET)
    admin = TokenDAO("admin user", 1, 1, "admin",
                     privileges=Privileges.ADMIN_CREATE_TOKEN + Privileges.ADMIN_EDIT_TOKEN +
                     Privileges.ADMIN_DESTROY_TOKEN)

    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    dataset2 = DatasetDAO("user1/dataset2", "example_dataset2", "dataset2 for testing purposes", "none",
                          tags=["example", "1"])
    self.session.flush()

    editor = editor.link_dataset(dataset)
    editor2 = editor2.link_dataset(dataset2)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    element = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    element2 = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    element3 = DatasetElementDAO("example3", "none", second_file, dataset=dataset2)
    self.session.flush()

    dataset = dataset.update()
    dataset2 = dataset2.update()

    self.assertEqual(len(dataset.elements), 2)
    self.assertEqual(len(dataset2.elements), 1)

    # editor can not edit elements from a dataset that is not linked to
    for target, new_title in ((element, "asd"), (element2, "asd2"), (element3, "asd3")):
        with self.assertRaises(Unauthorized):
            DatasetElementFactory(editor, dataset2).edit_element(target._id, title=new_title)

    # editor can not edit elements if they exist but are not inside his dataset
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(editor, dataset).edit_element(element3._id, title="asd4")

    # editor can not edit elements if they don't exist
    with self.assertRaises(NotFound):
        DatasetElementFactory(editor, dataset).edit_element("randomID", title="asd5")

    # Editor can edit elements if they exist and are inside his dataset
    DatasetElementFactory(editor, dataset).edit_element(element._id, title="asd6")
    self.session.flush()

    dataset = dataset.update()
    element = element.update()

    # Editor can not change references to files
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(editor, dataset).edit_element(element._id, file_ref_id="other_reference")

    # BUT he can change the content
    DatasetElementFactory(editor, dataset).edit_element(element._id, content=b"other_content")
    element = element.update()
    edited_file = storage.get_file(element.file_ref_id)
    self.assertEqual(edited_file.content, b"other_content")

    # Admin can do whatever he wants
    DatasetElementFactory(admin, dataset).edit_element(element2._id, title="changed by admin")
    element2 = element2.update()
    self.assertEqual(element2.title, "changed by admin")

    DatasetElementFactory(admin, dataset2).edit_element(element3._id, file_ref_id=element.file_ref_id)
    element3 = element3.update()
    self.assertEqual(storage.get_file(element3.file_ref_id).content,
                     storage.get_file(element.file_ref_id).content)

    self.session.flush()
def test_dataset_element_removal(self):
    """
    Factory can remove single elements from datasets.

    Covers the denied cases (dataset not linked, element outside the
    dataset, unknown element id) and the allowed ones (removal by the
    linked destructor, removal from any dataset by the admin). The file
    referenced by a removed element must remain readable.
    """
    # Created but never used afterwards; kept because constructing it may persist a token.
    anonymous = TokenDAO("Anonymous", 1, 1, "anonymous")
    destructor = TokenDAO("normal user privileged with link", 1, 1, "user1",
                          privileges=Privileges.DESTROY_DATASET + Privileges.DESTROY_ELEMENTS)
    destructor2 = TokenDAO("normal user unprivileged", 1, 1, "user1",
                           privileges=Privileges.DESTROY_DATASET)
    admin = TokenDAO("admin user", 1, 1, "admin",
                     privileges=Privileges.ADMIN_CREATE_TOKEN + Privileges.ADMIN_EDIT_TOKEN +
                     Privileges.ADMIN_DESTROY_TOKEN)

    dataset = DatasetDAO("user1/dataset1", "example_dataset", "dataset for testing purposes", "none",
                         tags=["example", "0"])
    dataset2 = DatasetDAO("user1/dataset2", "example_dataset2", "dataset2 for testing purposes", "none",
                          tags=["example", "1"])
    self.session.flush()

    destructor = destructor.link_dataset(dataset)
    destructor2 = destructor2.link_dataset(dataset2)

    first_file = storage.put_file_content(b"content1")
    second_file = storage.put_file_content(b"content2")

    element = DatasetElementDAO("example1", "none", first_file, dataset=dataset)
    element2 = DatasetElementDAO("example2", "none", first_file, dataset=dataset)
    element3 = DatasetElementDAO("example3", "none", second_file, dataset=dataset2)
    self.session.flush()

    dataset = dataset.update()
    dataset2 = dataset2.update()

    self.assertEqual(len(dataset.elements), 2)
    self.assertEqual(len(dataset2.elements), 1)

    # Destructor can not destroy elements from a dataset that is not linked to
    for target in (element, element2, element3):
        with self.assertRaises(Unauthorized):
            DatasetElementFactory(destructor, dataset2).destroy_element(target._id)

    # Destructor can not destroy elements if they exist but are not inside his dataset
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(destructor, dataset).destroy_element(element3._id)

    # Destructor can not destroy elements if they don't exist
    with self.assertRaises(Unauthorized):
        DatasetElementFactory(destructor, dataset).destroy_element("randomID")

    # Destructor can destroy elements if they exist and are inside his dataset
    DatasetElementFactory(destructor, dataset).destroy_element(element._id)

    # Even though element is destroyed, file referenced should still exist
    surviving_file = storage.get_file(first_file)
    self.assertEqual(surviving_file.content, b"content1")

    dataset = dataset.update()
    self.assertEqual(len(dataset.elements), 1)

    # Admin can remove elements from any source
    DatasetElementFactory(admin, dataset).destroy_element(element2._id)
    DatasetElementFactory(admin, dataset2).destroy_element(element3._id)

    self.session.flush()

    dataset = dataset.update()
    dataset2 = dataset2.update()

    for emptied in (dataset, dataset2):
        self.assertEqual(len(emptied.elements), 0)
def test_gc_works_as_expected(self):
    """
    Garbage Collector removes every file content once nothing references it.

    Collection is checked in pairs of passes: the first pass is expected to
    report 0 and the second one the number of unreferenced files.
    :return:
    """
    collector = GarbageCollector()

    def stored_files():
        # Number of file documents currently persisted.
        return FileDAO.query.find().count()

    def collect_twice(expected_second_pass):
        # First time is always 0. It is a security check to ensure that a content is not being used.
        self.assertEqual(collector.do_garbage_collect(), 0)
        self.assertEqual(collector.do_garbage_collect(), expected_second_pass)

    self.assertEqual(collector.do_garbage_collect(), 0)

    # 1000 contents without any element referencing them: all are garbage.
    contents = ["hello{}".format(i).encode() for i in range(1000)]
    files = [FileContentDAO(content=content, size=len(content)) for content in contents]
    self.session.flush()
    self.assertEqual(stored_files(), len(contents))

    collect_twice(len(contents))
    self.assertEqual(stored_files(), 0)

    # 10 contents, only 3 of them referenced (files[0] twice).
    contents = ["hello{}".format(i).encode() for i in range(10)]
    files = [FileContentDAO(content=content, size=len(content)) for content in contents]

    elements = [
        DatasetElementDAO("title1", "none", files[0]._id, ),
        DatasetElementDAO("title2", "none", files[0]._id, ),
        DatasetElementDAO("title3", "none", files[1]._id, ),
        DatasetElementDAO("title4", "none", files[2]._id, ),
    ]
    self.session.flush()
    self.assertEqual(stored_files(), len(contents))

    collect_twice(7)
    self.assertEqual(stored_files(), 3)
    collect_twice(0)

    # (index of the element to delete, files collected, files remaining).
    # files[0] is only collected once both elements referencing it are gone.
    deletion_plan = (
        (2, 1, 2),
        (0, 0, 2),
        (1, 1, 1),
        (3, 1, 0),
    )
    for index, collected, remaining in deletion_plan:
        elements[index].delete()
        self.session.flush()
        collect_twice(collected)
        self.assertEqual(stored_files(), remaining)

    collect_twice(0)