def test_untracked_output_dir(self, mock_config_file):
    """Test that the contents of the untracked directory (in output) truly are untracked."""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook('test', 'test', 'labbook1', description="my first labbook",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert os.path.isdir(os.path.join(lb.root_dir, 'output', 'untracked'))

    altered_file = os.path.join(lb.root_dir, 'output', 'untracked', 'samplefiles')
    with open(altered_file, 'w') as f:
        f.write('Initial Content')
    lb.sweep_uncommitted_changes()
    c1 = lb.git.commit_hash

    with open(altered_file, 'w') as f:
        f.write('Changed Content')
    lb.sweep_uncommitted_changes()
    c2 = lb.git.commit_hash

    # Assert that Git detects no changes
    assert c1 == c2
def test_delete_labbook_linked_dataset(self, mock_config_file):
    """Test that deleting a labbook with a linked dataset returns a cleanup job for that dataset"""
    inv_manager = InventoryManager(mock_config_file[0])
    inv_manager.create_labbook("test", "test", "labbook1", description="my first labbook")
    lb = inv_manager.load_labbook("test", "test", "labbook1")

    auth = GitAuthor(name="test", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    inv_manager.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test", "dataset1", lb)

    dataset_delete_jobs = inv_manager.delete_labbook("test", "test", "labbook1")
    assert len(dataset_delete_jobs) == 1
    assert dataset_delete_jobs[0].namespace == "test"
    assert dataset_delete_jobs[0].name == "dataset1"

    with pytest.raises(InventoryException):
        inv_manager.load_labbook("test", "test", "labbook1")
def test_readme_weird_strings(self, mock_config_file):
    """Test writing readme content with complex or invalid values to the labbook"""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook('test', 'test', 'labbook1', description="my first labbook",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False

    # Random bytes are not a valid readme payload
    rand_str = os.urandom(1000000)
    with pytest.raises(TypeError):
        lb.write_readme(rand_str)
    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False

    with pytest.raises(TypeError):
        lb.write_readme(None)
    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False

    # An empty string is allowed
    lb.write_readme("")
    assert lb.get_readme() == ""
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is True
def test_is_labbook_modified_date(self, mock_config_file):
    """Test getting the modified date"""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook('test', 'test', 'labbook1', description="my first labbook",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    modified_1 = lb.modified_on

    time.sleep(2)
    lb.write_readme("doing something to change the modified time")
    modified_2 = lb.modified_on

    assert modified_2 > modified_1
    assert modified_1.microsecond == 0
    assert modified_1.tzname() == "UTC"
    assert modified_2.microsecond == 0
    assert modified_2.tzname() == "UTC"
    assert (datetime.datetime.now(datetime.timezone.utc) - modified_1).total_seconds() < 10
    assert (datetime.datetime.now(datetime.timezone.utc) - modified_2).total_seconds() < 10
def test_get_commits_behind(self, fixture_working_dir):
    """Test the temporary field commitsBehind on dataset objects"""
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset("default", "default", "test-ds", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="default", email="*****@*****.**"))
    lb = im.create_labbook("default", "default", "test-lb")
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", 'default', 'test-ds', lb)

    query = """
            {
              labbook(owner: "default", name: "test-lb") {
                linkedDatasets {
                  name
                  commitsBehind
                }
              }
            }
            """
    r = fixture_working_dir[2].execute(query)
    assert 'errors' not in r
    assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
    assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 0

    # Advance the dataset by one commit; the reference linked into the labbook is now behind
    ds.write_readme("test contents to make a commit")

    r = fixture_working_dir[2].execute(query)
    assert 'errors' not in r
    assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
    assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 1
def test_change_dataset_name(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    root_dir1 = ds.root_dir
    assert ds.name == 'dataset1'

    ds.name = 'dataset-updated'
    root_dir2 = ds.root_dir

    # Renaming moves the dataset on disk; only the final path component changes
    parts1 = root_dir1.split(os.path.sep)
    parts2 = root_dir2.split(os.path.sep)
    assert parts1[:6] == parts2[:6]
    assert parts1[6] == 'dataset1'
    assert parts2[6] == 'dataset-updated'
def test_create_dataset_invalid_storage_type(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    with pytest.raises(ValueError):
        inv_manager.create_dataset("test", "test", "dataset1", "asdfdfgh",
                                   description="my first dataset", author=auth)
def test_is_dataset_modified_date(self, mock_config_file):
    """Test getting the modified date"""
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    modified_1 = ds.modified_on

    time.sleep(3)
    helper_modify_dataset(ds)
    modified_2 = ds.modified_on

    assert modified_2 > modified_1
    assert modified_1.microsecond == 0
    assert modified_1.tzname() == "UTC"
    assert modified_2.microsecond == 0
    assert modified_2.tzname() == "UTC"
    assert (datetime.datetime.now(datetime.timezone.utc) - modified_1).total_seconds() < 10
    assert (datetime.datetime.now(datetime.timezone.utc) - modified_2).total_seconds() < 10
def test_is_dataset_is_managed(self, mock_config_file):
    """Test that a dataset backed by a managed storage type reports as managed"""
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert ds.is_managed() is True
def test_delete_dataset_while_linked(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="test", email="*****@*****.**")
    lb = inv_manager.create_labbook("test", "test", "labbook1", description="my first labbook")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    ds_root_dir = ds.root_dir
    lb_root_dir = lb.root_dir
    assert os.path.exists(ds_root_dir) is True
    assert os.path.exists(lb_root_dir) is True

    # Link dataset
    inv_manager.link_dataset_to_labbook(f"{ds_root_dir}/.git", "test", "dataset1", lb)

    m = Manifest(ds, 'test')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt")) is True

    dataset_delete_job = inv_manager.delete_dataset("test", "test", "dataset1")
    assert os.path.exists(ds_root_dir) is False
    assert os.path.exists(lb_root_dir) is True
    assert os.path.exists(m.cache_mgr.cache_root) is True
    assert dataset_delete_job.namespace == "test"
    assert dataset_delete_job.name == "dataset1"
    assert dataset_delete_job.cache_root == m.cache_mgr.cache_root

    # Because the dataset is still linked to a labbook, the cleanup job must
    # leave the file cache in place
    jobs.clean_dataset_file_cache("test", dataset_delete_job.namespace, dataset_delete_job.name,
                                  dataset_delete_job.cache_root, config_file=mock_config_file[0])
    assert os.path.exists(m.cache_mgr.cache_root) is True

    cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
    assert os.path.exists(cache_base) is True
def test_create_dataset_that_exists(self, mock_config_file):
    """Test trying to create a dataset with a name that already exists locally"""
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    with pytest.raises(ValueError):
        inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                   description="my first dataset", author=auth)
def test_change_dataset_name_errors(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)

    # Underscores are not valid in dataset names
    with pytest.raises(ValueError):
        ds.name = 'dataset_updated'

    # A 100-character name is the longest allowed; 101 characters must fail
    ds.name = 'd' * 100
    with pytest.raises(ValueError):
        ds.name = 'd' * 101
def test_is_dataset_create_date(self, mock_config_file):
    """Test getting the create date, both when stored in the buildinfo file and when using git fallback"""
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    create_on = ds.creation_date
    assert create_on.microsecond == 0
    assert create_on.tzname() == "UTC"
    assert (datetime.datetime.now(datetime.timezone.utc) - create_on).total_seconds() < 5
def test_load_dataset_from_file(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)

    with tempfile.TemporaryDirectory() as tempdir:
        r = shutil.move(ds.root_dir, tempdir)
        ds_loaded_from_file = inv_manager.load_dataset_from_directory(r)

        # Test failing case - invalid dir
        with pytest.raises(InventoryException):
            r = inv_manager.load_dataset_from_directory('/tmp')
def test_put_dataset(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    ds.namespace = 'test'
    orig_location = ds.root_dir

    with tempfile.TemporaryDirectory() as tempdir:
        r = shutil.move(ds.root_dir, tempdir)
        ds_loaded_from_file = inv_manager.load_dataset_from_directory(r)
        assert not os.path.exists(orig_location)
        assert orig_location not in [d.root_dir for d in inv_manager.list_datasets('test')]

        placed_ds = inv_manager.put_dataset(r, 'test', 'test')
        assert placed_ds.root_dir in [d.root_dir for d in inv_manager.list_datasets('test')]
def test_create_labbook_with_author(self, mock_config_file):
    """Test creating an empty labbook with the author set"""
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    lb = inv_manager.create_labbook("test", "test", "labbook1",
                                    description="my first labbook", author=auth)
    labbook_dir = lb.root_dir

    log_data = lb.git.log()
    assert log_data[0]['author']['name'] == "username"
    assert log_data[0]['author']['email'] == "*****@*****.**"
    assert log_data[0]['committer']['name'] == "Gigantum AutoCommit"
    assert log_data[0]['committer']['email'] == "*****@*****.**"
@pytest.fixture()
def mock_config_with_activitystore():
    """A pytest fixture that creates an ActivityStore (and labbook) and deletes the directory after the test"""
    # Create a temporary working directory
    conf_file, working_dir = _create_temp_work_dir()
    im = InventoryManager(conf_file)
    lb = im.create_labbook('default', 'default', 'labbook1', description="my first labbook",
                           author=GitAuthor("default", "*****@*****.**"))
    store = ActivityStore(lb)

    yield store, lb

    # Remove the temp_dir
    shutil.rmtree(working_dir)
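# A brief sketch of a test consuming the fixture above (illustrative only, not from
# the source; the assertion follows from the fixture body, which creates 'labbook1'):
def test_example_with_activitystore(mock_config_with_activitystore):
    store, lb = mock_config_with_activitystore
    # The fixture yields the store and the labbook it was built from
    assert 'labbook1' in lb.root_dir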
def test_linked_to(self, mock_config_file):
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook("test", "test", "lb1", "testing")
    ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert ds.linked_to() is None

    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test", "dataset1", lb)

    # The original dataset instance stays unlinked; the copy linked into the
    # labbook is the one that reports its parent
    assert ds.linked_to() is None
    linked_datasets = im.get_linked_datasets(lb)
    assert len(linked_datasets) == 1
    assert linked_datasets[0].linked_to() == "test|test|lb1"
def test_create_dataset_with_author(self, mock_config_file):
    """Test creating an empty dataset with the author set"""
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    dataset_dir = ds.root_dir
    assert dataset_dir == os.path.join(mock_config_file[1], "test", "test", "datasets", "dataset1")
    assert type(ds) == Dataset

    # Validate directory structure
    assert os.path.isdir(os.path.join(dataset_dir, "metadata")) is True
    assert os.path.isdir(os.path.join(dataset_dir, "manifest")) is True
    assert os.path.isdir(os.path.join(dataset_dir, ".gigantum")) is True
    assert os.path.isdir(os.path.join(dataset_dir, ".gigantum", "activity")) is True
    assert os.path.isdir(os.path.join(dataset_dir, ".gigantum", "favorites")) is True
    assert os.path.isdir(os.path.join(dataset_dir, ".gigantum", "activity", "log")) is True

    # Validate dataset data file
    with open(os.path.join(dataset_dir, ".gigantum", "gigantum.yaml"), "rt") as data_file:
        data = yaml.safe_load(data_file)

    assert data["name"] == "dataset1"
    assert data["description"] == "my first dataset"
    assert data["storage_type"] == "gigantum_object_v1"
    assert "id" in data
    assert ds.build_details is not None

    log_data = ds.git.log()
    assert log_data[0]['author']['name'] == "username"
    assert log_data[0]['author']['email'] == "*****@*****.**"
    assert log_data[0]['committer']['name'] == "Gigantum AutoCommit"
    assert log_data[0]['committer']['email'] == "*****@*****.**"

    assert os.path.exists(os.path.join(ds.root_dir, '.gigantum', 'backend.json')) is True
def test_readme_size_limit(self, mock_config_file):
    """Test that writing a readme above the size limit to the labbook fails"""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook('test', 'test', 'labbook1', description="my first labbook",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False

    # A 6 MB readme exceeds the limit and must not be written
    with pytest.raises(ValueError):
        lb.write_readme("A" * (6 * 1000000))

    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False
def __init__(self, user: str, owner: str, labbook_name: str, monitor_key: str,
             config_file: Optional[str] = None, author_name: Optional[str] = None,
             author_email: Optional[str] = None) -> None:
    """Constructor requires info to load the lab book

    Args:
        user(str): current logged in user
        owner(str): owner of the lab book
        labbook_name(str): name of the lab book
        monitor_key(str): Unique key for the activity monitor in redis
        config_file(str): Path to the config file, if not using the default
        author_name(str): Name of the user starting this activity monitor
        author_email(str): Email of the user starting this activity monitor
    """
    self.monitor_key = monitor_key

    # List of processor classes that will be invoked in order
    self.processors: List[ActivityProcessor] = []

    # Populate GitAuthor instance if available
    if author_name:
        author: Optional[GitAuthor] = GitAuthor(name=author_name, email=author_email)
    else:
        author = None

    # Load Lab Book instance
    im = InventoryManager(config_file)
    self.labbook = im.load_labbook(user, owner, labbook_name, author=author)

    self.user = user
    self.owner = owner
    self.labbook_name = labbook_name

    # Create ActivityStore instance
    self.activity_store = ActivityStore(self.labbook)

    # A flag indicating if the activity record is OK to store
    self.can_store_activity_record = False
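# A minimal usage sketch of the constructor above. The enclosing monitor class is
# not shown in this snippet, so `ExampleActivityMonitor` is a placeholder name and
# the argument values are illustrative, not from the source:
#
#     monitor = ExampleActivityMonitor(
#         user='default', owner='default', labbook_name='labbook1',
#         monitor_key='activity_monitor:default:default:labbook1',
#         author_name='default', author_email='default@test.com')
#     # Records are not stored until the monitor flips this flag
#     assert monitor.can_store_activity_record is False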
def test_read_write_readme(self, mock_config_file):
    """Test reading and writing a readme file to the labbook"""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook('test', 'test', 'labbook1', description="my first labbook",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert lb.get_readme() is None
    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is False

    lb.write_readme("## Summary\nThis is my readme")

    assert os.path.exists(os.path.join(lb.root_dir, 'README.md')) is True
    assert lb.get_readme() == "## Summary\nThis is my readme"
def get_logged_in_author():
    """Get the current logged-in user's GitAuthor instance

    Returns:
        GitAuthor
    """
    user = get_logged_in_user()
    if not user:
        logger = LMLogger()
        logger.logger.error("Failed to load a user identity from request context.")
        raise ValueError("Failed to load a user identity from request context.")

    # Create a GitAuthor instance if possible
    return GitAuthor(name=user.username, email=user.email)
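# A minimal usage sketch (hypothetical helper, not part of the source): attribute a
# labbook load to the logged-in user. `load_labbook`'s signature follows the
# constructor above; `config_file`, `user`, and the owner/name values are assumed
# to come from the caller.
def load_labbook_as_current_user(config_file, user, owner, labbook_name):
    # Raises ValueError if no identity is available in the request context
    author = get_logged_in_author()
    im = InventoryManager(config_file)
    return im.load_labbook(user, owner, labbook_name, author=author)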
def test_dataset_attributes(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="username", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    root_dir = ds.root_dir
    assert "dataset1" in root_dir
    assert "datasets" in root_dir

    assert ds.name == 'dataset1'
    assert ds.namespace == 'test'
    assert ds.description == 'my first dataset'
    assert isinstance(ds.creation_date, datetime.datetime)
    assert ds.build_details is not None
    assert ds.storage_type == "gigantum_object_v1"

    assert ds.backend_config == {}
    ds.backend_config = {"my_config": 123}
    assert ds.backend_config == {"my_config": 123}
def test_get_dataset_modified_on(self, fixture_working_dir_dataset_populated_scoped):
    """Test getting a dataset's modified date"""
    im = InventoryManager(fixture_working_dir_dataset_populated_scoped[0])
    ds = im.create_dataset("default", "default", "modified-on-test-ds", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="default", email="*****@*****.**"))

    modified_query = """
            {
              dataset(name: "modified-on-test-ds", owner: "default") {
                modifiedOnUtc
              }
            }
            """
    r = fixture_working_dir_dataset_populated_scoped[2].execute(modified_query)
    assert 'errors' not in r
    d = r['data']['dataset']['modifiedOnUtc']
    # using aniso8601 to parse because built-in datetime doesn't parse the UTC offset properly (configured for js)
    modified_on_1 = aniso8601.parse_datetime(d)
    assert modified_on_1.microsecond == 0
    assert modified_on_1.tzname() == "+00:00"

    time.sleep(3)
    with open(os.path.join(ds.root_dir, '.gigantum', 'dummy.txt'), 'wt') as testfile:
        testfile.write("asdfasdf")
    ds.git.add_all()
    ds.git.commit("testing")

    r = fixture_working_dir_dataset_populated_scoped[2].execute(modified_query)
    assert 'errors' not in r
    d = r['data']['dataset']['modifiedOnUtc']
    modified_on_2 = aniso8601.parse_datetime(d)
    assert modified_on_2.microsecond == 0
    assert modified_on_2.tzname() == "+00:00"

    assert (datetime.datetime.now(datetime.timezone.utc) - modified_on_1).total_seconds() < 10
    assert (datetime.datetime.now(datetime.timezone.utc) - modified_on_2).total_seconds() < 10
    assert modified_on_2 > modified_on_1
def test_delete_dataset(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="test", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    root_dir = ds.root_dir
    assert os.path.exists(root_dir) is True

    m = Manifest(ds, 'test')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt")) is True

    inv_manager.delete_dataset("test", "test", "dataset1")
    assert os.path.exists(root_dir) is False
    assert os.path.exists(m.cache_mgr.cache_root) is False

    cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
    assert os.path.exists(cache_base) is True
def test_get_dataset_create_date(self, fixture_working_dir_dataset_populated_scoped):
    """Test getting a dataset's create date"""
    im = InventoryManager(fixture_working_dir_dataset_populated_scoped[0])
    ds = im.create_dataset("default", "default", "create-on-test-ds", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="default", email="*****@*****.**"))

    query = """
            {
              dataset(name: "create-on-test-ds", owner: "default") {
                createdOnUtc
              }
            }
            """
    r = fixture_working_dir_dataset_populated_scoped[2].execute(query)
    assert 'errors' not in r
    d = r['data']['dataset']['createdOnUtc']
    # using aniso8601 to parse because built-in datetime doesn't parse the UTC offset properly (configured for js)
    create_on = aniso8601.parse_datetime(d)
    assert create_on.microsecond == 0
    assert create_on.tzname() == "+00:00"
    assert (datetime.datetime.now(datetime.timezone.utc) - create_on).total_seconds() < 5
def test_get_recent_activity(self, fixture_working_dir, snapshot, fixture_test_file):
    """Test paging through activity records"""
    im = InventoryManager(fixture_working_dir[0])
    lb = im.create_labbook("default", "default", "labbook11", description="my test description",
                           author=GitAuthor(name="tester", email="*****@*****.**"))
    FileOperations.insert_file(lb, "code", fixture_test_file)

    # fake activity
    store = ActivityStore(lb)
    adr1 = ActivityDetailRecord(ActivityDetailType.CODE)
    adr1.show = False
    adr1.importance = 100
    adr1.add_value("text/plain", "first")

    ar = ActivityRecord(ActivityType.CODE,
                        show=False,
                        message="ran some code",
                        importance=50,
                        linked_commit="asdf")
    ar.add_detail_object(adr1)

    # Create Activity Record
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)

    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt')
    FileOperations.makedir(lb, "input/test")
    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test")
    FileOperations.makedir(lb, "input/test2")
    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test2")

    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)

    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "output", '/tmp/test_file.txt')

    # Get all records at once with no pagination args and verify cursors look OK directly
    query = """
            {
              labbook(owner: "default", name: "labbook11") {
                overview {
                  recentActivity {
                    message
                    type
                    show
                    importance
                    tags
                  }
                }
              }
            }
            """
    snapshot.assert_match(fixture_working_dir[2].execute(query))