def test_s3_creation(self, container: AttributeContainer, bucket, environment):
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename = fake.unique.file_name()
    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")

        container[first].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
        container[second].track_files(f"s3://{bucket_name}/{prefix}")
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_s3_download(self, container: AttributeContainer, bucket, environment):
    first = self.gen_key()
    prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
    filename, filepath = (
        fake.unique.file_name(),
        fake.unique.file_path(depth=3).lstrip("/"),
    )
    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

    container[first].track_files(f"s3://{bucket_name}/{prefix}")
    container.sync()

    with tempfile.TemporaryDirectory() as tmp:
        with with_check_if_file_appears(f"{tmp}/{filename}"):
            container[first].download(tmp)

    with tmp_context():
        with with_check_if_file_appears(filename):
            container[first].download()
def test_simple_assign_and_fetch(self, container: AttributeContainer, value):
    key = self.gen_key()

    container[key] = value
    container.sync()

    assert container[key].fetch() == value
def test_local_download(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = (
        fake.unique.file_name(),
        fake.unique.file_path(depth=3).lstrip("/"),
    )

    with tmp_context() as tmp:
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Relative path
        container[first].track_files(filename)
        # Absolute path
        container[second].track_files(tmp)
        container.sync()

        with tmp_context():
            with with_check_if_file_appears(f"artifacts/{filename}"):
                container[first].download("artifacts/")
            with with_check_if_file_appears(filepath):
                container[second].download()
def test_local_existing(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = (
        fake.file_name(),
        fake.file_path(depth=3).lstrip("/"),
    )

    with tmp_context() as tmp:
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Track all files (both the top-level file and the nested one) in the first artifact
        container[first].track_files(".")
        # Track only the top-level file in the second artifact
        container[second].track_files(f"file://{tmp}/{filename}")
        container.sync()

        # Add the nested file to the existing second artifact,
        # so it should now be identical to the first
        container[second].track_files(filepath, destination=str(Path(filepath).parent))
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_do_not_accept_non_tag_path(self, container: AttributeContainer):
    random_path = "some/path"
    container[random_path].add(fake.unique.word())
    container.sync()

    with pytest.raises(AttributeError):
        # the backend accepts tags under 'sys/tags' only
        container[random_path].fetch()
def test_simple_assign_datetime(self, container: AttributeContainer):
    key = self.gen_key()
    now = datetime.now()

    container[key] = now
    container.sync()

    # expect the value to be truncated to millisecond precision and converted to UTC
    expected_now = now.astimezone(timezone.utc).replace(
        microsecond=int(now.microsecond / 1000) * 1000
    )
    assert container[key].fetch() == expected_now
def test_delete_atom(self, container: AttributeContainer):
    key = self.gen_key()
    value = fake.name()

    container[key] = value
    container.sync()

    assert container[key].fetch() == value

    del container[key]
    with pytest.raises(AttributeError):
        container[key].fetch()
def test_log_strings(self, container: AttributeContainer):
    key = self.gen_key()
    values = [fake.word() for _ in range(50)]

    container[key].log(values[0])
    container[key].log(values[1:])
    container.sync()

    assert container[key].fetch_last() == values[-1]

    fetched_values = container[key].fetch_values()
    assert list(fetched_values["value"]) == values
def test_distinct_types(self, container: AttributeContainer):
    namespace = self.gen_key()
    key = f"{fake.unique.word()}/{fake.unique.word()}"
    value = random.randint(0, 100)

    container[namespace] = {f"{key}": value}
    container.sync()

    assert container[f"{namespace}/{key}"].fetch() == value

    new_value = fake.name()
    with pytest.raises(ValueError):
        container[namespace] = {f"{key}": new_value}
        container.sync()
def sync(self, wait: bool = True) -> None:
    """Synchronizes the local representation of the run with Neptune servers.

    Args:
        wait (bool, optional, default is True): If `True`, the process will only wait for data to be saved
            locally from memory, but will not wait for it to reach Neptune servers.
            Defaults to `True`.

    Examples:
        >>> import neptune.new as neptune

        >>> # Connect to a run from Worker #3
        ... worker_id = 3
        >>> run = neptune.init(run='DIST-43', monitoring_namespace='monitoring/{}'.format(worker_id))

        >>> # Try to access logs that were created in the meantime by Worker #2
        ... worker_2_status = run['status/2'].fetch()  # Error if this field was created after this script started

        >>> run.sync()  # Synchronizes the local representation with Neptune servers
        >>> worker_2_status = run['status/2'].fetch()  # No error

    You may also want to check the `sync docs page`_.

    .. _sync docs page:
       https://docs.neptune.ai/api-reference/run#.sync
    """
    return AttributeContainer.sync(self, wait=wait)
def test_assignment(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename = fake.unique.file_name()

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        container[first].track_files(filename)
        container.wait()

        container[second] = container[first].fetch()
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_s3_existing(self, container: AttributeContainer, bucket, environment):
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename, filepath = (
        fake.file_name(),
        fake.file_path(depth=3).lstrip("/"),
    )
    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

    # Track all files (both the top-level file and the nested one) in the first artifact
    container[first].track_files(f"s3://{bucket_name}/{prefix}/")
    # Track only the top-level file in the second artifact
    container[second].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
    container.sync()

    # Add the nested file to the existing second artifact,
    # so it should now be identical to the first
    container[second].track_files(
        f"s3://{bucket_name}/{prefix}/{filepath}",
        destination=str(Path(filepath).parent),
    )
    container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_log_images(self, container: AttributeContainer):
    key = self.gen_key()
    # images ranging in size from roughly 200 KB to 12 MB
    images = list(generate_image(size=2**n) for n in range(8, 12))

    container[key].log(images[0])
    container[key].log(images[1:])
    container.sync()

    with tmp_context():
        container[key].download_last("last")
        container[key].download("all")

        with Image.open("last/3.png") as img:
            assert img == image_to_png(image=images[-1])

        for i in range(4):
            with Image.open(f"all/{i}.png") as img:
                assert img == image_to_png(image=images[i])
def test_single_file(self, container: AttributeContainer, file_size: int):
    key = self.gen_key()
    filename = fake.file_name()
    downloaded_filename = fake.file_name()

    with tmp_context():
        # create a file of the requested size
        with open(filename, "wb") as file:
            file.write(b"\0" * file_size)

        container[key].upload(filename)
        container.sync()
        container[key].download(downloaded_filename)

        assert os.path.getsize(downloaded_filename) == file_size
        with open(downloaded_filename, "rb") as file:
            content = file.read()
            assert len(content) == file_size
            assert content == b"\0" * file_size
def test_delete_namespace(self, container: AttributeContainer):
    namespace = fake.unique.word()
    key1 = fake.unique.word()
    key2 = fake.unique.word()
    value1 = fake.name()
    value2 = fake.name()

    container[namespace][key1] = value1
    container[namespace][key2] = value2
    container.sync()

    assert container[namespace][key1].fetch() == value1
    assert container[namespace][key2].fetch() == value2

    del container[namespace]
    with pytest.raises(AttributeError):
        container[namespace][key1].fetch()
    with pytest.raises(AttributeError):
        container[namespace][key2].fetch()
def test_add_and_remove_tags(self, container: AttributeContainer):
    remaining_tag1 = fake.unique.word()
    remaining_tag2 = fake.unique.word()
    to_remove_tag1 = fake.unique.word()
    to_remove_tag2 = fake.unique.word()

    container.sync()
    if container.exists(self.neptune_tags_path):
        container[self.neptune_tags_path].clear()
    container[self.neptune_tags_path].add(remaining_tag1)
    container[self.neptune_tags_path].add([to_remove_tag1, remaining_tag2])
    container[self.neptune_tags_path].remove(to_remove_tag1)
    container[self.neptune_tags_path].remove(to_remove_tag2)  # remove a non-existing tag
    container.sync()

    assert container[self.neptune_tags_path].fetch() == {
        remaining_tag1,
        remaining_tag2,
    }
def sync(self, wait: bool = True) -> None:
    """Synchronizes the local representation of the project with Neptune servers.

    Args:
        wait (bool, optional, default is True): If `True`, the process will only wait for data to be saved
            locally from memory, but will not wait for it to reach Neptune servers.
            Defaults to `True`.

    You may also want to check the `sync docs page`_.

    .. _sync docs page:
       https://docs.neptune.ai/api-reference/project#.sync
    """
    return AttributeContainer.sync(self, wait=wait)
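# A minimal usage sketch for Project.sync(), mirroring the Run.sync() example above.
# Illustration only: it assumes the project handle comes from neptune.get_project()
# and that the 'general/brief' field name is hypothetical.
#
# >>> import neptune.new as neptune
# >>> project = neptune.get_project(name='WORKSPACE/PROJECT')
# >>> project['general/brief'] = 'Updated from an external process'
# >>> project.sync()  # flush queued operations and refresh the local structure
# >>> project['general/brief'].fetch()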
def test_reassigning(self, container: AttributeContainer):
    namespace = self.gen_key()
    key = f"{fake.unique.word()}/{fake.unique.word()}"
    value = fake.name()

    # Assign a namespace
    container[namespace] = {f"{key}": value}
    container.sync()

    assert container[f"{namespace}/{key}"].fetch() == value

    # Directly reassign the nested value
    value = fake.name()
    container[f"{namespace}/{key}"] = value
    container.sync()

    assert container[f"{namespace}/{key}"].fetch() == value

    # Reassign via the namespace
    value = fake.name()
    container[namespace] = {f"{key}": value}
    container.sync()

    assert container[f"{namespace}/{key}"].fetch() == value
def test_fileset(self, container: AttributeContainer):
    key = self.gen_key()
    large_filesize = 10 * 2**20
    large_filename = fake.file_name()
    small_files = [
        (f"{uuid.uuid4()}.{fake.file_extension()}", fake.sentence().encode("utf-8"))
        for _ in range(100)
    ]

    with tmp_context():
        # create a single large file (uploaded as multipart) and a lot of very small files
        with open(large_filename, "wb") as file:
            file.write(b"\0" * large_filesize)
        for filename, contents in small_files:
            with open(filename, "wb") as file:
                file.write(contents)
        small_filenames = [filename for filename, _ in small_files]
        # make sure there are no duplicates
        assert len({large_filename, *small_filenames}) == len(small_files) + 1

        # when a single file is uploaded as a fileset
        container[key].upload_files([large_filename])

        # then check that it can be downloaded
        container.sync()
        container[key].download("downloaded1.zip")
        with ZipFile("downloaded1.zip") as zipped:
            assert set(zipped.namelist()) == {large_filename, "/"}
            with zipped.open(large_filename, "r") as file:
                content = file.read()
                assert len(content) == large_filesize
                assert content == b"\0" * large_filesize

        # when the small files are added to the fileset
        container[key].upload_files(small_filenames)

        # then check that everything can be downloaded
        container.sync()
        container[key].download("downloaded2.zip")
        with ZipFile("downloaded2.zip") as zipped:
            assert set(zipped.namelist()) == {large_filename, "/", *small_filenames}
            with zipped.open(large_filename, "r") as file:
                content = file.read()
                assert len(content) == large_filesize
                assert content == b"\0" * large_filesize
            for filename, expected_content in small_files:
                with zipped.open(filename, "r") as file:
                    content = file.read()
                    assert len(content) == len(expected_content)
                    assert content == expected_content

        # when the first file is removed
        container[key].delete_files([large_filename])

        # then check that the rest can still be downloaded
        container.sync()
        container[key].download("downloaded3.zip")
        with ZipFile("downloaded3.zip") as zipped:
            assert set(zipped.namelist()) == {"/", *small_filenames}
            for filename, expected_content in small_files:
                with zipped.open(filename, "r") as file:
                    content = file.read()
                    assert len(content) == len(expected_content)
                    assert content == expected_content