def test_local_download(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = fake.unique.file_name(), fake.unique.file_path(depth=3).lstrip("/")

    with tmp_context() as tmp:
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Relative path
        container[first].track_files(filename)
        # Absolute path
        container[second].track_files(tmp)
        container.sync()

        with tmp_context():
            with with_check_if_file_appears(f"artifacts/{filename}"):
                container[first].download("artifacts/")

            with with_check_if_file_appears(filepath):
                container[second].download()
def test_local_existing(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = fake.file_name(), fake.file_path(depth=3).lstrip("/")

    with tmp_context() as tmp:
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Track both files to the first artifact
        container[first].track_files(".")
        # Track only `filename` to the second artifact
        container[second].track_files(f"file://{tmp}/{filename}")
        container.sync()

        # Add `filepath` to the existing second artifact,
        # so it should now be identical to the first
        container[second].track_files(filepath, destination=str(Path(filepath).parent))
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_s3_creation(self, container: AttributeContainer, bucket, environment):
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename = fake.unique.file_name()

    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")

    container[first].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
    container[second].track_files(f"s3://{bucket_name}/{prefix}")
    container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_s3_download(self, container: AttributeContainer, bucket, environment):
    first = self.gen_key()
    prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
    filename, filepath = fake.unique.file_name(), fake.unique.file_path(depth=3).lstrip("/")

    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

    container[first].track_files(f"s3://{bucket_name}/{prefix}")
    container.sync()

    with tempfile.TemporaryDirectory() as tmp:
        with with_check_if_file_appears(f"{tmp}/{filename}"):
            container[first].download(tmp)

    with tmp_context():
        with with_check_if_file_appears(filename):
            container[first].download()
def test_simple_assign_and_fetch(self, container: AttributeContainer, value):
    key = self.gen_key()

    container[key] = value
    container.sync()

    assert container[key].fetch() == value
def test_do_not_accept_non_tag_path(self, container: AttributeContainer):
    random_path = "some/path"
    container[random_path].add(fake.unique.word())
    container.sync()

    with pytest.raises(AttributeError):
        # the backend accepts 'sys/tags' only
        container[random_path].fetch()
def test_simple_assign_datetime(self, container: AttributeContainer):
    key = self.gen_key()
    now = datetime.now()

    container[key] = now
    container.sync()

    # expect the value truncated to milliseconds and converted to the UTC timezone
    expected_now = now.astimezone(timezone.utc).replace(
        microsecond=int(now.microsecond / 1000) * 1000
    )
    assert container[key].fetch() == expected_now
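# Worked example of the truncation in the test above (illustrative values,
# not part of the test):
#
#     >>> microsecond = 123456
#     >>> int(microsecond / 1000) * 1000
#     123000
#
# i.e. the backend keeps millisecond precision and normalizes timestamps to UTC.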
def test_delete_atom(self, container: AttributeContainer):
    key = self.gen_key()
    value = fake.name()

    container[key] = value
    container.sync()

    assert container[key].fetch() == value

    del container[key]
    with pytest.raises(AttributeError):
        container[key].fetch()
def test_log_strings(self, container: AttributeContainer):
    key = self.gen_key()
    values = [fake.word() for _ in range(50)]

    container[key].log(values[0])
    container[key].log(values[1:])
    container.sync()

    assert container[key].fetch_last() == values[-1]

    fetched_values = container[key].fetch_values()
    assert list(fetched_values["value"]) == values
def test_distinct_types(self, container: AttributeContainer):
    namespace = self.gen_key()
    key = f"{fake.unique.word()}/{fake.unique.word()}"
    value = random.randint(0, 100)

    container[namespace] = {f"{key}": value}
    container.sync()

    assert container[f"{namespace}/{key}"].fetch() == value

    new_value = fake.name()
    with pytest.raises(ValueError):
        container[namespace] = {f"{key}": new_value}
        container.sync()
def test_assignment(self, container: AttributeContainer):
    first, second = self.gen_key(), self.gen_key()
    filename = fake.unique.file_name()

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        container[first].track_files(filename)
        container.wait()

        container[second] = container[first].fetch()
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def fetch(self) -> dict:
    """Fetch values of all non-File Atom fields as a dictionary.

    The result will preserve the hierarchical structure of the run's metadata,
    but will contain only non-File Atom fields.
    You can use this method to quickly retrieve a previous run's parameters.

    Returns:
        `dict` containing all non-File Atom fields values.

    Examples:
        >>> import neptune.new as neptune
        >>> resumed_run = neptune.init(run="HEL-3")
        >>> params = resumed_run['model/parameters'].fetch()

        >>> run_data = resumed_run.fetch()
        >>> print(run_data)
        >>> # this will print out all Atom attributes stored in the run as a dict

    You may also want to check `fetch docs page`_.

    .. _fetch docs page:
       https://docs.neptune.ai/api-reference/run#.fetch
    """
    return AttributeContainer.fetch(self)
def sync(self, wait: bool = True) -> None:
    """Synchronizes local representation of the run with Neptune servers.

    Args:
        wait (bool, optional, default is True):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `True`.

    Examples:
        >>> import neptune.new as neptune

        >>> # Connect to a run from Worker #3
        ... worker_id = 3
        >>> run = neptune.init(run='DIST-43', monitoring_namespace='monitoring/{}'.format(worker_id))

        >>> # Try to access logs that were created in the meantime by Worker #2
        ... worker_2_status = run['status/2'].fetch()  # Error if this field was created after this script started

        >>> run.sync()  # Synchronizes local representation with Neptune servers.
        >>> worker_2_status = run['status/2'].fetch()  # No error

    You may also want to check `sync docs page`_.

    .. _sync docs page:
       https://docs.neptune.ai/api-reference/run#.sync
    """
    return AttributeContainer.sync(self, wait=wait)
def pop(self, path: str, wait: bool = False) -> None:
    """Completely removes the field or namespace stored under the given path, along with all associated data.

    Args:
        path (str): Path of the field or namespace to be removed.
        wait (bool, optional):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `False`.

    Examples:
        >>> import neptune.new as neptune
        >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

        >>> # Delete a field along with its data
        ... project.pop("datasets/v0.4")

        >>> # .pop() can be invoked directly on fields and namespaces
        >>> project['parameters/learning_rate'] = 0.3
        >>> # The following line
        ... project.pop('parameters/learning_rate')
        >>> # is equivalent to this line
        ... project['parameters/learning_rate'].pop()
        >>> # or this line
        ... project['parameters'].pop('learning_rate')

        >>> # You can also delete a whole namespace in one call
        ... project["datasets"].pop()

    You may also want to check `pop docs page`_.

    .. _pop docs page:
       https://docs.neptune.ai/api-reference/project#.pop
    """
    return AttributeContainer.pop(self, path=path, wait=wait)
def pop(self, path: str, wait: bool = False) -> None:
    """Completely removes the field stored under the given path, along with all associated data.

    Args:
        path (str): Path of the field to be removed.
        wait (bool, optional):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `False`.

    Examples:
        >>> import neptune.new as neptune
        >>> run = neptune.init()

        >>> run['parameters/learninggg_rata'] = 0.3

        >>> # Delete the misspelled field along with its data
        ... run.pop('parameters/learninggg_rata')

        >>> run['parameters/learning_rate'] = 0.3

        >>> # Training finished
        ... run['trained_model'].upload('model.pt')

        >>> # 'trained_model' is a File field
        ... run.pop('trained_model')

    You may also want to check `pop docs page`_.

    .. _pop docs page:
       https://docs.neptune.ai/api-reference/run#.pop
    """
    return AttributeContainer.pop(self, path=path, wait=wait)
def assign(self, value, wait: bool = False) -> None:
    """Assign values to multiple fields from a dictionary.

    You can use this method to quickly log all of a run's parameters.

    Args:
        value (dict): A dictionary with values to assign, where keys become the paths of the fields.
            The dictionary can be nested - in such case the path will be a combination of all keys.
        wait (bool, optional):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `False`.

    Examples:
        >>> import neptune.new as neptune
        >>> run = neptune.init_run()

        >>> # Assign multiple fields from a dictionary
        ... params = {"max_epochs": 10, "optimizer": "Adam"}
        >>> run["parameters"] = params

        >>> # You can also log parameters explicitly, one by one
        ... run["parameters/max_epochs"] = 10
        >>> run["parameters/optimizer"] = "Adam"

        >>> # Dictionaries can be nested
        ... params = {"train": {"max_epochs": 10}}
        >>> run["parameters"] = params
        >>> # This will log 10 under the path "parameters/train/max_epochs"

    You may also want to check `assign docs page`_.

    .. _assign docs page:
       https://docs.neptune.ai/api-reference/run#.assign
    """
    return AttributeContainer.assign(self, value=value, wait=wait)
def assign(self, value, wait: bool = False) -> None:
    """Assign values to multiple fields from a dictionary.

    You can use this method to log multiple pieces of information with one command.

    Args:
        value (dict): A dictionary with values to assign, where keys become the paths of the fields.
            The dictionary can be nested - in such case the path will be a combination of all keys.
        wait (bool, optional):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `False`.

    Examples:
        >>> import neptune.new as neptune
        >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

        >>> # Assign multiple fields from a dictionary
        ... general_info = {"brief": URL_TO_PROJECT_BRIEF, "deadline": "2049-06-30"}
        >>> project["general"] = general_info

        >>> # You can also log fields explicitly, one by one
        ... project["general/brief"] = URL_TO_PROJECT_BRIEF
        >>> project["general/deadline"] = "2049-06-30"

        >>> # Dictionaries can be nested
        ... general_info = {"brief": {"url": URL_TO_PROJECT_BRIEF}}
        >>> project["general"] = general_info
        >>> # This will log the url under the path "general/brief/url"

    You may also want to check `assign docs page`_.

    .. _assign docs page:
       https://docs.neptune.ai/api-reference/project#.assign
    """
    return AttributeContainer.assign(self, value=value, wait=wait)
def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
    """Stops the tracked run and kills the synchronization thread.

    `.stop()` will be automatically called when a script that created the run finishes
    or on the destruction of Neptune context.

    When using Neptune with Jupyter notebooks it's a good practice to stop the tracked run manually,
    as it will be stopped automatically only when the Jupyter kernel stops.

    Args:
        seconds (int or float, optional): Seconds to wait for all tracking calls to finish
            before stopping the tracked run.
            If `None` will wait for all tracking calls to finish. Defaults to `None`.

    Examples:
        If you are creating tracked runs from a script you don't need to call `.stop()`:

        >>> import neptune.new as neptune
        >>> run = neptune.init()

        >>> # Your training or monitoring code
        ... pass
        ... # If you are executing a Python script, .stop()
        ... # is automatically called at the end for every run

        If you are performing multiple training jobs from one script, one after the other, it is a good practice
        to `.stop()` the finished tracked runs, as every open run keeps an open connection with Neptune,
        monitors hardware usage, etc. You can also use context managers - Neptune will automatically call `.stop()`
        on the destruction of the Run context:

        >>> import neptune.new as neptune

        >>> # If you are running consecutive training jobs from the same script
        ... # stop the tracked runs manually at the end of each training job
        ... for config in configs:
        ...     run = neptune.init()
        ...     # Your training or monitoring code
        ...     pass
        ...     run.stop()

        >>> # You can also use the with statement and context manager
        ... for config in configs:
        ...     with neptune.init() as run:
        ...         # Your training or monitoring code
        ...         pass
        ...         # .stop() is automatically called
        ...         # when code execution exits the with statement

    .. warning::
        If you are using Jupyter notebooks for creating your runs you need to manually invoke `.stop()`
        once the training and evaluation is done.

    You may also want to check `stop docs page`_.

    .. _stop docs page:
       https://docs.neptune.ai/api-reference/run#.stop
    """
    return AttributeContainer.stop(self, seconds=seconds)
def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
    """Stops the connection to the project and kills the synchronization thread.

    `.stop()` will be automatically called when a script that initialized the connection finishes
    or on the destruction of Neptune context.

    When using Neptune with Jupyter notebooks it's a good practice to stop the connection manually,
    as it will be stopped automatically only when the Jupyter kernel stops.

    Args:
        seconds (int or float, optional): Seconds to wait for all tracking calls to finish
            before stopping the connection.
            If `None` will wait for all tracking calls to finish. Defaults to `None`.

    Examples:
        If you are initializing the connection from a script you don't need to call `.stop()`:

        >>> import neptune.new as neptune
        >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

        >>> # Your code
        ... pass
        ... # If you are executing a Python script, .stop()
        ... # is automatically called at the end for every Neptune object

        If you are initializing multiple connections from one script it is a good practice
        to .stop() the unneeded connections. You can also use context managers -
        Neptune will automatically call .stop() on the destruction of the Project context:

        >>> import neptune.new as neptune

        >>> # If you are initializing multiple connections from the same script
        ... # stop each connection manually once it is not needed
        ... for project_name in projects:
        ...     project = neptune.init_project(name=project_name)
        ...     # Your code
        ...     pass
        ...     project.stop()

        >>> # You can also use the with statement and context manager
        ... for project_name in projects:
        ...     with neptune.init_project(name=project_name) as project:
        ...         # Your code
        ...         pass
        ...         # .stop() is automatically called
        ...         # when code execution exits the with statement

    .. warning::
        If you are using Jupyter notebooks for connecting to a project you need to manually invoke `.stop()`
        once the connection is not needed.

    You may also want to check `stop docs page`_.

    .. _stop docs page:
       https://docs.neptune.ai/api-reference/project#.stop
    """
    return AttributeContainer.stop(self, seconds=seconds)
def test_s3_existing(self, container: AttributeContainer, bucket, environment):
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename, filepath = fake.file_name(), fake.file_path(depth=3).lstrip("/")

    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

    # Track both files to the first artifact
    container[first].track_files(f"s3://{bucket_name}/{prefix}/")
    # Track only `filename` to the second artifact
    container[second].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
    container.sync()

    # Add `filepath` to the existing second artifact,
    # so it should now be identical to the first
    container[second].track_files(
        f"s3://{bucket_name}/{prefix}/{filepath}",
        destination=str(Path(filepath).parent),
    )
    container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_log_images(self, container: AttributeContainer):
    key = self.gen_key()
    # images with sizes between 200 KB and 12 MB
    images = list(generate_image(size=2**n) for n in range(8, 12))

    container[key].log(images[0])
    container[key].log(images[1:])
    container.sync()

    with tmp_context():
        container[key].download_last("last")
        container[key].download("all")

        with Image.open("last/3.png") as img:
            assert img == image_to_png(image=images[-1])

        for i in range(4):
            with Image.open(f"all/{i}.png") as img:
                assert img == image_to_png(image=images[i])
def test_single_file(self, container: AttributeContainer, file_size: int):
    key = self.gen_key()
    filename = fake.file_name()
    downloaded_filename = fake.file_name()

    with tmp_context():
        # create a zero-filled file of `file_size` bytes
        with open(filename, "wb") as file:
            file.write(b"\0" * file_size)

        container[key].upload(filename)
        container.sync()
        container[key].download(downloaded_filename)

        assert os.path.getsize(downloaded_filename) == file_size
        with open(downloaded_filename, "rb") as file:
            content = file.read()
            assert len(content) == file_size
            assert content == b"\0" * file_size
def test_delete_namespace(self, container: AttributeContainer):
    namespace = fake.unique.word()
    key1 = fake.unique.word()
    key2 = fake.unique.word()
    value1 = fake.name()
    value2 = fake.name()

    container[namespace][key1] = value1
    container[namespace][key2] = value2
    container.sync()

    assert container[namespace][key1].fetch() == value1
    assert container[namespace][key2].fetch() == value2

    del container[namespace]
    with pytest.raises(AttributeError):
        container[namespace][key1].fetch()
    with pytest.raises(AttributeError):
        container[namespace][key2].fetch()
def test_add_and_remove_tags(self, container: AttributeContainer):
    remaining_tag1 = fake.unique.word()
    remaining_tag2 = fake.unique.word()
    to_remove_tag1 = fake.unique.word()
    to_remove_tag2 = fake.unique.word()

    container.sync()
    if container.exists(self.neptune_tags_path):
        container[self.neptune_tags_path].clear()
    container[self.neptune_tags_path].add(remaining_tag1)
    container[self.neptune_tags_path].add([to_remove_tag1, remaining_tag2])
    container[self.neptune_tags_path].remove(to_remove_tag1)
    container[self.neptune_tags_path].remove(to_remove_tag2)  # remove a non-existing tag
    container.sync()

    assert container[self.neptune_tags_path].fetch() == {
        remaining_tag1,
        remaining_tag2,
    }
def sync(self, wait: bool = True) -> None:
    """Synchronizes local representation of the project with Neptune servers.

    Args:
        wait (bool, optional, default is True):
            If `True` the client will first wait to send all tracked metadata to the server.
            This makes the call synchronous.
            Defaults to `True`.

    You may also want to check `sync docs page`_.

    .. _sync docs page:
       https://docs.neptune.ai/api-reference/project#.sync
    """
    return AttributeContainer.sync(self, wait=wait)
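# A minimal usage sketch for Project.sync (illustrative only; assumes another
# process writes to the same project, and "MY_WORKSPACE/MY_PROJECT" plus the
# 'status' field are placeholders):
#
#     >>> import neptune.new as neptune
#     >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")
#     >>> # Pick up fields created by other processes in the meantime
#     ... project.sync()
#     >>> status = project['status'].fetch()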
def wait(self, disk_only=False) -> None:
    """Wait for all the tracking calls to finish.

    Args:
        disk_only (bool, optional, default is False):
            If `True` the process will only wait for data to be saved locally from memory,
            but will not wait for them to reach Neptune servers.
            Defaults to `False`.

    You may also want to check `wait docs page`_.

    .. _wait docs page:
       https://docs.neptune.ai/api-reference/project#.wait
    """
    return AttributeContainer.wait(self, disk_only=disk_only)
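# A minimal usage sketch contrasting the two `wait` modes (illustrative only;
# the project name, field path, and S3 URL are placeholders):
#
#     >>> import neptune.new as neptune
#     >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")
#     >>> project["datasets/v0.4"].track_files("s3://my-bucket/datasets/v0.4")
#     >>> # Block until the tracked metadata reaches Neptune servers...
#     ... project.wait()
#     >>> # ...or only until it is flushed from memory to local disk:
#     ... project.wait(disk_only=True)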
def get_structure(self) -> Dict[str, Any]:
    """Returns the run's metadata structure in the form of a dictionary.

    This method can be used to traverse the run's metadata structure programmatically
    when using Neptune in automated workflows.

    .. danger::
        The returned object is a shallow copy of the internal run's structure.
        Any modifications to it may result in tracking malfunction.

    Returns:
        ``dict``: with the run's metadata structure.
    """
    return AttributeContainer.get_structure(self)
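# A minimal sketch of traversing the dictionary returned by `get_structure`
# (illustrative only; which leaf attribute types appear depends on what was
# logged to the run):
#
#     >>> structure = run.get_structure()
#     >>> def walk(node, path=""):
#     ...     # Namespaces are nested dicts; leaves are attribute objects.
#     ...     for key, value in node.items():
#     ...         if isinstance(value, dict):
#     ...             walk(value, f"{path}{key}/")
#     ...         else:
#     ...             print(f"{path}{key}: {type(value).__name__}")
#     >>> walk(structure)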
def get_structure(self) -> Dict[str, Any]:
    """Returns the project's metadata structure in the form of a dictionary.

    This method can be used to traverse the project's metadata structure programmatically
    when using Neptune in automated workflows.

    .. danger::
        The returned object is a shallow copy of the internal structure.
        Any modifications to it may result in tracking malfunction.

    Returns:
        ``dict``: with the project's metadata structure.
    """
    return AttributeContainer.get_structure(self)
def fetch(self) -> dict:
    """Fetch values of all non-File Atom fields as a dictionary.

    The result will preserve the hierarchical structure of the project's metadata,
    but will contain only non-File Atom fields.

    Returns:
        `dict` containing all non-File Atom fields values.

    Examples:
        >>> import neptune.new as neptune
        >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

        >>> # Fetch all the project metrics
        >>> project_metrics = project["metrics"].fetch()

    You may also want to check `fetch docs page`_.

    .. _fetch docs page:
       https://docs.neptune.ai/api-reference/project#.fetch
    """
    return AttributeContainer.fetch(self)
def test_reassigning(self, container: AttributeContainer):
    namespace = self.gen_key()
    key = f"{fake.unique.word()}/{fake.unique.word()}"
    value = fake.name()

    # Assign a namespace
    container[namespace] = {f"{key}": value}
    container.sync()
    assert container[f"{namespace}/{key}"].fetch() == value

    # Directly reassign the internal value
    value = fake.name()
    container[f"{namespace}/{key}"] = value
    container.sync()
    assert container[f"{namespace}/{key}"].fetch() == value

    # Reassign through the namespace
    value = fake.name()
    container[namespace] = {f"{key}": value}
    container.sync()
    assert container[f"{namespace}/{key}"].fetch() == value