Beispiel #1
0
    def test_local_download(self, container: AttributeContainer):
        """Track local files by relative and absolute path, then download them.

        Two files are written inside a ``tmp_context``: one at the top level and
        one nested three directories deep. The first artifact tracks the
        top-level file by its relative name, the second tracks the value yielded
        by ``tmp_context``. After syncing, both artifacts are downloaded inside
        a fresh ``tmp_context`` and the expected files must appear.
        """
        key_relative = self.gen_key()
        key_absolute = self.gen_key()
        flat_name = fake.unique.file_name()
        nested_path = fake.unique.file_path(depth=3).lstrip("/")

        with tmp_context() as workdir:
            Path(flat_name).write_text(fake.paragraph(nb_sentences=5),
                                       encoding="utf-8")

            nested_file = Path(nested_path)
            os.makedirs(nested_file.parent, exist_ok=True)
            nested_file.write_text(fake.paragraph(nb_sentences=5),
                                   encoding="utf-8")

            # Relative path
            container[key_relative].track_files(flat_name)
            # Absolute path
            container[key_absolute].track_files(workdir)

            container.sync()

            # Download from within a second, nested temporary context.
            with tmp_context():
                # Explicit destination directory.
                with with_check_if_file_appears(f"artifacts/{flat_name}"):
                    container[key_relative].download("artifacts/")

                # No destination given: the nested relative path must appear.
                with with_check_if_file_appears(nested_path):
                    container[key_absolute].download()
Beispiel #2
0
    def test_local_existing(self, container: AttributeContainer):
        """Extending an existing artifact must match tracking everything at once.

        The first artifact tracks ``"."`` in one go. The second artifact first
        tracks only the top-level file (through a ``file://`` URL) and, after a
        sync, is extended with the nested file. Both artifacts must end up with
        the same hash and the same files list.
        """
        key_all = self.gen_key()
        key_incremental = self.gen_key()
        flat_name = fake.file_name()
        nested_path = fake.file_path(depth=3).lstrip("/")
        nested_dir = Path(nested_path).parent

        with tmp_context() as workdir:
            Path(flat_name).write_text(fake.paragraph(nb_sentences=5),
                                       encoding="utf-8")

            os.makedirs(nested_dir, exist_ok=True)
            Path(nested_path).write_text(fake.paragraph(nb_sentences=5),
                                         encoding="utf-8")

            # First artifact: track all files ("a" and "b") at once.
            container[key_all].track_files(".")

            # Second artifact, step one: only the "a" file.
            container[key_incremental].track_files(
                f"file://{workdir}/{flat_name}")
            container.sync()

            # Second artifact, step two: add the "b" file to the existing
            # artifact, so it should now be identical to the first one.
            container[key_incremental].track_files(
                nested_path, destination=str(nested_dir))
            container.sync()

        hash_all = container[key_all].fetch_hash()
        hash_incremental = container[key_incremental].fetch_hash()
        assert hash_all == hash_incremental

        files_all = container[key_all].fetch_files_list()
        files_incremental = container[key_incremental].fetch_files_list()
        assert files_all == files_incremental
Beispiel #3
0
    def test_s3_creation(self, container: AttributeContainer, bucket,
                         environment):
        """Tracking an S3 object directly or via its prefix gives equal artifacts.

        A single file is uploaded under a freshly generated prefix. One artifact
        tracks the object's full ``s3://`` URL, the other tracks the prefix
        only; their hashes and files lists must be equal.
        """
        key_object = self.gen_key()
        key_prefix = self.gen_key()
        prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
        filename = fake.unique.file_name()

        bucket_name, s3_client = bucket
        object_key = f"{prefix}/{filename}"

        with tmp_context():
            content = fake.paragraph(nb_sentences=5)
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(content)

            s3_client.meta.client.upload_file(filename, bucket_name,
                                              object_key)

        container[key_object].track_files(f"s3://{bucket_name}/{object_key}")
        container[key_prefix].track_files(f"s3://{bucket_name}/{prefix}")

        container.sync()

        hash_object = container[key_object].fetch_hash()
        hash_prefix = container[key_prefix].fetch_hash()
        assert hash_object == hash_prefix

        files_object = container[key_object].fetch_files_list()
        files_prefix = container[key_prefix].fetch_files_list()
        assert files_object == files_prefix
Beispiel #4
0
    def test_s3_download(self, container: AttributeContainer, bucket,
                         environment):
        """Download an S3-backed artifact to an explicit and to the default location.

        Two files (one top-level, one nested) are uploaded under a generated
        prefix and the prefix is tracked as a single artifact. The artifact is
        then downloaded once into a ``tempfile.TemporaryDirectory`` and once
        without a destination inside a ``tmp_context``; the top-level file must
        appear in both cases.
        """
        key = self.gen_key()
        prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
        flat_name = fake.unique.file_name()
        nested_path = fake.unique.file_path(depth=3).lstrip("/")

        bucket_name, s3_client = bucket
        upload = s3_client.meta.client.upload_file

        with tmp_context():
            Path(flat_name).write_text(fake.paragraph(nb_sentences=5),
                                       encoding="utf-8")

            os.makedirs(Path(nested_path).parent, exist_ok=True)
            Path(nested_path).write_text(fake.paragraph(nb_sentences=5),
                                         encoding="utf-8")

            # Same upload order as the files were created.
            upload(flat_name, bucket_name, f"{prefix}/{flat_name}")
            upload(nested_path, bucket_name, f"{prefix}/{nested_path}")

        container[key].track_files(f"s3://{bucket_name}/{prefix}")

        container.sync()

        # Explicit destination directory.
        with tempfile.TemporaryDirectory() as target_dir:
            with with_check_if_file_appears(f"{target_dir}/{flat_name}"):
                container[key].download(target_dir)

        # Default destination.
        with tmp_context():
            with with_check_if_file_appears(flat_name):
                container[key].download()
Beispiel #5
0
    def test_simple_assign_and_fetch(self, container: AttributeContainer,
                                     value):
        """Assign ``value`` under a generated key and expect to fetch it back unchanged."""
        field_path = self.gen_key()

        container[field_path] = value
        container.sync()

        fetched = container[field_path].fetch()
        assert fetched == value
Beispiel #6
0
    def test_do_not_accept_non_tag_path(self, container: AttributeContainer):
        """Adding to a path other than the tags path must not produce a fetchable field.

        After ``add`` on ``"some/path"`` and a sync, ``fetch`` on that path is
        expected to raise ``AttributeError``.
        """
        non_tag_path = "some/path"
        word = fake.unique.word()
        container[non_tag_path].add(word)
        container.sync()

        # Backends accept `'sys/tags'` only, so nothing is stored under this path.
        with pytest.raises(AttributeError):
            container[non_tag_path].fetch()
Beispiel #7
0
    def test_simple_assign_datetime(self, container: AttributeContainer):
        """Assign a naive ``datetime`` and check the normalised value that is fetched.

        The fetched value is expected to be the assigned moment converted to
        UTC with its sub-millisecond part dropped.
        """
        key = self.gen_key()
        moment = datetime.now()

        container[key] = moment
        container.sync()

        # Expect truncation to whole milliseconds and an added UTC timezone.
        whole_millis_as_micros = moment.microsecond // 1000 * 1000
        expected = moment.astimezone(timezone.utc).replace(
            microsecond=whole_millis_as_micros)
        assert container[key].fetch() == expected
Beispiel #8
0
    def test_delete_atom(self, container: AttributeContainer):
        """Delete a single field and expect ``fetch`` on it to raise ``AttributeError``."""
        field_path = self.gen_key()
        stored = fake.name()

        container[field_path] = stored
        container.sync()

        # Sanity check: the value is there before deletion.
        fetched = container[field_path].fetch()
        assert fetched == stored

        del container[field_path]
        with pytest.raises(AttributeError):
            container[field_path].fetch()
    def test_log_strings(self, container: AttributeContainer):
        """Log 50 words (one singly, the rest as a batch) and read the series back.

        ``fetch_last`` must return the final word and the ``"value"`` column of
        ``fetch_values`` must list every word in logging order.
        """
        key = self.gen_key()
        words = [fake.word() for _ in range(50)]
        first_word, *other_words = words

        # A single value first, then the remainder in one call.
        container[key].log(first_word)
        container[key].log(other_words)
        container.sync()

        assert container[key].fetch_last() == words[-1]

        series = container[key].fetch_values()
        assert list(series["value"]) == words
Beispiel #10
0
    def test_distinct_types(self, container: AttributeContainer):
        """Reassigning a field through its namespace with another type must fail.

        An integer is stored under ``namespace/key`` via a dict assignment.
        Assigning a string to the same key the same way (followed by a sync) is
        expected to raise ``ValueError``.
        """
        namespace = self.gen_key()
        key = f"{fake.unique.word()}/{fake.unique.word()}"
        number = random.randint(0, 100)
        full_path = f"{namespace}/{key}"

        container[namespace] = {key: number}
        container.sync()

        assert container[full_path].fetch() == number

        text = fake.name()

        # Both statements stay inside the block: either one may raise.
        with pytest.raises(ValueError):
            container[namespace] = {key: text}
            container.sync()
Beispiel #11
0
    def test_assignment(self, container: AttributeContainer):
        """Assign a fetched artifact to another field and compare both fields.

        A local file is tracked under the first key; after ``wait`` the fetched
        value is assigned to the second key. Both fields must then report the
        same hash and the same files list.
        """
        source_key = self.gen_key()
        target_key = self.gen_key()
        filename = fake.unique.file_name()

        with tmp_context():
            content = fake.paragraph(nb_sentences=5)
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(content)

            container[source_key].track_files(filename)
            container.wait()
            fetched_artifact = container[source_key].fetch()
            container[target_key] = fetched_artifact
            container.sync()

        source_hash = container[source_key].fetch_hash()
        target_hash = container[target_key].fetch_hash()
        assert source_hash == target_hash

        source_files = container[source_key].fetch_files_list()
        target_files = container[target_key].fetch_files_list()
        assert source_files == target_files
Beispiel #12
0
    def fetch(self) -> dict:
        """Return the values of every non-File Atom field as a dictionary.

        The hierarchical layout of the run's metadata is kept in the result,
        which contains non-File Atom fields only. This is a quick way to read
        back the parameters of a previous run.

        Returns:
            `dict` with the values of all non-File Atom fields.

        Examples:
            >>> import neptune.new as neptune
            >>> resumed_run = neptune.init(run="HEL-3")
            >>> params = resumed_run['model/parameters'].fetch()

            >>> run_data = resumed_run.fetch()

            >>> print(run_data)
            >>> # prints all Atom attributes stored in the run as a dict

        You may also want to check `fetch docs page`_.

        .. _fetch docs page:
            https://docs.neptune.ai/api-reference/run#.fetch
        """
        # Pure delegation; this method exists to carry the run-specific docs.
        fetched = AttributeContainer.fetch(self)
        return fetched
Beispiel #13
0
    def sync(self, wait: bool = True) -> None:
        """Synchronize the local representation of the run with Neptune servers.

        Args:
            wait (bool, optional): Forwarded unchanged to
                ``AttributeContainer.sync``. Documented so far as: if `True` the
                process will only wait for data to be saved locally from memory,
                but will not wait for them to reach Neptune servers.
                Defaults to `True`.

        Examples:
            >>> import neptune.new as neptune

            >>> # Connect to a run from Worker #3
            ... worker_id = 3
            >>> run = neptune.init(run='DIST-43', monitoring_namespace='monitoring/{}'.format(worker_id))

            >>> # Try to access logs that were created in the meantime by Worker #2
            ... worker_2_status = run['status/2'].fetch() # Error if this field was created after this script starts

            >>> run.sync() # Synchronizes local representation with Neptune servers.
            >>> worker_2_status = run['status/2'].fetch() # No error

        You may also want to check `sync docs page`_.

        .. _sync docs page:
            https://docs.neptune.ai/api-reference/run#.sync
        """
        # NOTE(review): the `wait` description above reads like the description
        # of a "disk only" wait flag; confirm it against AttributeContainer.sync.
        outcome = AttributeContainer.sync(self, wait=wait)
        return outcome
Beispiel #14
0
    def pop(self, path: str, wait: bool = False) -> None:
        """Completely remove the field or whole namespace stored under ``path``.

        All data associated with the removed field or namespace is removed too.

        Args:
            path (str): Path of the field or namespace to be removed.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Delete a field along with its data
            ... project.pop("datasets/v0.4")

            >>> # .pop() can be invoked directly on fields and namespaces

            >>> project['parameters/learning_rate'] = 0.3

            >>> # Following line
            ... project.pop("datasets/v0.4")
            >>> # is equivalent to this line
            ... project["datasets/v0.4"].pop()
            >>> # or this line
            ... project["datasets"].pop("v0.4")

            >>> # You can also delete a whole namespace in one batch
            ... project["datasets"].pop()

        You may also want to check `pop docs page`_.

        .. _pop docs page:
           https://docs.neptune.ai/api-reference/project#.pop
        """
        # Pure delegation; this method exists to carry the project-specific docs.
        outcome = AttributeContainer.pop(self, path=path, wait=wait)
        return outcome
Beispiel #15
0
    def pop(self, path: str, wait: bool = False) -> None:
        """Removes the field stored under the path completely and all data associated with it.

        Args:
            path (str): Path of the field to be removed.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> run = neptune.init()

            >>> run['parameters/learninggg_rata'] = 0.3

            >>> # Delete a field along with its data
            ... run.pop('parameters/learninggg_rata')

            >>> run['parameters/learning_rate'] = 0.3

            >>> # Training finished
            ... run['trained_model'].upload('model.pt')
            >>> # 'model_checkpoint' is a File field
            ... run.pop('model_checkpoint')

        You may also want to check `pop docs page`_.

        .. _pop docs page:
           https://docs.neptune.ai/api-reference/run#.pop
        """
        # Pure delegation; `wait` keeps the signature default (`False`) unless
        # the caller overrides it.
        return AttributeContainer.pop(self, path=path, wait=wait)
Beispiel #16
0
    def assign(self, value, wait: bool = False) -> None:
        """Assign values to several fields at once from a dictionary.

        This is a quick way to log all of a run's parameters.

        Args:
            value (dict): A dictionary with values to assign, where keys become the paths of the fields.
                The dictionary can be nested - in that case the path is a combination of all keys.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> run = neptune.init_run()

            >>> # Assign multiple fields from a dictionary
            ... params = {"max_epochs": 10, "optimizer": "Adam"}
            >>> run["parameters"] = params

            >>> # You can always log parameters explicitly, one by one
            ... run["parameters/max_epochs"] = 10
            >>> run["parameters/optimizer"] = "Adam"

            >>> # Dictionaries can be nested
            ... params = {"train": {"max_epochs": 10}}
            >>> run["parameters"] = params
            >>> # This will log 10 under path "parameters/train/max_epochs"

        You may also want to check `assign docs page`_.

        .. _assign docs page:
            https://docs.neptune.ai/api-reference/run#.assign
        """
        # Pure delegation; this method exists to carry the run-specific docs.
        outcome = AttributeContainer.assign(self, value=value, wait=wait)
        return outcome
Beispiel #17
0
    def assign(self, value, wait: bool = False) -> None:
        """Assign values to several fields at once from a dictionary.

        This lets you log multiple pieces of information with one command.

        Args:
            value (dict): A dictionary with values to assign, where keys become the paths of the fields.
                The dictionary can be nested - in that case the path is a combination of all keys.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Assign multiple fields from a dictionary
            ... general_info = {"brief": URL_TO_PROJECT_BRIEF, "deadline": "2049-06-30"}
            >>> project["general"] = general_info

            >>> # You can always log values explicitly, one by one
            ... project["general/brief"] = URL_TO_PROJECT_BRIEF
            >>> project["general/deadline"] = "2049-06-30"

            >>> # Dictionaries can be nested
            ... general_info = {"brief": {"url": URL_TO_PROJECT_BRIEF}}
            >>> project["general"] = general_info
            >>> # This will log the url under path "general/brief/url"

        You may also want to check `assign docs page`_.

        .. _assign docs page:
            https://docs.neptune.ai/api-reference/project#.assign
        """
        # Pure delegation; this method exists to carry the project-specific docs.
        outcome = AttributeContainer.assign(self, value=value, wait=wait)
        return outcome
Beispiel #18
0
    def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
        """Stops the tracked run and kills the synchronization thread.

        `.stop()` will be automatically called when a script that created the run finishes or on the destruction
        of Neptune context.

        When using Neptune with Jupyter notebooks it's a good practice to stop the tracked run manually as it
        will be stopped automatically only when the Jupyter kernel stops.

        Args:
            seconds (int or float, optional): Seconds to wait for all tracking calls to finish
                before stopping the tracked run.
                If `None` will wait for all tracking calls to finish. Defaults to `None`.

        Examples:
            If you are creating tracked runs from the script you don't need to call `.stop()`:

            >>> import neptune.new as neptune
            >>> run = neptune.init()

            >>> # Your training or monitoring code
            ... pass
            ... # If you are executing Python script .stop()
            ... # is automatically called at the end for every run

            If you are performing multiple training jobs from one script one after the other it is a good practice
            to `.stop()` the finished tracked runs as every open run keeps an open connection with Neptune,
            monitors hardware usage, etc. You can also use Context Managers - Neptune will automatically call `.stop()`
            on the destruction of Run context:

            >>> import neptune.new as neptune

            >>> # If you are running consecutive training jobs from the same script
            ... # stop the tracked runs manually at the end of single training job
            ... for config in configs:
            ...   run = neptune.init()
            ...   # Your training or monitoring code
            ...   pass
            ...   run.stop()

            >>> # You can also use with statement and context manager
            ... for config in configs:
            ...   with neptune.init() as run:
            ...     # Your training or monitoring code
            ...     pass
            ...     # .stop() is automatically called
            ...     # when code execution exits the with statement

        .. warning::
            If you are using Jupyter notebooks for creating your runs you need to manually invoke `.stop()` once the
            training and evaluation is done.

        You may also want to check `stop docs page`_.

        .. _stop docs page:
            https://docs.neptune.ai/api-reference/run#.stop
        """
        # Pure delegation; `seconds` keeps the signature default (`None`) unless
        # the caller overrides it.
        return AttributeContainer.stop(self, seconds=seconds)
Beispiel #19
0
    def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
        """Stops the connection to the project and kills the synchronization thread.

        `.stop()` will be automatically called when a script that initialized the connection finishes
        or on the destruction of Neptune context.

        When using Neptune with Jupyter notebooks it's a good practice to stop the connection manually as it
        will be stopped automatically only when the Jupyter kernel stops.

        Args:
            seconds (int or float, optional): Seconds to wait for all tracking calls to finish
                before stopping the connection to the project.
                If `None` will wait for all tracking calls to finish. Defaults to `None`.

        Examples:
            If you are initializing the connection from a script you don't need to call `.stop()`:

            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Your code
            ... pass
            ... # If you are executing Python script .stop()
            ... # is automatically called at the end for every Neptune object

            If you are initializing multiple connections from one script it is a good practice
            to .stop() the unneeded connections. You can also use Context Managers - Neptune
            will automatically call .stop() on the destruction of Project context:

            >>> import neptune.new as neptune

            >>> # If you are initializing multiple connections from the same script
            ... # stop the connection manually once not needed
            ... for project_name in projects:
            ...   project = neptune.init_project(name=project_name)
            ...   # Your code
            ...   pass
            ...   project.stop()

            >>> # You can also use with statement and context manager
            ... for project_name in projects:
            ...   with neptune.init_project(name=project_name) as project:
            ...     # Your code
            ...     pass
            ...     # .stop() is automatically called
            ...     # when code execution exits the with statement

        .. warning::
            If you are using Jupyter notebooks for connecting to a project you need to manually invoke `.stop()`
            once the connection is not needed.

        You may also want to check `stop docs page`_.

        .. _stop docs page:
            https://docs.neptune.ai/api-reference/project#.stop
        """
        # Pure delegation; `seconds` keeps the signature default (`None`) unless
        # the caller overrides it.
        return AttributeContainer.stop(self, seconds=seconds)
Beispiel #20
0
    def test_s3_existing(self, container: AttributeContainer, bucket,
                         environment):
        """Extending an existing S3 artifact must match tracking the whole prefix.

        Two files are uploaded under a generated prefix. The first artifact
        tracks the whole prefix. The second tracks only the top-level object
        and, after a sync, is extended with the nested object. Both artifacts
        must end up with the same hash and the same files list.
        """
        key_all = self.gen_key()
        key_incremental = self.gen_key()
        prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
        flat_name = fake.file_name()
        nested_path = fake.file_path(depth=3).lstrip("/")
        nested_dir = Path(nested_path).parent

        bucket_name, s3_client = bucket
        upload = s3_client.meta.client.upload_file
        s3_root = f"s3://{bucket_name}/{prefix}"

        with tmp_context():
            Path(flat_name).write_text(fake.paragraph(nb_sentences=5),
                                       encoding="utf-8")

            os.makedirs(nested_dir, exist_ok=True)
            Path(nested_path).write_text(fake.paragraph(nb_sentences=5),
                                         encoding="utf-8")

            upload(flat_name, bucket_name, f"{prefix}/{flat_name}")
            upload(nested_path, bucket_name, f"{prefix}/{nested_path}")

        # First artifact: track all files ("a" and "b") at once.
        container[key_all].track_files(f"{s3_root}/")

        # Second artifact, step one: only the "a" file.
        container[key_incremental].track_files(f"{s3_root}/{flat_name}")
        container.sync()

        # Second artifact, step two: add the "b" file to the existing
        # artifact, so it should now be identical to the first one.
        container[key_incremental].track_files(
            f"{s3_root}/{nested_path}",
            destination=str(nested_dir),
        )
        container.sync()

        hash_all = container[key_all].fetch_hash()
        hash_incremental = container[key_incremental].fetch_hash()
        assert hash_all == hash_incremental

        files_all = container[key_all].fetch_files_list()
        files_incremental = container[key_incremental].fetch_files_list()
        assert files_all == files_incremental
    def test_log_images(self, container: AttributeContainer):
        """Log four generated images and compare the downloaded PNGs with the sources.

        The first image is logged on its own, the remaining three as a batch.
        ``download_last`` must yield ``last/3.png`` matching the final image and
        ``download`` must yield ``all/<index>.png`` for every logged image.
        """
        key = self.gen_key()
        # Original author's note: images with size between 200KB - 12MB.
        images = [generate_image(size=2**exponent) for exponent in range(8, 12)]
        first_image, *remaining_images = images

        container[key].log(first_image)
        container[key].log(remaining_images)
        container.sync()

        with tmp_context():
            container[key].download_last("last")
            container[key].download("all")

            # Index 3 is the last of the four logged images.
            with Image.open("last/3.png") as last_png:
                assert last_png == image_to_png(image=images[-1])

            for index, source_image in enumerate(images):
                with Image.open(f"all/{index}.png") as stored_png:
                    assert stored_png == image_to_png(image=source_image)
Beispiel #22
0
    def test_single_file(self, container: AttributeContainer, file_size: int):
        """Upload a zero-filled file of ``file_size`` bytes and verify its download.

        Args:
            container: Container under test.
            file_size: Number of zero bytes written to the uploaded file; the
                downloaded copy must have exactly this size and content.
        """
        key = self.gen_key()
        # `unique` guarantees that the download target differs from the uploaded
        # file. With two independent random names a collision would let the
        # assertions below pass against the source file even if the download
        # wrote nothing.
        filename = fake.unique.file_name()
        downloaded_filename = fake.unique.file_name()
        expected_content = b"\0" * file_size

        with tmp_context():
            # The file size is driven by the `file_size` parameter.
            with open(filename, "wb") as file:
                file.write(expected_content)
            container[key].upload(filename)

            container.sync()
            container[key].download(downloaded_filename)

            assert os.path.getsize(downloaded_filename) == file_size
            with open(downloaded_filename, "rb") as file:
                content = file.read()
            assert len(content) == file_size
            assert content == expected_content
Beispiel #23
0
    def test_delete_namespace(self, container: AttributeContainer):
        """Delete a whole namespace and expect every field in it to be gone.

        Two fields are stored under one namespace and verified; after
        ``del container[namespace]`` fetching either of them must raise
        ``AttributeError``.
        """
        namespace = fake.unique.word()
        keys = [fake.unique.word() for _ in range(2)]
        names = [fake.name() for _ in range(2)]
        expected = dict(zip(keys, names))

        for key, name in expected.items():
            container[namespace][key] = name
        container.sync()

        # Sanity check: both values are there before deletion.
        for key, name in expected.items():
            assert container[namespace][key].fetch() == name

        del container[namespace]
        for key in expected:
            with pytest.raises(AttributeError):
                container[namespace][key].fetch()
Beispiel #24
0
    def test_add_and_remove_tags(self, container: AttributeContainer):
        """Add tags singly and in bulk, remove some, and check what remains.

        Removing a tag that was never added is part of the scenario and must
        not affect the final set of tags.
        """
        (remaining_tag1, remaining_tag2,
         to_remove_tag1, to_remove_tag2) = (fake.unique.word()
                                            for _ in range(4))
        tags_path = self.neptune_tags_path

        container.sync()
        # Start from a clean slate if the tags field already exists.
        if container.exists(tags_path):
            container[tags_path].clear()

        container[tags_path].add(remaining_tag1)
        container[tags_path].add([to_remove_tag1, remaining_tag2])
        container[tags_path].remove(to_remove_tag1)
        # Removing a non-existing tag.
        container[tags_path].remove(to_remove_tag2)
        container.sync()

        expected_tags = {remaining_tag1, remaining_tag2}
        assert container[tags_path].fetch() == expected_tags
Beispiel #25
0
    def sync(self, wait: bool = True) -> None:
        """Synchronize the local representation of the project with Neptune servers.

        Args:
            wait (bool, optional): Forwarded unchanged to
                ``AttributeContainer.sync``. Documented so far as: if `True` the
                process will only wait for data to be saved locally from memory,
                but will not wait for them to reach Neptune servers.
                Defaults to `True`.

        You may also want to check `sync docs page`_.

        .. _sync docs page:
            https://docs.neptune.ai/api-reference/project#.sync
        """
        # NOTE(review): the `wait` description above reads like the description
        # of a "disk only" wait flag; confirm it against AttributeContainer.sync.
        outcome = AttributeContainer.sync(self, wait=wait)
        return outcome
Beispiel #26
0
    def wait(self, disk_only=False) -> None:
        """Block until all the tracking calls have finished.

        Args:
            disk_only (bool, optional): If `True` the process will only wait for
                data to be saved locally from memory, but will not wait for them
                to reach Neptune servers. Defaults to `False`.

        You may also want to check `wait docs page`_.

        .. _wait docs page:
            https://docs.neptune.ai/api-reference/project#.wait
        """
        # Pure delegation; this method exists to carry the project-specific docs.
        outcome = AttributeContainer.wait(self, disk_only=disk_only)
        return outcome
Beispiel #27
0
    def get_structure(self) -> Dict[str, Any]:
        """Return the run's metadata structure as a dictionary.

        Useful for walking the run's metadata structure programmatically when
        Neptune is used in automated workflows.

        .. danger::
            The returned object is documented as a deep copy of the run's
            internal structure.

        Returns:
            ``dict``: with the run's metadata structure.

        """
        # NOTE(review): the project-level counterpart of this method documents a
        # *shallow* copy although both delegate to the same implementation;
        # confirm the copy semantics against AttributeContainer.get_structure.
        structure = AttributeContainer.get_structure(self)
        return structure
Beispiel #28
0
    def get_structure(self) -> Dict[str, Any]:
        """Return the project's metadata structure as a dictionary.

        Useful for walking the project's metadata structure programmatically
        when Neptune is used in automated workflows.

        .. danger::
            The returned object is documented as a shallow copy of an internal
            structure. Any modifications to it may result in tracking
            malfunction.

        Returns:
            ``dict``: with the project's metadata structure.

        """
        # NOTE(review): the run-level counterpart of this method documents a
        # *deep* copy although both delegate to the same implementation;
        # confirm the copy semantics against AttributeContainer.get_structure.
        structure = AttributeContainer.get_structure(self)
        return structure
Beispiel #29
0
    def fetch(self) -> dict:
        """Return the values of every non-File Atom field as a dictionary.

        The hierarchical layout of the project's metadata is kept in the
        result, which contains non-File Atom fields only.

        Returns:
            `dict` with the values of all non-File Atom fields.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Fetch all the project metrics
            >>> project_metrics = project["metrics"].fetch()

        You may also want to check `fetch docs page`_.

        .. _fetch docs page:
            https://docs.neptune.ai/api-reference/project#.fetch
        """
        # Pure delegation; this method exists to carry the project-specific docs.
        fetched = AttributeContainer.fetch(self)
        return fetched
Beispiel #30
0
    def test_reassigning(self, container: AttributeContainer):
        """Overwrite a nested field directly and through its namespace.

        The field is first created by assigning a dict to the namespace, then
        overwritten by a direct assignment to its full path, and finally
        overwritten by assigning a dict to the namespace again. After each step
        the fetched value must equal the most recently assigned one.
        """
        namespace = self.gen_key()
        key = f"{fake.unique.word()}/{fake.unique.word()}"
        full_path = f"{namespace}/{key}"

        # Step 1: create the field by assigning a namespace.
        initial = fake.name()
        container[namespace] = {key: initial}
        container.sync()

        assert container[full_path].fetch() == initial

        # Step 2: reassign the inner value directly.
        direct = fake.name()
        container[full_path] = direct
        container.sync()

        assert container[full_path].fetch() == direct

        # Step 3: reassign through the namespace.
        via_namespace = fake.name()
        container[namespace] = {key: via_namespace}
        container.sync()

        assert container[full_path].fetch() == via_namespace