Beispiel #1
0
 def __init__(
     self,
     upload_entry: UploadEntry,
     upload_configuration: AttributeUploadConfiguration,
 ):
     """Capture what is needed to stream a single upload entry.

     Args:
         upload_entry: Entry to read from. Its target path, length, stream
             and permissions are copied onto this object.
         upload_configuration: Upload settings, stored unchanged.
     """
     # Plain store of the caller-supplied configuration.
     self.upload_configuration: AttributeUploadConfiguration = upload_configuration
     # Name under which the data is uploaded.
     self.filename: str = upload_entry.target_path
     # The entry is queried in this order: length, stream, permissions.
     self.length: int = upload_entry.length()
     self.fobj: Union[BinaryIO, io.BytesIO] = upload_entry.get_stream()
     self.permissions: str = upload_entry.get_permissions()
    def log_artifact(self, artifact, destination=None):
        """Upload an artifact (file or stream) to the experiment storage.

        Args:
            artifact (:obj:`str` or :obj:`IO object`):
                Either a path to a file in the local filesystem or an IO object, i.e. an open
                file descriptor or an in-memory buffer such as `io.StringIO` or `io.BytesIO`.
            destination (:obj:`str`, optional, default is ``None``):
                A destination path.
                If ``None`` is passed, the artifact file name is used.
                It is required when ``artifact`` is an IO object.

        Note:
            When you write to an in-memory buffer such as `io.StringIO` or `io.BytesIO`, its current
            position typically ends up at the end of the stream. To have its content read, move the
            position back to the beginning first.
            We recommend calling seek(0) on in-memory buffers before passing them to Neptune.
            Additionally, an `io.StringIO` is encoded as 'utf-8' before it is sent to Neptune.

        Raises:
            `FileNotFound`: When ``artifact`` is a path and the file was not found.
            `ValueError`: When ``artifact`` is an IO object and no ``destination`` was given,
                or when ``artifact`` is neither a path nor an IO object.
            `StorageLimitReached`: When storage limit in the project has been reached.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # simple use
                experiment.log_artifact('images/wrong_prediction_1.png')

                # save file in other directory
                experiment.log_artifact('images/wrong_prediction_1.png', 'validation/images/wrong_prediction_1.png')

                # save file under different name
                experiment.log_artifact('images/wrong_prediction_1.png', 'images/my_image_1.png')
        """
        if isinstance(artifact, str):
            # Local path: it has to exist; the name defaults to the file's base name.
            if not os.path.exists(artifact):
                raise FileNotFound(artifact)
            if destination is None:
                chosen_name = os.path.basename(artifact)
            else:
                chosen_name = destination
            entry = UploadEntry(os.path.abspath(artifact), normalize_file_name(chosen_name))
        elif hasattr(artifact, 'read'):
            # Stream: there is no file name to fall back on, so a destination is mandatory.
            if destination is None:
                raise ValueError("destination is required for file streams")
            entry = UploadEntry(artifact, normalize_file_name(destination))
        else:
            raise ValueError("artifact is a local path or an IO object")

        upload_to_storage(
            upload_entries=[entry],
            upload_api_fun=self._backend.upload_experiment_output,
            upload_tar_api_fun=self._backend.extract_experiment_output,
            experiment=self,
        )
    def test_split_upload_files_should_not_generate_empty_packages(self, getsize):
        # `getsize` is injected by a patch decorator that is not visible here
        # (presumably os.path.getsize -- confirm against the decorator).
        # GIVEN a single entry reported as ten times larger than the package limit
        reported_size = 10 * self.MAX_PACKAGE_SIZE
        original_entry = UploadEntry("/tmp/test.gz", "test.gz")
        # AND a second entry built from the same source and target paths
        entry_to_split = UploadEntry(original_entry.source_path, original_entry.target_path)
        getsize.return_value = reported_size

        # EXPECT every generated package to hold something
        reference_package = UploadPackage()
        reference_package.update(original_entry, reported_size)
        packages = split_upload_files([entry_to_split], max_package_size=self.MAX_PACKAGE_SIZE)
        for package in packages:
            self.assertFalse(package.is_empty())
Beispiel #4
0
    def test_split_upload_files_should_not_generate_empty_packages(self, getsize):
        # `getsize` is injected by a patch decorator that is not visible here
        # (presumably os.path.getsize -- confirm against the decorator).
        # GIVEN a single entry reported as ten times larger than MAX_PACKAGE_SIZE
        reported_size = 10 * self.MAX_PACKAGE_SIZE
        original_entry = UploadEntry("/tmp/test.gz", "test.gz")
        # AND a second entry built from the same source and target paths
        entry_to_split = UploadEntry(original_entry.source_path, original_entry.target_path)
        upload_config = AttributeUploadConfiguration(reported_size)
        getsize.return_value = reported_size

        # EXPECT every generated package to hold something
        reference_package = UploadPackage()
        reference_package.update(original_entry, reported_size)
        packages = split_upload_files(
            upload_entries={entry_to_split},
            upload_configuration=upload_config,
        )
        for package in packages:
            self.assertFalse(package.is_empty())
Beispiel #5
0
    def test_permissions_to_unix_string_for_directory(self, lstat):
        # `lstat` is a mock supplied by a patch decorator outside this snippet.
        # given: the stat result reports mode bits 0o642
        stat_result = lstat.return_value
        stat_result.st_mode = 0o642

        # when
        rendered = UploadEntry.permissions_to_unix_string("/some/path")

        # then: a directory marker followed by rw- r-- -w-
        self.assertEqual("drw-r---w-", rendered)
Beispiel #6
0
    def test_permissions_to_unix_string_for_file(self, lstat):
        # `lstat` is a mock supplied by a patch decorator outside this snippet.
        # given: the stat result reports mode bits 0o731
        stat_result = lstat.return_value
        stat_result.st_mode = 0o731

        # when
        rendered = UploadEntry.permissions_to_unix_string("/some/path")

        # then: a regular-file marker followed by rwx -wx --x
        self.assertEqual("-rwx-wx--x", rendered)
    def test_split_upload_files_should_generate_upload_files_list_for_only_one_file(self, getsize):
        # `getsize` is injected by a patch decorator that is not visible here.
        # GIVEN one entry reported as ten times larger than the package limit
        reported_size = 10 * self.MAX_PACKAGE_SIZE
        single_entry = UploadEntry("/tmp/test.gz", "test.gz")
        getsize.return_value = reported_size

        # EXPECT exactly one package that contains this entry
        expected_package = UploadPackage()
        expected_package.update(single_entry, reported_size)
        produced = list(split_upload_files([single_entry], max_package_size=self.MAX_PACKAGE_SIZE))
        self.assertEqual(produced, [expected_package])
Beispiel #8
0
def get_unique_upload_entries(file_globs: Iterable[str]) -> Set[UploadEntry]:
    """Turn file globs into a set of unique upload entries.

    The globs are resolved with ``get_absolute_paths``. When
    ``get_common_root`` finds a common root, each target name is the path
    relative to that root; otherwise the absolute path itself is used. Target
    names always go through ``normalize_file_name``.

    Args:
        file_globs: Patterns passed to ``get_absolute_paths``.

    Returns:
        The result of ``scan_unique_upload_entries`` for the built entries.
    """
    absolute_paths = get_absolute_paths(file_globs)
    common_root = get_common_root(absolute_paths)

    def _target_for(path):
        # Without a common root the absolute path doubles as the target name.
        if common_root is None:
            return path
        return os.path.relpath(path, common_root)

    entries: List[UploadEntry] = [
        UploadEntry(path, normalize_file_name(_target_for(path)))
        for path in absolute_paths
    ]
    return scan_unique_upload_entries(entries)
    def test_upload_small_sources_should_not_generate_warning(self, warning):
        # `warning` is a mock supplied by a patch decorator outside this snippet.
        # GIVEN
        source_entry = UploadEntry("/tmp/mocked/file", "some_file")
        warn_limit_bytes = 100 * 1024 * 1024

        # WHEN
        upload_to_storage(
            upload_entries=[source_entry],
            upload_api_fun=MagicMock(),
            upload_tar_api_fun=MagicMock(),
            warn_limit=warn_limit_bytes,
        )

        # THEN
        warning.assert_not_called()
Beispiel #10
0
def upload_file_attribute(
    swagger_client: SwaggerClient,
    container_id: str,
    attribute: str,
    source: Union[str, bytes],
    ext: str,
    multipart_config: Optional[MultipartConfig],
) -> List[NeptuneException]:
    """Upload a file attribute from a local path or from raw bytes.

    Args:
        swagger_client: Client providing the API url, operations and HTTP client.
        container_id: Identifier sent as ``experimentId`` (legacy upload) or
            ``experimentIdentifier`` (multipart upload).
        attribute: Attribute path; also the base of the upload target name.
        source: A path to an existing file, or the content itself as bytes.
        ext: Extension; when non-empty it is appended to the target as ``.ext``.
        multipart_config: Multipart settings. ``None`` selects the legacy
            upload procedure with ``DEFAULT_UPLOAD_CONFIG``.

    Returns:
        A list of errors: a single ``FileUploadError`` when ``source`` is a
        string that does not point to a file, the caught
        ``MetadataInconsistency`` when the upload raises one, and an empty
        list on success. Other exceptions propagate to the caller.
    """
    if isinstance(source, str) and not os.path.isfile(source):
        return [FileUploadError(source, "Path not found or is a not a file.")]

    target = attribute
    if ext:
        target += "." + ext

    try:
        # Raw bytes are wrapped in a stream so both source kinds share one code path.
        upload_entry = UploadEntry(
            source if isinstance(source, str) else BytesIO(source), target
        )
        if multipart_config is None:
            # the legacy upload procedure
            url = build_operation_url(
                swagger_client.swagger_spec.api_url,
                swagger_client.api.uploadAttribute.operation.path_name,
            )
            upload_configuration = DEFAULT_UPLOAD_CONFIG

            _upload_loop(
                file_chunk_stream=FileChunkStream(upload_entry, upload_configuration),
                http_client=swagger_client.swagger_spec.http_client,
                url=url,
                query_params={
                    "experimentId": container_id,
                    "attribute": attribute,
                    "ext": ext,
                },
            )
        else:
            _multichunk_upload(
                upload_entry,
                query_params={
                    "experimentIdentifier": container_id,
                    "attribute": attribute,
                    "ext": ext,
                },
                swagger_client=swagger_client,
                multipart_config=multipart_config,
                target=FileUploadTarget.FILE_ATOM,
            )
    except MetadataInconsistency as e:
        return [e]

    # Honour the declared return type on success instead of falling through
    # with an implicit ``None``.
    return []
    def test_upload_large_sources_should_generate_warning(self, warning):
        # `warning` is a mock supplied by a patch decorator outside this snippet;
        # the reported source size is controlled elsewhere as well.
        # GIVEN
        source_entry = UploadEntry("/tmp/mocked/file", "some_file")
        warn_limit_bytes = 100 * 1024 * 1024

        # WHEN
        upload_to_storage(
            upload_entries=[source_entry],
            upload_api_fun=MagicMock(),
            upload_tar_api_fun=MagicMock(),
            warn_limit=warn_limit_bytes,
        )

        # THEN both the size warning and the progress message were logged for 101 MB
        size_warning_format = (
            'You are sending %dMB of source code to Neptune. '
            'It is pretty uncommon - please make sure it\'s what you wanted.'
        )
        progress_format = '%d MB (100%%) of source code was sent to Neptune.'
        warning.assert_any_call(size_warning_format, 101)
        warning.assert_any_call(progress_format, 101)
Beispiel #12
0
    def test_generate_chunks_from_stream(self):
        # given: 24 characters of text held in an in-memory text stream
        text = u"ABCDEFGHIJKLMNOPRSTUWXYZ"

        # when: the stream is read in chunks of 10
        stream = FileChunkStream(UploadEntry(StringIO(text), "some/path"))
        chunks = list(stream.generate(chunk_size=10))

        # then: no length is reported, and the chunks are 10 + 10 + 4 bytes
        expected_chunks = [
            FileChunk(b"ABCDEFGHIJ", 0, 10),
            FileChunk(b"KLMNOPRSTU", 10, 20),
            FileChunk(b"WXYZ", 20, 24),
        ]
        self.assertEqual(stream.length, None)
        self.assertEqual(chunks, expected_chunks)
Beispiel #13
0
    def test_split_upload_files_should_generate_upload_files_list_for_only_one_file(self, getsize):
        # `getsize` is injected by a patch decorator that is not visible here.
        # GIVEN one entry reported as ten times larger than MAX_PACKAGE_SIZE
        reported_size = 10 * self.MAX_PACKAGE_SIZE
        single_entry = UploadEntry("/tmp/test.gz", "test.gz")
        upload_config = AttributeUploadConfiguration(reported_size)
        getsize.return_value = reported_size

        # EXPECT exactly one package that contains this entry
        expected_package = UploadPackage()
        expected_package.update(single_entry, reported_size)
        produced = list(
            split_upload_files(
                upload_entries={single_entry},
                upload_configuration=upload_config,
            )
        )
        self.assertEqual(produced, [expected_package])
    def log_artifact(self, artifact, destination=None):
        """Upload an artifact (file) to the experiment storage.

        Args:
            artifact (:obj:`str`): A path to the file in the local filesystem.
            destination (:obj:`str`, optional, default is ``None``):
                A destination path.
                If ``None`` is passed, the artifact file name is used.

        Raises:
            `FileNotFound`: When ``artifact`` file was not found.
            `StorageLimitReached`: When storage limit in the project has been reached.

        Example:
            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                # simple use
                experiment.log_artifact('images/wrong_prediction_1.png')

                # save file in other directory
                experiment.log_artifact('images/wrong_prediction_1.png', 'validation/images/wrong_prediction_1.png')

                # save file under different name
                experiment.log_artifact('images/wrong_prediction_1.png', 'images/my_image_1.png')
        """
        if not os.path.exists(artifact):
            raise FileNotFound(artifact)

        # The stored name defaults to the base name of the local file.
        if destination is None:
            target_name = os.path.basename(artifact)
        else:
            target_name = destination

        entry = UploadEntry(os.path.abspath(artifact), normalize_file_name(target_name))
        upload_to_storage(
            upload_entries=[entry],
            upload_api_fun=self._backend.upload_experiment_output,
            upload_tar_api_fun=self._backend.extract_experiment_output,
            experiment=self,
        )
Beispiel #15
0
    def test_generate_chunks_from_stream(self):
        # given: 24 characters of text, uploaded as UTF-8 bytes
        text = u"ABCDEFGHIJKLMNOPRSTUWXYZ"
        payload = text.encode("utf-8")

        # when: the stream is built with an upload configuration of 10
        entry = UploadEntry(BytesIO(payload), "some/path")
        stream = FileChunkStream(entry, AttributeUploadConfiguration(10))
        chunks = list(stream.generate())

        # then: the full length is known, and the chunks are 10 + 10 + 4 bytes
        expected_chunks = [
            FileChunk(b"ABCDEFGHIJ", 0, 10),
            FileChunk(b"KLMNOPRSTU", 10, 20),
            FileChunk(b"WXYZ", 20, 24),
        ]
        self.assertEqual(stream.length, 24)
        self.assertEqual(chunks, expected_chunks)
Beispiel #16
0
    def test_permissions_to_unix_string_for_nonexistent_file(self):
        # when: the path is expected not to exist on the machine running the test
        rendered = UploadEntry.permissions_to_unix_string("/some/path")

        # then: all ten positions are dashes
        all_dashes = "-" * 10
        self.assertEqual(all_dashes, rendered)
Beispiel #17
0
    def create_experiment(self,
                          name=None,
                          description=None,
                          params=None,
                          properties=None,
                          tags=None,
                          upload_source_files=None,
                          abort_callback=None,
                          logger=None,
                          upload_stdout=True,
                          upload_stderr=True,
                          send_hardware_metrics=True,
                          run_monitoring_thread=True,
                          handle_uncaught_exceptions=True,
                          git_info=None,
                          hostname=None,
                          notebook_id=None,
                          notebook_path=None):
        """Create and start a Neptune experiment.

        Creates the experiment, sets its status to `running` and appends it to the top of the experiments view.
        All parameters are optional, hence the minimal invocation is ``neptune.create_experiment()``.

        Args:
            name (:obj:`str`, optional, default is ``'Untitled'``):
                Editable name of the experiment.
                The name is displayed in the experiment's `Details` (`Metadata` section)
                and in the `experiments view` as a column.

            description (:obj:`str`, optional, default is ``''``):
                Editable description of the experiment.
                The description is displayed in the experiment's `Details` (`Metadata` section)
                and can be displayed in the `experiments view` as a column.

            params (:obj:`dict`, optional, default is ``{}``):
                Parameters of the experiment.
                After experiment creation ``params`` are read-only
                (see: :meth:`~neptune.experiments.Experiment.get_parameters`).
                Parameters are displayed in the experiment's `Details` (`Parameters` section)
                and each key-value pair can be viewed in the `experiments view` as a column.

            properties (:obj:`dict`, optional, default is ``{}``):
                Properties of the experiment.
                They are editable after the experiment is created.
                Properties are displayed in the experiment's `Details` (`Properties` section)
                and each key-value pair can be viewed in the `experiments view` as a column.

            tags (:obj:`list`, optional, default is ``[]``):
                Must be a list of :obj:`str`. Tags of the experiment.
                They are editable after the experiment is created
                (see: :meth:`~neptune.experiments.Experiment.append_tag`
                and :meth:`~neptune.experiments.Experiment.remove_tag`).
                Tags are displayed in the experiment's `Details` (`Metadata` section)
                and can be viewed in the `experiments view` as a column.

            upload_source_files (:obj:`list` or :obj:`str`, optional, default is ``None``):
                List of source files to be uploaded. Must be a list of :obj:`str` or a single :obj:`str`.
                Uploaded sources are displayed in the experiment's `Source code` tab.

                | If ``None`` is passed, the Python file from which the experiment was created is uploaded.
                | Pass an empty list (``[]``) to upload no files.
                | Unix style pathname pattern expansion is supported. For example, you can pass ``'*.py'`` to upload
                  all python source files from the current directory.
                  For recursive lookup use ``'**/*.py'`` (for Python 3.5 and later).
                  For more information see `glob library <https://docs.python.org/3/library/glob.html>`_.

            abort_callback (:obj:`callable`, optional, default is ``None``):
                Callback that defines how the `abort experiment` action in the Web application should work.
                Actual behavior depends on your setup:

                    * (default) If ``abort_callback=None`` and `psutil <https://psutil.readthedocs.io/en/latest/>`_
                      is installed, then the current process and its children are aborted by sending `SIGTERM`.
                      If, after a grace period, the processes are not terminated, `SIGKILL` is sent.
                    * If ``abort_callback=None`` and `psutil <https://psutil.readthedocs.io/en/latest/>`_
                      is **not** installed, then the `abort experiment` action just marks the experiment as *aborted*
                      in the Web application. No action is performed on the current process.
                    * If ``abort_callback=callable``, then ``callable`` is executed when the `abort experiment` action
                      in the Web application is triggered.

            logger (:obj:`logging.handlers` or `None`, optional, default is ``None``):
                If a `handler <https://docs.python.org/3.6/library/logging.handlers.html>`_
                to the `Python logger` is passed, a new experiment `text log`
                (see: :meth:`~neptune.experiments.Experiment.log_text`) with the name `"logger"` is created.
                Each time the `Python logger` logs new data, it is automatically sent to the `"logger"` in the experiment.
                As a result all data from the `Python logger` are in the `Logs` tab in the experiment.

            upload_stdout (:obj:`Boolean`, optional, default is ``True``):
                Whether to send stdout to the experiment's *Monitoring*.

            upload_stderr (:obj:`Boolean`, optional, default is ``True``):
                Whether to send stderr to the experiment's *Monitoring*.

            send_hardware_metrics (:obj:`Boolean`, optional, default is ``True``):
                Whether to send hardware monitoring logs (CPU, GPU, Memory utilization) to the experiment's *Monitoring*.

            run_monitoring_thread (:obj:`Boolean`, optional, default is ``True``):
                Whether to run the thread that pings the Neptune server in order to determine if the experiment
                is responsive.

            handle_uncaught_exceptions (:obj:`Boolean`, optional, default is ``True``):
                Two options, ``True`` and ``False``, are possible:

                    * If set to ``True`` and an uncaught exception occurs, then Neptune automatically places the
                      `Traceback` in the experiment's `Details` and changes the experiment status to `Failed`.
                    * If set to ``False`` and an uncaught exception occurs, then no action is performed
                      in the Web application. As a consequence, the experiment's status is `running`
                      or `not responding`.

            git_info (:class:`~neptune.git_info.GitInfo`, optional, default is ``None``):

                | Instance of the class :class:`~neptune.git_info.GitInfo` that provides information about
                  the git repository from which the experiment was started.
                | If ``None`` is passed,
                  the system attempts to automatically extract information about the git repository
                  in the following way:

                      * The system looks for the `.git` file in the current directory and, if not found,
                        goes up recursively until the `.git` file is found
                        (see: :meth:`~neptune.utils.get_git_info`).
                      * If there is no git repository,
                        then no information about git is displayed in the experiment details
                        in the Neptune web application.

            hostname (:obj:`str`, optional, default is ``None``):
                If ``None``, Neptune automatically gets the `hostname` information.
                The user can also set `hostname` directly by passing :obj:`str`.

            notebook_id (:obj:`str`, optional, default is ``None``):
                Identifier of the notebook passed on to the backend.
                If ``None``, the value of the environment variable named by ``NOTEBOOK_ID_ENV_NAME``
                is used when that variable is set.

            notebook_path (:obj:`str`, optional, default is ``None``):
                Path of the notebook.
                If ``None``, the value of the environment variable named by ``NOTEBOOK_PATH_ENV_NAME``
                is used when that variable is set.
                When both a notebook id and a notebook path are available, a notebook checkpoint is created
                and its id is passed on to the backend together with the experiment.

        Returns:
            :class:`~neptune.experiments.Experiment` object that is used to manage the experiment and log data to it.

        Raises:
            `ExperimentValidationError`: When provided arguments are invalid.
            `ExperimentLimitReached`: When experiment limit in the project has been reached.

        Examples:

            .. code:: python3

                # minimal invoke
                neptune.create_experiment()

                # explicitly return experiment object
                experiment = neptune.create_experiment()

                # create experiment with name and two parameters
                neptune.create_experiment(name='first-pytorch-ever',
                                          params={'lr': 0.0005,
                                                  'dropout': 0.2})

                # create experiment with name and description, and no sources files uploaded
                neptune.create_experiment(name='neural-net-mnist',
                                          description='neural net trained on MNIST',
                                          upload_source_files=[])

                # Send all py files in cwd (excluding hidden files with names beginning with a dot)
                neptune.create_experiment(upload_source_files='*.py')

                # Send all py files from all subdirectories (excluding hidden files with names beginning with a dot)
                # Supported on Python 3.5 and later.
                neptune.create_experiment(upload_source_files='**/*.py')

                # Send all files and directories in cwd (excluding hidden files with names beginning with a dot)
                neptune.create_experiment(upload_source_files='*')

                # Send all files and directories in cwd including hidden files
                neptune.create_experiment(upload_source_files=['*', '.*'])

                # Send files with names being a single character followed by '.py' extension.
                neptune.create_experiment(upload_source_files='?.py')

                # larger example
                neptune.create_experiment(name='first-pytorch-ever',
                                          params={'lr': 0.0005,
                                                  'dropout': 0.2},
                                          properties={'key1': 'value1',
                                                      'key2': 17,
                                                      'key3': 'other-value'},
                                          description='write longer description here',
                                          tags=['list-of', 'tags', 'goes-here', 'as-list-of-strings'],
                                          upload_source_files=['training_with_pytorch.py', 'net.py'])
        """

        # Replace omitted arguments with their documented defaults.
        name = "Untitled" if name is None else name
        description = "" if description is None else description
        params = {} if params is None else params
        properties = {} if properties is None else properties
        tags = [] if tags is None else tags

        if git_info is None:
            git_info = get_git_info(discover_git_repo_location())

        if hostname is None:
            hostname = get_hostname()

        # An explicit notebook id wins; otherwise fall back to the environment.
        if notebook_id is None:
            notebook_id = os.getenv(NOTEBOOK_ID_ENV_NAME, None)

        # A single pattern is treated as a one-element list of patterns.
        if isinstance(upload_source_files, six.string_types):
            upload_source_files = [upload_source_files]

        main_file = sys.argv[0]
        entrypoint = main_file or None
        upload_source_entries = []
        if upload_source_files is not None:
            # Expand every pattern and de-duplicate the matched paths.
            matched_paths = set()
            for pattern in upload_source_files:
                matched_paths |= set(glob(pattern))
            upload_source_entries = [
                UploadEntry(os.path.abspath(matched_path), normalize_file_name(matched_path))
                for matched_path in matched_paths
            ]
        elif os.path.isfile(main_file):
            # Nothing was requested explicitly: upload the running script itself.
            script_name = os.path.basename(main_file)
            entrypoint = normalize_file_name(script_name)
            upload_source_entries = [
                UploadEntry(os.path.abspath(main_file), normalize_file_name(script_name))
            ]

        # Same fallback as for the notebook id.
        if notebook_path is None:
            notebook_path = os.getenv(NOTEBOOK_PATH_ENV_NAME, None)

        abortable = abort_callback is not None or DefaultAbortImpl.requirements_installed()

        # A checkpoint can only be created when the notebook is fully identified.
        checkpoint_id = None
        if notebook_id is not None and notebook_path is not None:
            checkpoint = create_checkpoint(
                backend=self._backend,
                notebook_id=notebook_id,
                notebook_path=notebook_path,
            )
            checkpoint_id = None if checkpoint is None else checkpoint.id

        experiment = self._backend.create_experiment(
            project=self,
            name=name,
            description=description,
            params=params,
            properties=properties,
            tags=tags,
            abortable=abortable,
            monitored=run_monitoring_thread,
            git_info=git_info,
            hostname=hostname,
            entrypoint=entrypoint,
            notebook_id=notebook_id,
            checkpoint_id=checkpoint_id,
        )

        # pylint: disable=protected-access
        experiment._start(
            upload_source_entries=upload_source_entries,
            abort_callback=abort_callback,
            logger=logger,
            upload_stdout=upload_stdout,
            upload_stderr=upload_stderr,
            send_hardware_metrics=send_hardware_metrics,
            run_monitoring_thread=run_monitoring_thread,
            handle_uncaught_exceptions=handle_uncaught_exceptions,
        )

        self._push_new_experiment(experiment)

        # Print the link to the newly created experiment.
        click.echo(self._get_experiment_link(experiment))

        return experiment
Beispiel #18
0
def _multichunk_upload(
    upload_entry: UploadEntry,
    swagger_client: SwaggerClient,
    query_params: dict,
    multipart_config: MultipartConfig,
    target: FileUploadTarget,
):
    """Upload one entry through the multipart endpoints.

    An entry no longer than ``multipart_config.max_single_part_size`` is read
    in full and sent with a single request. A longer entry is sent as a
    chunked upload: a start call, one request per chunk, then a finish call.

    Args:
        upload_entry: Entry providing the stream, the length and the source path.
        swagger_client: Client providing the HTTP client and the multipart urls.
        query_params: Query parameters for the requests. The mapping is not
            modified; an ``"ext"`` key is only sent with the single upload and
            with the start call of a chunked upload.
        multipart_config: Size limits used to choose the mode and to cut chunks.
        target: Upload target used to select the url set.

    Returns:
        An empty list on success.

    Raises:
        MetadataInconsistency: When the start or the finish call of a chunked
            upload reports errors. Anything raised by
            ``_attribute_upload_response_handler`` propagates as well.
    """
    urlset = _build_multipart_urlset(swagger_client, target)
    file_stream = upload_entry.get_stream()
    try:
        # Measured inside the ``try`` so the stream is closed even if this fails.
        entry_length = upload_entry.length()
        if entry_length <= multipart_config.max_single_part_size:
            # single upload
            data = file_stream.read()
            result = upload_raw_data(
                http_client=swagger_client.swagger_spec.http_client,
                url=urlset.single,
                data=data,
                query_params=query_params,
            )
            _attribute_upload_response_handler(result)
        else:
            # chunked upload
            result = (
                urlset.start_chunked(**query_params, totalLength=entry_length)
                .response()
                .result
            )
            if result.errors:
                raise MetadataInconsistency(
                    [err.errorDescription for err in result.errors]
                )

            # "ext" is only sent with the start call. Work on a filtered copy so
            # the caller's dictionary is left untouched.
            chunk_query_params = {
                key: value for key, value in query_params.items() if key != "ext"
            }

            upload_id = result.uploadId
            chunker = FileChunker(
                upload_entry.source_path, file_stream, entry_length, multipart_config
            )
            for idx, chunk in enumerate(chunker.generate()):
                result = upload_raw_data(
                    http_client=swagger_client.swagger_spec.http_client,
                    url=urlset.send_chunk,
                    data=chunk.data,
                    headers={"X-Range": _build_x_range(chunk, entry_length)},
                    query_params={
                        "uploadId": upload_id,
                        "uploadPartIdx": idx,
                        **chunk_query_params,
                    },
                )
                _attribute_upload_response_handler(result)

            result = (
                urlset.finish_chunked(**chunk_query_params, uploadId=upload_id)
                .response()
                .result
            )
            if result.errors:
                raise MetadataInconsistency(
                    [err.errorDescription for err in result.errors]
                )
        return []
    finally:
        file_stream.close()