# Example 1
def init_run(
    project: Optional[str] = None,
    api_token: Optional[str] = None,
    run: Optional[str] = None,
    custom_run_id: Optional[str] = None,
    mode: str = Mode.ASYNC.value,
    name: Optional[str] = None,
    description: Optional[str] = None,
    tags: Optional[Union[List[str], str]] = None,
    source_files: Optional[Union[List[str], str]] = None,
    capture_stdout: bool = True,
    capture_stderr: bool = True,
    capture_hardware_metrics: bool = True,
    fail_on_exception: bool = True,
    monitoring_namespace: Optional[str] = None,
    flush_period: float = 5,
    proxies: Optional[dict] = None,
    capture_traceback: bool = True,
    **kwargs,
) -> Run:
    """Starts a new tracked run, and append it to the top of the Runs table view.

    Args:
        project(str, optional): Name of a project in a form of `namespace/project_name`. Defaults to `None`.
            If `None`, the value of `NEPTUNE_PROJECT` environment variable will be taken.
        api_token(str, optional): User's API token. Defaults to `None`.
            If `None`, the value of `NEPTUNE_API_TOKEN` environment variable will be taken.
            .. note::
                It is strongly recommended to use `NEPTUNE_API_TOKEN` environment variable rather than placing your
                API token in plain text in your source code.
        run (str, optional): An existing run's identifier like 'SAN-1' in case of resuming a tracked run.
            Defaults to `None`.
            A run with such identifier must exist. If `None` is passed, starts a new tracked run.
        custom_run_id (str, optional): A unique identifier to be used when running Neptune in pipelines.
            Defaults to `None`.
            Make sure you are using the same identifier throughout the whole pipeline execution.
        mode (str, optional): Connection mode in which the tracking will work. Defaults to `'async'`.
            Possible values 'async', 'sync', 'offline', 'read-only' and 'debug'.
        name (str, optional): Editable name of the run. Defaults to `'Untitled'`.
            Name is displayed in the run's Details and in Runs table as a column.
        description (str, optional): Editable description of the run. Defaults to `''`.
            Description is displayed in the run's Details and can be displayed in the runs view as a column.
        tags (list of str or str, optional): Tags of the run. Defaults to `[]`.
            They are editable after run is created.
            Tags are displayed in the run's Details and can be viewed in Runs table view as a column.
        source_files (list of str or str, optional): List of source files to be uploaded.
            Uploaded sources are displayed in the run's Source code tab.
            Unix style pathname pattern expansion is supported. For example, you can pass '*.py' to upload all python
            source files from the current directory.
            If `None` is passed, Python file from which run was created will be uploaded.
        capture_stdout (bool, optional): Whether to send run's stdout. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_stderr (bool, optional):  Whether to send run's stderr. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_hardware_metrics (bool, optional): Whether to send hardware monitoring logs
            (CPU, GPU, Memory utilization). Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        fail_on_exception (bool, optional): Whether to register an uncaught exception handler to this process and,
            in case of an exception, set run's sys/failed to True. Exception is always logged
        monitoring_namespace (str, optional): Namespace inside which all monitoring logs be stored.
            Defaults to 'monitoring'.
        flush_period (float, optional): In an asynchronous (default) connection mode how often asynchronous thread
            should synchronize data with Neptune servers. Defaults to 5.
        proxies (dict of str, optional): Argument passed to HTTP calls made via the Requests library.
            For more information see
            `their proxies section <https://2.python-requests.org/en/master/user/advanced/#proxies>`_.
        capture_traceback (bool, optional):  Whether to send run's traceback in case of an exception.
            Defaults to `True`.
            Tracked metadata will be stored inside `monitoring/traceback`.

    Returns:
        ``Run``: object that is used to manage the tracked run and log metadata to it.

    Examples:

        >>> import neptune.new as neptune

        >>> # minimal invoke
        ... run = neptune.init()

        >>> # create a tracked run with a name
        ... run = neptune.init(name='first-pytorch-ever')

        >>> # create a tracked run with a name and a description, and no sources files uploaded
        >>> run = neptune.init(name='neural-net-mnist',
        ...                    description='neural net trained on MNIST',
        ...                    source_files=[])

        >>> # Send all py files in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*.py')

        >>> # Send all py files from all subdirectories (excluding hidden files with names beginning with a dot)
        ... # Supported on Python 3.5 and later.
        ... run = neptune.init(source_files='**/*.py')

        >>> # Send all files and directories in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*')

        >>> # Send all files and directories in cwd including hidden files
        ... run = neptune.init(source_files=['*', '.*'])

        >>> # Send files with names being a single character followed by '.py' extension.
        ... run = neptune.init(source_files='?.py')

        >>> # larger example
        ... run = neptune.init(name='first-pytorch-ever',
        ...               description='write longer description here',
        ...               tags=['list-of', 'tags', 'goes-here', 'as-list-of-strings'],
        ...               source_files=['training_with_pytorch.py', 'net.py'])

    You may also want to check `init docs page`_.

    .. _init docs page:
       https://docs.neptune.ai/api-reference/neptune#.init
    """
    # Validate every argument up-front so the user gets a clear error before
    # any network calls or background threads are started.
    _check_for_extra_kwargs(init_run.__name__, kwargs)
    verify_type("project", project, (str, type(None)))
    verify_type("api_token", api_token, (str, type(None)))
    verify_type("run", run, (str, type(None)))
    verify_type("custom_run_id", custom_run_id, (str, type(None)))
    verify_type("mode", mode, str)
    verify_type("name", name, (str, type(None)))
    verify_type("description", description, (str, type(None)))
    verify_type("capture_stdout", capture_stdout, bool)
    verify_type("capture_stderr", capture_stderr, bool)
    verify_type("capture_hardware_metrics", capture_hardware_metrics, bool)
    verify_type("fail_on_exception", fail_on_exception, bool)
    verify_type("monitoring_namespace", monitoring_namespace,
                (str, type(None)))
    verify_type("flush_period", flush_period, (int, float))
    verify_type("proxies", proxies, (dict, type(None)))
    # BUGFIX: previously this line re-validated `capture_hardware_metrics`
    # instead of `capture_traceback`, letting an invalid `capture_traceback`
    # argument slip through type checking silently.
    verify_type("capture_traceback", capture_traceback, bool)
    if tags is not None:
        if isinstance(tags, str):
            tags = [tags]
        else:
            verify_collection_type("tags", tags, str)
    if source_files is not None:
        if isinstance(source_files, str):
            source_files = [source_files]
        else:
            verify_collection_type("source_files", source_files, str)

    # Defaults for name/description/hostname apply only to freshly created
    # runs — when resuming (`run` is set) the existing values are kept.
    name = "Untitled" if run is None and name is None else name
    description = "" if run is None and description is None else description
    hostname = get_hostname() if run is None else None
    custom_run_id = custom_run_id or os.getenv(CUSTOM_RUN_ID_ENV_NAME)
    monitoring_namespace = (monitoring_namespace
                            or os.getenv(MONITORING_NAMESPACE) or "monitoring")

    # Resuming an existing run and supplying a custom id are mutually
    # exclusive — the custom id would be ignored, so fail loudly.
    if run and custom_run_id:
        raise NeptuneRunResumeAndCustomIdCollision()

    backend = get_backend(mode, api_token=api_token, proxies=proxies)

    if mode == Mode.OFFLINE or mode == Mode.DEBUG:
        project = "offline/project-placeholder"

    project_obj = project_name_lookup(backend, project)
    project = f"{project_obj.workspace}/{project_obj.name}"

    if run:
        # Resume: look up the existing run by its qualified id.
        api_run = backend.get_run(project + "/" + run)
    else:
        if mode == Mode.READ_ONLY:
            raise NeedExistingRunForReadOnlyMode()
        git_ref = get_git_info(discover_git_repo_location())
        if custom_run_id_exceeds_length(custom_run_id):
            custom_run_id = None

        notebook_id, checkpoint_id = _create_notebook_checkpoint(backend)

        api_run = backend.create_run(project_obj.id, git_ref, custom_run_id,
                                     notebook_id, checkpoint_id)

    run_lock = threading.RLock()

    operation_processor = get_operation_processor(
        mode,
        container_id=api_run.id,
        container_type=Run.container_type,
        backend=backend,
        lock=run_lock,
        flush_period=flush_period,
    )

    stdout_path = "{}/stdout".format(monitoring_namespace)
    stderr_path = "{}/stderr".format(monitoring_namespace)
    traceback_path = "{}/traceback".format(monitoring_namespace)

    # Background jobs stream monitoring data; none are started in read-only
    # mode since nothing may be written to the run.
    background_jobs = []
    if mode != Mode.READ_ONLY:
        if capture_stdout:
            background_jobs.append(
                StdoutCaptureBackgroundJob(attribute_name=stdout_path))
        if capture_stderr:
            background_jobs.append(
                StderrCaptureBackgroundJob(attribute_name=stderr_path))
        if capture_hardware_metrics:
            background_jobs.append(
                HardwareMetricReportingJob(
                    attribute_namespace=monitoring_namespace))
        websockets_factory = backend.websockets_factory(
            project_obj.id, api_run.id)
        if websockets_factory:
            background_jobs.append(
                WebsocketSignalsBackgroundJob(websockets_factory))
        if capture_traceback:
            background_jobs.append(
                TracebackJob(traceback_path, fail_on_exception))
        background_jobs.append(PingBackgroundJob())

    _run = Run(
        api_run.id,
        backend,
        operation_processor,
        BackgroundJobList(background_jobs),
        run_lock,
        api_run.workspace,
        api_run.project_name,
        api_run.short_id,
        project_obj.id,
        monitoring_namespace,
    )
    if mode != Mode.OFFLINE:
        _run.sync(wait=False)

    if mode != Mode.READ_ONLY:
        if name is not None:
            _run[attr_consts.SYSTEM_NAME_ATTRIBUTE_PATH] = name
        if description is not None:
            _run[attr_consts.SYSTEM_DESCRIPTION_ATTRIBUTE_PATH] = description
        if hostname is not None:
            _run[attr_consts.SYSTEM_HOSTNAME_ATTRIBUTE_PATH] = hostname
        if tags is not None:
            _run[attr_consts.SYSTEM_TAGS_ATTRIBUTE_PATH].add(tags)
        if run is None:
            _run[attr_consts.SYSTEM_FAILED_ATTRIBUTE_PATH] = False

        if capture_stdout and not _run.exists(stdout_path):
            _run.define(stdout_path, StringSeries([]))
        if capture_stderr and not _run.exists(stderr_path):
            _run.define(stderr_path, StringSeries([]))

        if run is None or source_files is not None:
            # upload default sources ONLY if creating a new run
            upload_source_code(source_files=source_files, run=_run)

    # pylint: disable=protected-access
    _run._startup(debug_mode=mode == Mode.DEBUG)

    return _run
# Example 2
    def create_experiment(self,
                          name=None,
                          description=None,
                          params=None,
                          properties=None,
                          tags=None,
                          upload_source_files=None,
                          abort_callback=None,
                          logger=None,
                          upload_stdout=True,
                          upload_stderr=True,
                          send_hardware_metrics=True,
                          run_monitoring_thread=True,
                          handle_uncaught_exceptions=True,
                          git_info=None,
                          hostname=None,
                          notebook_id=None,
                          notebook_path=None):
        """Create and start a Neptune experiment in this project.

        The new experiment is created with status `running` and appended to the
        top of the experiments view. All parameters are optional, so the minimal
        invocation is ``neptune.create_experiment()``.

        Args:
            name (:obj:`str`, optional, default is ``'Untitled'``):
                Editable experiment name, shown in the experiment's `Details`
                (`Metadata` section) and as a column in the `experiments view`.

            description (:obj:`str`, optional, default is ``''``):
                Editable experiment description, shown in the experiment's
                `Details` and available as a column in the `experiments view`.

            params (:obj:`dict`, optional, default is ``{}``):
                Experiment parameters. Read-only once the experiment exists
                (see: :meth:`~neptune.experiments.Experiment.get_parameters`).
                Displayed in `Details` (`Parameters` section); each key-value
                pair can be a column in the `experiments view`.

            properties (:obj:`dict`, optional, default is ``{}``):
                Experiment properties, editable after creation. Displayed in
                `Details` (`Properties` section); each key-value pair can be a
                column in the `experiments view`.

            tags (:obj:`list`, optional, default is ``[]``):
                List of :obj:`str` tags, editable after creation
                (see: :meth:`~neptune.experiments.Experiment.append_tag`
                and :meth:`~neptune.experiments.Experiment.remove_tag`).
                Displayed in `Details` and available as a column.

            upload_source_files (:obj:`list` or :obj:`str`, optional, default is ``None``):
                Source files to upload, as a list of :obj:`str` or a single
                :obj:`str`; shown in the experiment's `Source code` tab.
                ``None`` uploads the Python file the experiment was created
                from; an empty list (``[]``) uploads nothing. Unix style glob
                patterns are supported, e.g. ``'*.py'``, or ``'**/*.py'`` for
                recursion (Python 3.5+). See the
                `glob library <https://docs.python.org/3/library/glob.html>`_.

            abort_callback (:obj:`callable`, optional, default is ``None``):
                What the Web application's `abort experiment` action does.
                If ``None`` and `psutil <https://psutil.readthedocs.io/en/latest/>`_
                is installed, the current process and its children receive
                `SIGTERM` (then `SIGKILL` after a grace period). If ``None``
                without psutil, the experiment is only marked *aborted* in the
                Web application. If a callable, it is invoked on abort.

            logger (:obj:`logging.handlers` or `None`, optional, default is ``None``):
                If a `handler <https://docs.python.org/3.6/library/logging.handlers.html>`_
                to a `Python logger` is passed, a `text log` named `"logger"`
                is created (see: :meth:`~neptune.experiments.Experiment.log_text`)
                and everything the logger emits is mirrored to the `Logs` tab.

            upload_stdout (:obj:`Boolean`, optional, default is ``True``):
                Whether to send stdout to the experiment's *Monitoring*.

            upload_stderr (:obj:`Boolean`, optional, default is ``True``):
                Whether to send stderr to the experiment's *Monitoring*.

            send_hardware_metrics (:obj:`Boolean`, optional, default is ``True``):
                Whether to send hardware monitoring logs (CPU, GPU, Memory
                utilization) to the experiment's *Monitoring*.

            run_monitoring_thread (:obj:`Boolean`, optional, default is ``True``):
                Whether to run a thread pinging the Neptune server so the
                experiment's responsiveness can be determined.

            handle_uncaught_exceptions (:obj:`Boolean`, optional, default is ``True``):
                If ``True``, an uncaught exception places the `Traceback` in
                the experiment's `Details` and sets the status to `Failed`.
                If ``False``, nothing happens in the Web application and the
                status stays `running` or becomes `not responding`.

            git_info (:class:`~neptune.git_info.GitInfo`, optional, default is ``None``):
                Information about the git repository the experiment started
                from. If ``None``, the system looks for a `.git` file in the
                current directory and recursively upward
                (see: :meth:`~neptune.utils.get_git_info`); if no repository
                is found, no git information is displayed.

            hostname (:obj:`str`, optional, default is ``None``):
                If ``None``, the hostname is detected automatically; otherwise
                the given :obj:`str` is used.

            notebook_id (:obj:`str`, optional, default is ``None``):
                Notebook identifier; falls back to the notebook-id environment
                variable when unset.

            notebook_path (:obj:`str`, optional, default is ``None``):
                Notebook path; falls back to the notebook-path environment
                variable when unset.

        Returns:
            :class:`~neptune.experiments.Experiment` object used to manage the
            experiment and log data to it.

        Raises:
            `ExperimentValidationError`: When provided arguments are invalid.
            `ExperimentLimitReached`: When the project's experiment limit has been reached.

        Examples:

            .. code:: python3

                # minimal invoke
                neptune.create_experiment()

                # explicitly return experiment object
                experiment = neptune.create_experiment()

                # create experiment with name and two parameters
                neptune.create_experiment(name='first-pytorch-ever',
                                          params={'lr': 0.0005,
                                                  'dropout': 0.2})

                # no source files, with name and description
                neptune.create_experiment(name='neural-net-mnist',
                                          description='neural net trained on MNIST',
                                          upload_source_files=[])

                # glob patterns: '*.py', '**/*.py', '*', ['*', '.*'], '?.py'
                neptune.create_experiment(upload_source_files='*.py')
        """

        # Fill in defaults; conditional expressions keep the lazily-computed
        # ones (git info, hostname) from running when a value was supplied.
        name = "Untitled" if name is None else name
        description = "" if description is None else description
        params = {} if params is None else params
        properties = {} if properties is None else properties
        tags = [] if tags is None else tags
        git_info = get_git_info(discover_git_repo_location()) if git_info is None else git_info
        hostname = get_hostname() if hostname is None else hostname

        # Environment variables back-fill notebook identity when not given.
        notebook_id = os.getenv(NOTEBOOK_ID_ENV_NAME) if notebook_id is None else notebook_id
        notebook_path = os.getenv(NOTEBOOK_PATH_ENV_NAME) if notebook_path is None else notebook_path

        # A single pattern string is treated as a one-element list.
        if isinstance(upload_source_files, six.string_types):
            upload_source_files = [upload_source_files]

        script = sys.argv[0]
        entrypoint = script or None
        source_entries = []
        if upload_source_files is None:
            # Default: upload only the script the experiment started from.
            if os.path.isfile(script):
                entrypoint = normalize_file_name(os.path.basename(script))
                source_entries = [
                    UploadEntry(os.path.abspath(script), entrypoint)
                ]
        else:
            # Expand every glob pattern; the set removes duplicate matches.
            matched = set()
            for pattern in upload_source_files:
                matched.update(glob(pattern))
            source_entries = [
                UploadEntry(os.path.abspath(path), normalize_file_name(path))
                for path in matched
            ]

        # Abort is possible either via a user callback or the default
        # psutil-based implementation when its requirements are met.
        abortable = abort_callback is not None or DefaultAbortImpl.requirements_installed()

        # A notebook checkpoint is created only when both id and path are known.
        checkpoint_id = None
        if notebook_id is not None and notebook_path is not None:
            checkpoint = create_checkpoint(backend=self._backend,
                                           notebook_id=notebook_id,
                                           notebook_path=notebook_path)
            checkpoint_id = checkpoint.id if checkpoint is not None else None

        experiment = self._backend.create_experiment(
            project=self,
            name=name,
            description=description,
            params=params,
            properties=properties,
            tags=tags,
            abortable=abortable,
            monitored=run_monitoring_thread,
            git_info=git_info,
            hostname=hostname,
            entrypoint=entrypoint,
            notebook_id=notebook_id,
            checkpoint_id=checkpoint_id
        )

        # pylint: disable=protected-access
        experiment._start(
            upload_source_entries=source_entries,
            abort_callback=abort_callback,
            logger=logger,
            upload_stdout=upload_stdout,
            upload_stderr=upload_stderr,
            send_hardware_metrics=send_hardware_metrics,
            run_monitoring_thread=run_monitoring_thread,
            handle_uncaught_exceptions=handle_uncaught_exceptions
        )

        self._push_new_experiment(experiment)

        click.echo(self._get_experiment_link(experiment))

        return experiment