def init_run(
    project: Optional[str] = None,
    api_token: Optional[str] = None,
    run: Optional[str] = None,
    custom_run_id: Optional[str] = None,
    mode: str = Mode.ASYNC.value,
    name: Optional[str] = None,
    description: Optional[str] = None,
    tags: Optional[Union[List[str], str]] = None,
    source_files: Optional[Union[List[str], str]] = None,
    capture_stdout: bool = True,
    capture_stderr: bool = True,
    capture_hardware_metrics: bool = True,
    fail_on_exception: bool = True,
    monitoring_namespace: Optional[str] = None,
    flush_period: float = 5,
    proxies: Optional[dict] = None,
    capture_traceback: bool = True,
    **kwargs,
) -> Run:
    """Starts a new tracked run and appends it to the top of the Runs table view.

    Args:
        project (str, optional): Name of a project in the form `namespace/project_name`.
            Defaults to `None`.
            If `None`, the value of the `NEPTUNE_PROJECT` environment variable is used.
        api_token (str, optional): User's API token. Defaults to `None`.
            If `None`, the value of the `NEPTUNE_API_TOKEN` environment variable is used.

            .. note::
                It is strongly recommended to use the `NEPTUNE_API_TOKEN` environment variable
                rather than placing your API token in plain text in your source code.
        run (str, optional): An existing run's identifier, like 'SAN-1', in case of resuming
            a tracked run. Defaults to `None`.
            A run with such an identifier must exist. If `None` is passed, starts a new tracked run.
        custom_run_id (str, optional): A unique identifier to be used when running Neptune
            in pipelines. Defaults to `None`.
            Make sure you are using the same identifier throughout the whole pipeline execution.
        mode (str, optional): Connection mode in which the tracking will work. Defaults to `'async'`.
            Possible values are 'async', 'sync', 'offline', 'read-only' and 'debug'.
        name (str, optional): Editable name of the run. Defaults to `'Untitled'`.
            The name is displayed in the run's Details and in the Runs table as a column.
        description (str, optional): Editable description of the run. Defaults to `''`.
            The description is displayed in the run's Details and can be displayed
            in the Runs table as a column.
        tags (list of str or str, optional): Tags of the run. Defaults to `[]`.
            They are editable after the run is created.
            Tags are displayed in the run's Details and can be viewed in the Runs table as a column.
        source_files (list of str or str, optional): List of source files to be uploaded.
            Uploaded sources are displayed in the run's Source code tab.
            Unix style pathname pattern expansion is supported. For example, you can pass
            '*.py' to upload all Python source files from the current directory.
            If `None` is passed, the Python file from which the run was created will be uploaded.
        capture_stdout (bool, optional): Whether to send the run's stdout. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_stderr (bool, optional): Whether to send the run's stderr. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_hardware_metrics (bool, optional): Whether to send hardware monitoring logs
            (CPU, GPU, Memory utilization). Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        fail_on_exception (bool, optional): Whether to register an uncaught exception handler
            to this process and, in case of an exception, set the run's sys/failed to True.
            The exception is always logged.
        monitoring_namespace (str, optional): Namespace inside which all monitoring logs
            will be stored. Defaults to 'monitoring'.
        flush_period (float, optional): In the asynchronous (default) connection mode,
            how often the asynchronous thread should synchronize data with Neptune servers.
            Defaults to 5 (seconds).
        proxies (dict of str, optional): Argument passed to HTTP calls made via the Requests library.
            For more information see their `proxies section
            <https://2.python-requests.org/en/master/user/advanced/#proxies>`_.
        capture_traceback (bool, optional): Whether to send the run's traceback in case
            of an exception. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring/traceback`.

    Returns:
        ``Run``: object that is used to manage the tracked run and log metadata to it.

    Examples:

        >>> import neptune.new as neptune

        >>> # minimal invoke
        ... run = neptune.init()

        >>> # create a tracked run with a name
        ... run = neptune.init(name='first-pytorch-ever')

        >>> # create a tracked run with a name and a description, and no source files uploaded
        >>> run = neptune.init(name='neural-net-mnist',
        ...                    description='neural net trained on MNIST',
        ...                    source_files=[])

        >>> # Send all py files in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*.py')

        >>> # Send all py files from all subdirectories (excluding hidden files with names beginning with a dot)
        ... # Supported on Python 3.5 and later.
        ... run = neptune.init(source_files='**/*.py')

        >>> # Send all files and directories in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*')

        >>> # Send all files and directories in cwd including hidden files
        ... run = neptune.init(source_files=['*', '.*'])

        >>> # Send files with names being a single character followed by the '.py' extension
        ... run = neptune.init(source_files='?.py')

        >>> # larger example
        ... run = neptune.init(name='first-pytorch-ever',
        ...                    description='write longer description here',
        ...                    tags=['list-of', 'tags', 'goes-here', 'as-list-of-strings'],
        ...                    source_files=['training_with_pytorch.py', 'net.py'])

    You may also want to check the `init docs page`_.

    .. _init docs page:
       https://docs.neptune.ai/api-reference/neptune#.init
    """
    _check_for_extra_kwargs(init_run.__name__, kwargs)
    verify_type("project", project, (str, type(None)))
    verify_type("api_token", api_token, (str, type(None)))
    verify_type("run", run, (str, type(None)))
    verify_type("custom_run_id", custom_run_id, (str, type(None)))
    verify_type("mode", mode, str)
    verify_type("name", name, (str, type(None)))
    verify_type("description", description, (str, type(None)))
    verify_type("capture_stdout", capture_stdout, bool)
    verify_type("capture_stderr", capture_stderr, bool)
    verify_type("capture_hardware_metrics", capture_hardware_metrics, bool)
    verify_type("fail_on_exception", fail_on_exception, bool)
    verify_type("monitoring_namespace", monitoring_namespace, (str, type(None)))
    verify_type("flush_period", flush_period, (int, float))
    verify_type("proxies", proxies, (dict, type(None)))
    verify_type("capture_traceback", capture_traceback, bool)
    if tags is not None:
        if isinstance(tags, str):
            tags = [tags]
        else:
            verify_collection_type("tags", tags, str)
    if source_files is not None:
        if isinstance(source_files, str):
            source_files = [source_files]
        else:
            verify_collection_type("source_files", source_files, str)

    # Defaults apply only when creating a new run, not when resuming an existing one.
    name = "Untitled" if run is None and name is None else name
    description = "" if run is None and description is None else description
    hostname = get_hostname() if run is None else None
    custom_run_id = custom_run_id or os.getenv(CUSTOM_RUN_ID_ENV_NAME)
    monitoring_namespace = (
        monitoring_namespace or os.getenv(MONITORING_NAMESPACE) or "monitoring"
    )

    # Resuming an existing run and forcing a custom run id are mutually exclusive.
    if run and custom_run_id:
        raise NeptuneRunResumeAndCustomIdCollision()

    backend = get_backend(mode, api_token=api_token, proxies=proxies)

    if mode == Mode.OFFLINE or mode == Mode.DEBUG:
        project = "offline/project-placeholder"

    project_obj = project_name_lookup(backend, project)
    project = f"{project_obj.workspace}/{project_obj.name}"

    if run:
        api_run = backend.get_run(project + "/" + run)
    else:
        if mode == Mode.READ_ONLY:
            raise NeedExistingRunForReadOnlyMode()
        git_ref = get_git_info(discover_git_repo_location())
        if custom_run_id_exceeds_length(custom_run_id):
            custom_run_id = None
        notebook_id, checkpoint_id = _create_notebook_checkpoint(backend)
        api_run = backend.create_run(
            project_obj.id, git_ref, custom_run_id, notebook_id, checkpoint_id
        )

    run_lock = threading.RLock()

    operation_processor = get_operation_processor(
        mode,
        container_id=api_run.id,
        container_type=Run.container_type,
        backend=backend,
        lock=run_lock,
        flush_period=flush_period,
    )

    stdout_path = "{}/stdout".format(monitoring_namespace)
    stderr_path = "{}/stderr".format(monitoring_namespace)
    traceback_path = "{}/traceback".format(monitoring_namespace)

    # Monitoring jobs are only attached when the run can be written to.
    background_jobs = []
    if mode != Mode.READ_ONLY:
        if capture_stdout:
            background_jobs.append(StdoutCaptureBackgroundJob(attribute_name=stdout_path))
        if capture_stderr:
            background_jobs.append(StderrCaptureBackgroundJob(attribute_name=stderr_path))
        if capture_hardware_metrics:
            background_jobs.append(
                HardwareMetricReportingJob(attribute_namespace=monitoring_namespace)
            )
        websockets_factory = backend.websockets_factory(project_obj.id, api_run.id)
        if websockets_factory:
            background_jobs.append(WebsocketSignalsBackgroundJob(websockets_factory))
        if capture_traceback:
            background_jobs.append(TracebackJob(traceback_path, fail_on_exception))
        background_jobs.append(PingBackgroundJob())

    _run = Run(
        api_run.id,
        backend,
        operation_processor,
        BackgroundJobList(background_jobs),
        run_lock,
        api_run.workspace,
        api_run.project_name,
        api_run.short_id,
        project_obj.id,
        monitoring_namespace,
    )
    if mode != Mode.OFFLINE:
        _run.sync(wait=False)

    if mode != Mode.READ_ONLY:
        if name is not None:
            _run[attr_consts.SYSTEM_NAME_ATTRIBUTE_PATH] = name
        if description is not None:
            _run[attr_consts.SYSTEM_DESCRIPTION_ATTRIBUTE_PATH] = description
        if hostname is not None:
            _run[attr_consts.SYSTEM_HOSTNAME_ATTRIBUTE_PATH] = hostname
        if tags is not None:
            _run[attr_consts.SYSTEM_TAGS_ATTRIBUTE_PATH].add(tags)
        if run is None:
            _run[attr_consts.SYSTEM_FAILED_ATTRIBUTE_PATH] = False

        if capture_stdout and not _run.exists(stdout_path):
            _run.define(stdout_path, StringSeries([]))
        if capture_stderr and not _run.exists(stderr_path):
            _run.define(stderr_path, StringSeries([]))

        if run is None or source_files is not None:
            # upload default sources ONLY if creating a new run
            upload_source_code(source_files=source_files, run=_run)

    # pylint: disable=protected-access
    _run._startup(debug_mode=mode == Mode.DEBUG)

    return _run
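

# --- Illustrative usage sketch (not part of the library) --------------------
# A minimal, hedged example of how init_run is typically invoked through the
# public `neptune.init` alias shown in the docstring above, assuming the
# NEPTUNE_PROJECT and NEPTUNE_API_TOKEN environment variables are set. The
# field paths ("parameters/lr", "train/loss") are arbitrary example names,
# not fixed API paths.
#
#     import neptune.new as neptune
#
#     run = neptune.init(name="demo-run", tags=["sketch"], source_files=[])
#     run["parameters/lr"] = 0.001      # assign a single value
#     run["train/loss"].log(0.42)       # append a value to a series
#     run.stop()                        # flush pending operations and close the run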
def create_experiment(self,
                      name=None,
                      description=None,
                      params=None,
                      properties=None,
                      tags=None,
                      upload_source_files=None,
                      abort_callback=None,
                      logger=None,
                      upload_stdout=True,
                      upload_stderr=True,
                      send_hardware_metrics=True,
                      run_monitoring_thread=True,
                      handle_uncaught_exceptions=True,
                      git_info=None,
                      hostname=None,
                      notebook_id=None,
                      notebook_path=None):
    """Create and start Neptune experiment.

    Create an experiment, set its status to `running`, and append it to the top
    of the experiments view. All parameters are optional, hence the minimal
    invocation: ``neptune.create_experiment()``.

    Args:
        name (:obj:`str`, optional, default is ``'Untitled'``):
            Editable name of the experiment.
            Name is displayed in the experiment's `Details` (`Metadata` section)
            and in the `experiments view` as a column.

        description (:obj:`str`, optional, default is ``''``):
            Editable description of the experiment.
            Description is displayed in the experiment's `Details` (`Metadata` section)
            and can be displayed in the `experiments view` as a column.

        params (:obj:`dict`, optional, default is ``{}``):
            Parameters of the experiment.
            After experiment creation ``params`` are read-only
            (see: :meth:`~neptune.experiments.Experiment.get_parameters`).
            Parameters are displayed in the experiment's `Details` (`Parameters` section)
            and each key-value pair can be viewed in the `experiments view` as a column.

        properties (:obj:`dict`, optional, default is ``{}``):
            Properties of the experiment.
            They are editable after the experiment is created.
            Properties are displayed in the experiment's `Details` (`Properties` section)
            and each key-value pair can be viewed in the `experiments view` as a column.

        tags (:obj:`list`, optional, default is ``[]``):
            Must be a list of :obj:`str`. Tags of the experiment.
            They are editable after the experiment is created
            (see: :meth:`~neptune.experiments.Experiment.append_tag`
            and :meth:`~neptune.experiments.Experiment.remove_tag`).
            Tags are displayed in the experiment's `Details` (`Metadata` section)
            and can be viewed in the `experiments view` as a column.

        upload_source_files (:obj:`list` or :obj:`str`, optional, default is ``None``):
            List of source files to be uploaded. Must be a list of :obj:`str` or a single :obj:`str`.
            Uploaded sources are displayed in the experiment's `Source code` tab.

            | If ``None`` is passed, the Python file from which the experiment was created will be uploaded.
            | Pass an empty list (``[]``) to upload no files.
            | Unix style pathname pattern expansion is supported.
              For example, you can pass ``'*.py'`` to upload all Python source files
              from the current directory.
              For recursive lookup use ``'**/*.py'`` (for Python 3.5 and later).
              For more information see the `glob library <https://docs.python.org/3/library/glob.html>`_.

        abort_callback (:obj:`callable`, optional, default is ``None``):
            Callback that defines how the `abort experiment` action in the Web application
            should work. Actual behavior depends on your setup:

                * (default) If ``abort_callback=None`` and `psutil <https://psutil.readthedocs.io/en/latest/>`_
                  is installed, then the current process and its children are aborted by sending `SIGTERM`.
                  If, after a grace period, the processes are not terminated, `SIGKILL` is sent.
                * If ``abort_callback=None`` and `psutil <https://psutil.readthedocs.io/en/latest/>`_
                  is **not** installed, then the `abort experiment` action just marks the experiment
                  as *aborted* in the Web application. No action is performed on the current process.
                * If ``abort_callback=callable``, then ``callable`` is executed when the
                  `abort experiment` action in the Web application is triggered.

        logger (:obj:`logging.handlers` or `None`, optional, default is ``None``):
            If a `handler <https://docs.python.org/3.6/library/logging.handlers.html>`_
            to a `Python logger` is passed, a new experiment `text log`
            (see: :meth:`~neptune.experiments.Experiment.log_text`) named `"logger"` is created.
            Each time the `Python logger` logs new data, it is automatically sent to the
            `"logger"` in the experiment. As a result, all data from the `Python logger`
            appear in the `Logs` tab in the experiment.

        upload_stdout (:obj:`Boolean`, optional, default is ``True``):
            Whether to send stdout to the experiment's *Monitoring*.

        upload_stderr (:obj:`Boolean`, optional, default is ``True``):
            Whether to send stderr to the experiment's *Monitoring*.

        send_hardware_metrics (:obj:`Boolean`, optional, default is ``True``):
            Whether to send hardware monitoring logs (CPU, GPU, Memory utilization)
            to the experiment's *Monitoring*.

        run_monitoring_thread (:obj:`Boolean`, optional, default is ``True``):
            Whether to run a thread that pings the Neptune server in order to determine
            if the experiment is responsive.

        handle_uncaught_exceptions (:obj:`Boolean`, optional, default is ``True``):
            Two options, ``True`` and ``False``, are possible:

                * If set to ``True`` and an uncaught exception occurs, then Neptune automatically
                  places the `Traceback` in the experiment's `Details` and changes the
                  experiment status to `Failed`.
                * If set to ``False`` and an uncaught exception occurs, then no action is performed
                  in the Web application. As a consequence, the experiment's status is
                  `running` or `not responding`.

        git_info (:class:`~neptune.git_info.GitInfo`, optional, default is ``None``):

            | Instance of the class :class:`~neptune.git_info.GitInfo` that provides information
              about the git repository from which the experiment was started.
            | If ``None`` is passed, the system attempts to automatically extract information
              about the git repository in the following way:

                * The system looks for `.git` in the current directory and, if not found,
                  goes up recursively until `.git` is found
                  (see: :meth:`~neptune.utils.get_git_info`).
                * If there is no git repository, then no information about git is displayed
                  in the experiment details in the Neptune web application.

        hostname (:obj:`str`, optional, default is ``None``):
            If ``None``, Neptune automatically gets the `hostname` information.
            You can also set the `hostname` directly by passing a :obj:`str`.

    Returns:
        :class:`~neptune.experiments.Experiment` object that is used to manage the experiment
        and log data to it.

    Raises:
        `ExperimentValidationError`: When provided arguments are invalid.
        `ExperimentLimitReached`: When the experiment limit in the project has been reached.

    Examples:

        .. code:: python3

            # minimal invoke
            neptune.create_experiment()

            # explicitly return experiment object
            experiment = neptune.create_experiment()

            # create experiment with name and two parameters
            neptune.create_experiment(name='first-pytorch-ever',
                                      params={'lr': 0.0005, 'dropout': 0.2})

            # create experiment with name and description, and no source files uploaded
            neptune.create_experiment(name='neural-net-mnist',
                                      description='neural net trained on MNIST',
                                      upload_source_files=[])

            # Send all py files in cwd (excluding hidden files with names beginning with a dot)
            neptune.create_experiment(upload_source_files='*.py')

            # Send all py files from all subdirectories (excluding hidden files with names beginning with a dot)
            # Supported on Python 3.5 and later.
            neptune.create_experiment(upload_source_files='**/*.py')

            # Send all files and directories in cwd (excluding hidden files with names beginning with a dot)
            neptune.create_experiment(upload_source_files='*')

            # Send all files and directories in cwd including hidden files
            neptune.create_experiment(upload_source_files=['*', '.*'])

            # Send files with names being a single character followed by the '.py' extension
            neptune.create_experiment(upload_source_files='?.py')

            # larger example
            neptune.create_experiment(name='first-pytorch-ever',
                                      params={'lr': 0.0005, 'dropout': 0.2},
                                      properties={'key1': 'value1', 'key2': 17, 'key3': 'other-value'},
                                      description='write longer description here',
                                      tags=['list-of', 'tags', 'goes-here', 'as-list-of-strings'],
                                      upload_source_files=['training_with_pytorch.py', 'net.py'])
    """
    if name is None:
        name = "Untitled"
    if description is None:
        description = ""
    if params is None:
        params = {}
    if properties is None:
        properties = {}
    if tags is None:
        tags = []
    if git_info is None:
        git_info = get_git_info(discover_git_repo_location())
    if hostname is None:
        hostname = get_hostname()
    if notebook_id is None and os.getenv(NOTEBOOK_ID_ENV_NAME, None) is not None:
        notebook_id = os.environ[NOTEBOOK_ID_ENV_NAME]

    if isinstance(upload_source_files, six.string_types):
        upload_source_files = [upload_source_files]

    # Resolve which source files to upload: default to the entrypoint script,
    # otherwise expand the user-supplied glob patterns.
    upload_source_entries = []
    main_file = sys.argv[0]
    entrypoint = main_file or None
    if upload_source_files is None:
        if os.path.isfile(main_file):
            entrypoint = normalize_file_name(os.path.basename(main_file))
            upload_source_entries = [
                UploadEntry(os.path.abspath(main_file),
                            normalize_file_name(os.path.basename(main_file)))
            ]
    else:
        expanded_source_files = set()
        for filepath in upload_source_files:
            expanded_source_files |= set(glob(filepath))
        for filepath in expanded_source_files:
            upload_source_entries.append(
                UploadEntry(os.path.abspath(filepath), normalize_file_name(filepath)))

    if notebook_path is None and os.getenv(NOTEBOOK_PATH_ENV_NAME, None) is not None:
        notebook_path = os.environ[NOTEBOOK_PATH_ENV_NAME]

    abortable = abort_callback is not None or DefaultAbortImpl.requirements_installed()

    checkpoint_id = None
    if notebook_id is not None and notebook_path is not None:
        checkpoint = create_checkpoint(backend=self._backend,
                                       notebook_id=notebook_id,
                                       notebook_path=notebook_path)
        if checkpoint is not None:
            checkpoint_id = checkpoint.id

    experiment = self._backend.create_experiment(
        project=self,
        name=name,
        description=description,
        params=params,
        properties=properties,
        tags=tags,
        abortable=abortable,
        monitored=run_monitoring_thread,
        git_info=git_info,
        hostname=hostname,
        entrypoint=entrypoint,
        notebook_id=notebook_id,
        checkpoint_id=checkpoint_id
    )

    # pylint: disable=protected-access
    experiment._start(
        upload_source_entries=upload_source_entries,
        abort_callback=abort_callback,
        logger=logger,
        upload_stdout=upload_stdout,
        upload_stderr=upload_stderr,
        send_hardware_metrics=send_hardware_metrics,
        run_monitoring_thread=run_monitoring_thread,
        handle_uncaught_exceptions=handle_uncaught_exceptions
    )

    self._push_new_experiment(experiment)

    click.echo(self._get_experiment_link(experiment))

    return experiment
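

# --- Illustrative usage sketch (not part of the library) --------------------
# A minimal, hedged example of driving create_experiment through the legacy
# top-level API, assuming the project 'my_workspace/sandbox' (a hypothetical
# name) exists and an API token is available via NEPTUNE_API_TOKEN.
#
#     import neptune
#
#     neptune.init(project_qualified_name='my_workspace/sandbox')
#     experiment = neptune.create_experiment(name='demo',
#                                            params={'lr': 0.001})
#     experiment.log_metric('loss', 0.42)   # numeric log, shown as a chart
#     experiment.stop()                     # mark the experiment as succeeded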