예제 #1
0
 def _to_proper_value_type(values: Union[str, Iterable[str]]) -> Iterable[str]:
     """Normalize ``values`` into a list of strings.

     Args:
         values: Either a single string or a collection of strings.

     Returns:
         A new list: a copy of the collection, or a one-element list
         wrapping the single string.

     Raises:
         Whatever ``verify_type`` / ``verify_collection_type`` raise when the
         input does not match ``str``.
     """
     # Scalar case first: anything ``is_collection`` rejects must be a str.
     if not is_collection(values):
         verify_type("values", values, str)
         return [values]

     # Collection case: check it against ``str`` and hand back a fresh list.
     verify_collection_type("values", values, str)
     return list(values)
예제 #2
0
 def __init__(self, file_globs: Union[str, Iterable[str]]):
     """Store ``file_globs`` on the instance as a list of strings.

     Args:
         file_globs: A single glob string or a collection of glob strings.
             A lone string is wrapped in a one-element list; a collection
             is checked with ``verify_collection_type`` and copied.
     """
     verify_type("file_globs", file_globs, (str, Iterable))
     if not isinstance(file_globs, str):
         verify_collection_type("file_globs", file_globs, str)
         patterns = file_globs
     else:
         patterns = [file_globs]
     # Always keep our own list so later changes to the argument do not leak in.
     self.file_globs: List[str] = list(patterns)
예제 #3
0
 def upload_files(
     self, globs: Union[str, Iterable[str]], wait: bool = False
 ) -> None:
     """Enqueue an upload of the files matched by ``globs`` without a reset.

     Args:
         globs: A single glob string or a collection of glob strings. A lone
             string is wrapped in a one-element list; a collection is checked
             with ``verify_collection_type`` and forwarded unchanged.
         wait: Forwarded as ``wait`` to ``_enqueue_upload_operation``.
     """
     if not isinstance(globs, str):
         verify_collection_type("globs", globs, str)
         patterns = globs
     else:
         patterns = [globs]
     self._enqueue_upload_operation(patterns, reset=False, wait=wait)
예제 #4
0
 def delete_files(
     self, paths: Union[str, Iterable[str]], wait: bool = False
 ) -> None:
     """Enqueue a ``DeleteFiles`` operation for the given paths.

     Args:
         paths: A single path string or a collection of path strings. A lone
             string is treated as a one-element collection; a collection is
             checked with ``verify_collection_type``.
         wait: Passed through to ``_enqueue_operation``.
     """
     if not isinstance(paths, str):
         verify_collection_type("paths", paths, str)
         targets = paths
     else:
         targets = [paths]
     # Build and enqueue the operation while holding the container lock.
     with self._container.lock():
         operation = DeleteFiles(self._path, set(targets))
         self._enqueue_operation(operation, wait)
예제 #5
0
 def _as_list(
         name: str,
         value: Optional[Union[str,
                               Iterable[str]]]) -> Optional[Iterable[str]]:
     """Normalize an optional string-or-collection argument.

     Args:
         name: Argument name handed to the verification helpers.
         value: ``None``, a single string, or a collection of strings.

     Returns:
         ``None`` when ``value`` is ``None``; a one-element list when it is a
         single string; otherwise ``value`` itself (not copied) after it has
         been checked with ``verify_collection_type``.
     """
     verify_type(name, value, (type(None), str, Iterable))
     if isinstance(value, str):
         return [value]
     if value is not None:
         verify_collection_type(name, value, str)
     # Either None or the checked collection, returned as received.
     return value
예제 #6
0
 def assign(
     self, value: Union[FileSetVal, str, Iterable[str]], wait: bool = False
 ) -> None:
     """Enqueue an upload of ``value`` with ``reset=True``.

     Args:
         value: A ``FileSetVal`` (its ``file_globs`` are used), a single glob
             string (wrapped in a list), or a collection of glob strings
             (checked with ``verify_collection_type``).
         wait: Forwarded as ``wait`` to ``_enqueue_upload_operation``.
     """
     verify_type("value", value, (FileSetVal, str, Iterable))
     if isinstance(value, FileSetVal):
         patterns = value.file_globs
     elif isinstance(value, str):
         patterns = [value]
     else:
         verify_collection_type("value", value, str)
         patterns = value
     self._enqueue_upload_operation(patterns, reset=True, wait=wait)
예제 #7
0
    def upload_files(self,
                     value: Union[str, Iterable[str]],
                     wait: bool = False) -> None:
        """Upload files to the ``FileSet`` attribute stored at this path.

        Args:
            value: A single string or a collection of strings; checked with
                ``verify_type`` / ``verify_collection_type`` against ``str``.
            wait: Passed through to the attribute's ``upload_files``.
        """
        if not is_collection(value):
            verify_type("value", value, str)
        else:
            verify_collection_type("value", value, str)

        with self._run.lock():
            file_set = self._run.get_attribute(self._path)
            if not file_set:
                # No attribute registered at this path yet: create and store one.
                file_set = FileSet(self._run, parse_path(self._path))
                self._run.set_attribute(self._path, file_set)
            file_set.upload_files(value, wait)
예제 #8
0
def init_run(
    project: Optional[str] = None,
    api_token: Optional[str] = None,
    run: Optional[str] = None,
    custom_run_id: Optional[str] = None,
    mode: str = Mode.ASYNC.value,
    name: Optional[str] = None,
    description: Optional[str] = None,
    tags: Optional[Union[List[str], str]] = None,
    source_files: Optional[Union[List[str], str]] = None,
    capture_stdout: bool = True,
    capture_stderr: bool = True,
    capture_hardware_metrics: bool = True,
    fail_on_exception: bool = True,
    monitoring_namespace: Optional[str] = None,
    flush_period: float = 5,
    proxies: Optional[dict] = None,
    capture_traceback: bool = True,
    **kwargs,
) -> Run:
    """Starts a new tracked run and appends it to the top of the Runs table view.

    Args:
        project(str, optional): Name of a project in a form of `namespace/project_name`. Defaults to `None`.
            If `None`, the value of `NEPTUNE_PROJECT` environment variable will be taken.
        api_token(str, optional): User's API token. Defaults to `None`.
            If `None`, the value of `NEPTUNE_API_TOKEN` environment variable will be taken.
            .. note::
                It is strongly recommended to use `NEPTUNE_API_TOKEN` environment variable rather than placing your
                API token in plain text in your source code.
        run (str, optional): An existing run's identifier like 'SAN-1' in case of resuming a tracked run.
            Defaults to `None`.
            A run with such identifier must exist. If `None` is passed, starts a new tracked run.
        custom_run_id (str, optional): A unique identifier to be used when running Neptune in pipelines.
            Defaults to `None`.
            Make sure you are using the same identifier throughout the whole pipeline execution.
            Cannot be combined with `run`.
        mode (str, optional): Connection mode in which the tracking will work. Defaults to `'async'`.
            Possible values 'async', 'sync', 'offline', 'read-only' and 'debug'.
        name (str, optional): Editable name of the run. Defaults to `'Untitled'`.
            Name is displayed in the run's Details and in Runs table as a column.
        description (str, optional): Editable description of the run. Defaults to `''`.
            Description is displayed in the run's Details and can be displayed in the runs view as a column.
        tags (list of str or str, optional): Tags of the run. Defaults to `[]`.
            They are editable after run is created.
            Tags are displayed in the run's Details and can be viewed in Runs table view as a column.
        source_files (list of str or str, optional): List of source files to be uploaded.
            Uploaded sources are displayed in the run's Source code tab.
            Unix style pathname pattern expansion is supported. For example, you can pass '*.py' to upload all python
            source files from the current directory.
            If `None` is passed, Python file from which run was created will be uploaded.
        capture_stdout (bool, optional): Whether to send run's stdout. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_stderr (bool, optional): Whether to send run's stderr. Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        capture_hardware_metrics (bool, optional): Whether to send hardware monitoring logs
            (CPU, GPU, Memory utilization). Defaults to `True`.
            Tracked metadata will be stored inside `monitoring_namespace`.
        fail_on_exception (bool, optional): Whether to register an uncaught exception handler to this process and,
            in case of an exception, set run's sys/failed to True. Exception is always logged.
            Defaults to `True`.
        monitoring_namespace (str, optional): Namespace inside which all monitoring logs will be stored.
            Defaults to 'monitoring'.
        flush_period (float, optional): In an asynchronous (default) connection mode how often asynchronous thread
            should synchronize data with Neptune servers. Defaults to 5.
        proxies (dict of str, optional): Argument passed to HTTP calls made via the Requests library.
            For more information see
            `their proxies section <https://2.python-requests.org/en/master/user/advanced/#proxies>`_.
        capture_traceback (bool, optional): Whether to send run's traceback in case of an exception.
            Defaults to `True`.
            Tracked metadata will be stored inside `<monitoring_namespace>/traceback`.

    Returns:
        ``Run``: object that is used to manage the tracked run and log metadata to it.

    Raises:
        NeptuneRunResumeAndCustomIdCollision: If both `run` and a custom run id
            (argument or environment variable) are provided.
        NeedExistingRunForReadOnlyMode: If `mode` is read-only and no `run` is given.

    Examples:

        >>> import neptune.new as neptune

        >>> # minimal invoke
        ... run = neptune.init()

        >>> # create a tracked run with a name
        ... run = neptune.init(name='first-pytorch-ever')

        >>> # create a tracked run with a name and a description, and no sources files uploaded
        >>> run = neptune.init(name='neural-net-mnist',
        ...                    description='neural net trained on MNIST',
        ...                    source_files=[])

        >>> # Send all py files in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*.py')

        >>> # Send all py files from all subdirectories (excluding hidden files with names beginning with a dot)
        ... # Supported on Python 3.5 and later.
        ... run = neptune.init(source_files='**/*.py')

        >>> # Send all files and directories in cwd (excluding hidden files with names beginning with a dot)
        ... run = neptune.init(source_files='*')

        >>> # Send all files and directories in cwd including hidden files
        ... run = neptune.init(source_files=['*', '.*'])

        >>> # Send files with names being a single character followed by '.py' extension.
        ... run = neptune.init(source_files='?.py')

        >>> # larger example
        ... run = neptune.init(name='first-pytorch-ever',
        ...               description='write longer description here',
        ...               tags=['list-of', 'tags', 'goes-here', 'as-list-of-strings'],
        ...               source_files=['training_with_pytorch.py', 'net.py'])

    You may also want to check `init docs page`_.

    .. _init docs page:
       https://docs.neptune.ai/api-reference/neptune#.init
    """
    # --- argument validation -------------------------------------------------
    _check_for_extra_kwargs(init_run.__name__, kwargs)
    verify_type("project", project, (str, type(None)))
    verify_type("api_token", api_token, (str, type(None)))
    verify_type("run", run, (str, type(None)))
    verify_type("custom_run_id", custom_run_id, (str, type(None)))
    verify_type("mode", mode, str)
    verify_type("name", name, (str, type(None)))
    verify_type("description", description, (str, type(None)))
    verify_type("capture_stdout", capture_stdout, bool)
    verify_type("capture_stderr", capture_stderr, bool)
    verify_type("capture_hardware_metrics", capture_hardware_metrics, bool)
    verify_type("monitoring_namespace", monitoring_namespace,
                (str, type(None)))
    verify_type("flush_period", flush_period, (int, float))
    verify_type("proxies", proxies, (dict, type(None)))
    # Validate the traceback flag itself (previously the hardware-metrics flag
    # was checked a second time here by mistake).
    verify_type("capture_traceback", capture_traceback, bool)
    # NOTE(review): fail_on_exception is not type-checked here; left as-is to
    # avoid rejecting callers that currently pass truthy non-bool values.
    if tags is not None:
        if isinstance(tags, str):
            tags = [tags]
        else:
            verify_collection_type("tags", tags, str)
    if source_files is not None:
        if isinstance(source_files, str):
            source_files = [source_files]
        else:
            verify_collection_type("source_files", source_files, str)

    # --- defaults --------------------------------------------------------------
    # Name, description and hostname get defaults only for a brand-new run;
    # when resuming (run is given) unset values stay None and are not written.
    name = "Untitled" if run is None and name is None else name
    description = "" if run is None and description is None else description
    hostname = get_hostname() if run is None else None
    custom_run_id = custom_run_id or os.getenv(CUSTOM_RUN_ID_ENV_NAME)
    monitoring_namespace = (monitoring_namespace
                            or os.getenv(MONITORING_NAMESPACE) or "monitoring")

    if run and custom_run_id:
        raise NeptuneRunResumeAndCustomIdCollision()

    # --- backend, project and run resolution -----------------------------------
    backend = get_backend(mode, api_token=api_token, proxies=proxies)

    if mode == Mode.OFFLINE or mode == Mode.DEBUG:
        project = "offline/project-placeholder"

    project_obj = project_name_lookup(backend, project)
    project = f"{project_obj.workspace}/{project_obj.name}"

    if run:
        api_run = backend.get_run(f"{project}/{run}")
    else:
        if mode == Mode.READ_ONLY:
            raise NeedExistingRunForReadOnlyMode()
        git_ref = get_git_info(discover_git_repo_location())
        # Drop the custom id when the length check flags it.
        if custom_run_id_exceeds_length(custom_run_id):
            custom_run_id = None

        notebook_id, checkpoint_id = _create_notebook_checkpoint(backend)

        api_run = backend.create_run(project_obj.id, git_ref, custom_run_id,
                                     notebook_id, checkpoint_id)

    # The same lock is shared by the operation processor and the Run object.
    run_lock = threading.RLock()

    operation_processor = get_operation_processor(
        mode,
        container_id=api_run.id,
        container_type=Run.container_type,
        backend=backend,
        lock=run_lock,
        flush_period=flush_period,
    )

    stdout_path = f"{monitoring_namespace}/stdout"
    stderr_path = f"{monitoring_namespace}/stderr"
    traceback_path = f"{monitoring_namespace}/traceback"

    # --- background jobs (none are registered in read-only mode) ---------------
    background_jobs = []
    if mode != Mode.READ_ONLY:
        if capture_stdout:
            background_jobs.append(
                StdoutCaptureBackgroundJob(attribute_name=stdout_path))
        if capture_stderr:
            background_jobs.append(
                StderrCaptureBackgroundJob(attribute_name=stderr_path))
        if capture_hardware_metrics:
            background_jobs.append(
                HardwareMetricReportingJob(
                    attribute_namespace=monitoring_namespace))
        websockets_factory = backend.websockets_factory(
            project_obj.id, api_run.id)
        if websockets_factory:
            background_jobs.append(
                WebsocketSignalsBackgroundJob(websockets_factory))
        if capture_traceback:
            background_jobs.append(
                TracebackJob(traceback_path, fail_on_exception))
        background_jobs.append(PingBackgroundJob())

    _run = Run(
        api_run.id,
        backend,
        operation_processor,
        BackgroundJobList(background_jobs),
        run_lock,
        api_run.workspace,
        api_run.project_name,
        api_run.short_id,
        project_obj.id,
        monitoring_namespace,
    )
    if mode != Mode.OFFLINE:
        _run.sync(wait=False)

    # --- initial metadata (skipped entirely in read-only mode) -----------------
    if mode != Mode.READ_ONLY:
        if name is not None:
            _run[attr_consts.SYSTEM_NAME_ATTRIBUTE_PATH] = name
        if description is not None:
            _run[attr_consts.SYSTEM_DESCRIPTION_ATTRIBUTE_PATH] = description
        if hostname is not None:
            _run[attr_consts.SYSTEM_HOSTNAME_ATTRIBUTE_PATH] = hostname
        if tags is not None:
            _run[attr_consts.SYSTEM_TAGS_ATTRIBUTE_PATH].add(tags)
        if run is None:
            _run[attr_consts.SYSTEM_FAILED_ATTRIBUTE_PATH] = False

        if capture_stdout and not _run.exists(stdout_path):
            _run.define(stdout_path, StringSeries([]))
        if capture_stderr and not _run.exists(stderr_path):
            _run.define(stderr_path, StringSeries([]))

        if run is None or source_files is not None:
            # upload default sources ONLY if creating a new run
            upload_source_code(source_files=source_files, run=_run)

    # pylint: disable=protected-access
    _run._startup(debug_mode=mode == Mode.DEBUG)

    return _run
예제 #9
0
 def test_verify_collection_type_failed_element(self):
     """A float among int/str elements is expected to raise ``TypeError``."""
     mixed_elements = ["string", 3, "a", 4.0, 1]
     allowed_types = (int, str)
     with self.assertRaises(TypeError):
         verify_collection_type("arg", mixed_elements, allowed_types)
예제 #10
0
 def test_verify_collection_type_failed(self):
     """Passing a bare string as the collection is expected to raise ``TypeError``."""
     self.assertRaises(
         TypeError, verify_collection_type, "arg", "string", (int, str)
     )
예제 #11
0
 def test_verify_collection_type(self):
     """A list mixing str and int elements is accepted for ``(int, str)``."""
     elements = ["string", "aaa", 5, 1, "q"]
     allowed_types = (int, str)
     # Passing means no exception is raised.
     verify_collection_type("arg", elements, allowed_types)