Beispiel #1
0
def run(ctx, program, args, id, resume, dir, configs, message, name, notes,
        show, tags, run_group, job_type):
    """Create a 'clirun' mode run and execute ``program`` under a RunManager.

    ``configs`` and ``tags`` are comma-separated strings. Run parameters that
    are not supplied fall back to the corresponding environment variables.
    If the run manager raises ``run_manager.Error`` during setup, the error is
    reported and the process exits with status 1.
    """
    wandb.ensure_configured()
    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())
    if tags:
        # Drop empty entries such as those produced by a trailing comma.
        tags = [label for label in tags.split(",") if label]
    else:
        tags = None

    # Fall back to the environment for anything the caller left unset.
    from_env = os.environ.get
    run_id = id or from_env(env.RUN_ID)
    description = message or from_env(env.DESCRIPTION)
    tags = tags or env.get_tags()
    group = run_group or from_env(env.RUN_GROUP)
    job_type = job_type or from_env(env.JOB_TYPE)
    name = name or from_env(env.NAME)
    notes = notes or from_env(env.NOTES)
    resume = resume or from_env(env.RESUME)

    cli_run = wandb_run.Run(run_id=run_id,
                            mode='clirun',
                            config=config,
                            description=description,
                            program=program,
                            tags=tags,
                            group=group,
                            job_type=job_type,
                            name=name,
                            notes=notes,
                            resume=resume)
    cli_run.enable_logging()

    # Copy of the current environment that is handed to the run manager and
    # to the user process.
    child_env = dict(os.environ)
    if configs:
        child_env[env.CONFIG_PATHS] = configs
    if show:
        child_env[env.SHOW_RUN] = 'True'

    if not cli_run.api.api_key:
        util.prompt_api_key(cli_run.api, input_callback=click.prompt)

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(child_env)
    except run_manager.Error:
        err_type, err, err_tb = sys.exc_info()
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        reason = str(err)
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        # The full traceback only goes to the log file, not the terminal.
        logger.error('\n'.join(
            traceback.format_exception(err_type, err, err_tb)))
        sys.exit(1)
    manager.run_user_process(program, args, child_env)
Beispiel #2
0
def jupyter_login(force=True, api=None):
    """Try to obtain an API key while running inside a notebook.

    With force=False only the automatic paths (colab login, anonymous key)
    are attempted; with force=True the user is prompted as a last resort.
    Falls back to the api of the module-level ``run`` when ``api`` is not
    given and raises LaunchError if neither is available.
    """
    def _key_for_notebook():
        loaded = sys.modules
        api_key = None
        is_anonymous = False
        if 'google.colab' in loaded:
            api_key = jupyter.attempt_colab_login(api.app_url)
        elif 'databricks_cli' in loaded and 'dbutils' in loaded:
            # getpass() does not appear to work on Databricks, so bail out
            # early and ask the user to configure the key by hand.
            termerror(
                "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks"
            )
            raise LaunchError(
                "Databricks integration requires api_key to be configured.")
        if not api_key:
            if os.environ.get(env.ALLOW_ANONYMOUS) == "true":
                api_key = api.create_anonymous_api_key()
                is_anonymous = True
        if force and not api_key:
            termerror(
                "Not authenticated.  Copy a key from https://app.wandb.ai/authorize"
            )
            api_key = getpass.getpass("API Key: ").strip()
        return api_key, is_anonymous

    if not api:
        api = run.api if run else None
    if not api:
        raise LaunchError("Internal error: api required for jupyter login")
    return util.prompt_api_key(api, browser_callback=_key_for_notebook)
Beispiel #3
0
def _jupyter_login(force=True, api=None):
    """Try to obtain an API key while running inside a notebook.

    With force=False only the automatic paths (colab login, anonymous key)
    are attempted; with force=True the user is prompted as a last resort.
    Falls back to the api of the module-level ``run`` when ``api`` is not
    given and raises LaunchError if neither is available.
    """
    def _key_for_notebook(signup=False):
        loaded = sys.modules
        api_key = None
        is_anonymous = False
        if 'google.colab' in loaded:
            api_key = jupyter.attempt_colab_login(api.app_url)
        elif 'databricks_cli' in loaded and 'dbutils' in loaded:
            # getpass() does not appear to work on Databricks, so bail out
            # early and ask the user to configure the key by hand.
            termerror(
                "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks")
            raise LaunchError("Databricks integration requires api_key to be configured.")
        # Anonymous access is off by default in jupyter.
        if not api_key:
            if os.environ.get(env.ANONYMOUS, "never") != "never":
                api_key = api.create_anonymous_api_key()
                is_anonymous = True
        if force and not api_key:
            try:
                termerror("Not authenticated.  Copy a key from https://app.wandb.ai/authorize")
                api_key = getpass.getpass("API Key: ").strip()
            except NotImplementedError:
                # Some environments cannot read input at all; tell the user
                # how to supply the key another way.
                termerror(
                    "Can't accept input in this environment, you should set WANDB_API_KEY or call wandb.login(key='YOUR_API_KEY')")
        return api_key, is_anonymous

    if not api:
        api = run.api if run else None
    if not api:
        raise LaunchError("Internal error: api required for jupyter login")
    return util.prompt_api_key(api, browser_callback=_key_for_notebook)
Beispiel #4
0
def login(anonymous=None, key=None):
    """Make sure this machine is logged in.

       A key may be passed explicitly, but the intended use is to let this
       function prompt the user for one.

       anonymous may be "never", "must", or "allow".  With "must" an anonymous
       login is always performed; with "allow" an anonymous user is only
       created when the user is not already logged in.

       Returns:
            True if login was successful
            False on failure
    """
    # Guarantees that the global api object exists.
    ensure_configured()
    if anonymous:
        os.environ[env.ANONYMOUS] = anonymous
    else:
        anonymous = "never"
    running_in_notebook = _get_python_type() != "python"

    if not key:
        if api.api_key and anonymous != "must":
            key = api.api_key
        elif running_in_notebook:
            os.environ[env.JUPYTER] = "true"
            # Only a boolean is returned below, so the key itself never gets
            # displayed in the notebook.
            key = _jupyter_login(api=api)
        else:
            key = util.prompt_api_key(api)
        return bool(key)

    termwarn("If you're specifying your api key in code, ensure this code is not shared publically.\nConsider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.")
    if running_in_notebook:
        termwarn("Calling wandb.login() without arguments from jupyter should prompt you for an api key.")
    util.set_api_key(api, key)
    return True
Beispiel #5
0
def login(key,
          host,
          anonymously,
          server=LocalServer(),
          browser=True,
          no_offline=False):
    """Log in from the command line, storing or prompting for an API key.

    Args:
        key: Sequence of keys given on the command line; only the first
            element is used. An empty sequence triggers the prompt.
        host: Base URL of the W&B server. "https://api.wandb.ai" clears the
            stored ``base_url`` setting; any other non-empty value is stored
            as ``base_url``.
        anonymously: When truthy and no key was given, the anonymous mode
            environment variable is set to "must" before prompting.
        server: Currently unused; the local callback server is disabled.
        browser: Whether opening a browser tab may be attempted.
        no_offline: Forwarded to the prompt; when truthy, a failed login does
            not disable W&B.

    Returns:
        The API key that was set or obtained, or a falsy value if none.

    Raises:
        ClickException: If ``host`` is given but does not start with "http".
    """
    global api
    if host == "https://api.wandb.ai":
        api.clear_setting("base_url", globally=True)
    elif host:
        if not host.startswith("http"):
            raise ClickException("host must start with http(s)://")
        api.set_setting("base_url", host, globally=True)

    key = key[0] if len(key) > 0 else None

    # Import in here for performance reasons
    import webbrowser
    browser = util.launch_browser(browser)

    def get_api_key_from_browser(signup=False):
        if not browser:
            return None
        query = '?signup=true' if signup else ''
        webbrowser.open_new_tab('{}/authorize{}'.format(api.app_url, query))
        # The local callback server is not used for now: doing so would
        # require catching Abort from server.stop. The tab is only opened so
        # the user can copy a key, hence nothing is returned here.
        return None

    if key:
        util.set_api_key(api, key)
    else:
        if anonymously:
            os.environ[env.ANONYMOUS] = "must"
        # Don't allow signups or dryrun for local. A server counts as local
        # only when a host was given and it is not the public cloud URL.
        # (The previous `host != None or host != ...` was always True.)
        local = bool(host) and host != "https://api.wandb.ai"
        key = util.prompt_api_key(api,
                                  input_callback=click.prompt,
                                  browser_callback=get_api_key_from_browser,
                                  no_offline=no_offline,
                                  local=local)

    if key:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")
    elif not no_offline:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )

    # reinitialize API to create the new client
    api = InternalApi()

    return key
Beispiel #6
0
def login(key, server=LocalServer(), browser=True, anonymous=False):
    """Log in from the command line using a given key or a browser round trip.

    ``key`` is a sequence of which only the first element is used. Without a
    key the user is prompted, with a browser callback that waits on the local
    ``server`` for the key. On failure W&B is marked as disabled. Returns the
    key (or a falsy value if none was obtained).
    """
    global api

    key = key[0] if len(key) else None

    # Imported lazily for performance reasons.
    import webbrowser
    browser = util.launch_browser(browser)

    def _key_via_browser():
        if not browser:
            return None
        authorize_url = '{}/authorize?{}'.format(api.app_url, server.qs())
        if not webbrowser.open_new_tab(authorize_url):
            return None

        # Block until the local server has received its result.
        server.start(blocking=True)
        return server.result["key"][0] if server.result.get("key") else None

    if not key:
        key = util.prompt_api_key(api,
                                  browser_callback=_key_via_browser,
                                  anonymous=anonymous)
    else:
        util.set_api_key(api, key)

    if not key:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )
    else:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")

    # Rebuild the global API object so a new client is created.
    api = InternalApi()

    return key
Beispiel #7
0
def login(key, anonymously, server=LocalServer(), browser=True):
    """Log in from the command line, storing or prompting for an API key.

    ``key`` is a sequence of which only the first element is used. Without a
    key the user is prompted; ``anonymously`` sets the anonymous mode
    environment variable to "must" beforehand. On failure W&B is marked as
    disabled. Returns the key (or a falsy value if none was obtained).
    """
    global api

    key = key[0] if len(key) else None

    # Imported lazily for performance reasons.
    import webbrowser
    browser = util.launch_browser(browser)

    def _open_authorize_page(signup=False):
        if browser:
            if signup:
                suffix = '?signup=true'
            else:
                suffix = ''
            webbrowser.open_new_tab(
                '{}/authorize{}'.format(api.app_url, suffix))
        # The local callback server is not used for now: doing so would
        # require catching Abort from server.stop. Nothing is returned, so
        # the key has to come from the input prompt.
        return None

    if not key:
        if anonymously:
            os.environ[env.ANONYMOUS] = "must"
        key = util.prompt_api_key(api,
                                  input_callback=click.prompt,
                                  browser_callback=_open_authorize_page)
    else:
        util.set_api_key(api, key)

    if not key:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )
    else:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")

    # Rebuild the global API object so a new client is created.
    api = InternalApi()

    return key
Beispiel #8
0
def init(job_type=None,
         dir=None,
         config=None,
         project=None,
         entity=None,
         reinit=None,
         tags=None,
         group=None,
         allow_val_change=False,
         resume=False,
         force=False,
         tensorboard=False,
         sync_tensorboard=False,
         name=None,
         notes=None,
         id=None,
         magic=None,
         allow_anonymous=False):
    """Initialize W&B

    If called from within Jupyter, initializes a new run and waits for a call to
    `wandb.log` to begin pushing metrics.  Otherwise, spawns a new process
    to communicate with W&B.

    Most arguments are published as environment variables before the run is
    created from the environment.

    Args:
        job_type (str, optional): The type of job running, defaults to 'train'
        config (dict, argparse, or tf.FLAGS, optional): The hyper parameters to store with the run
        project (str, optional): The project to push metrics to
        entity (str, optional): The entity to push metrics to
        dir (str, optional): An absolute path to a directory where metadata will be stored
        group (str, optional): A unique string shared by all runs in a given group
        tags (list, optional): A list of tags to apply to the run
        id (str, optional): A globally unique (per project) identifier for the run
        name (str, optional): A display name which does not have to be unique
        notes (str, optional): A multiline string associated with the run
        reinit (bool, optional): Allow multiple calls to init in the same process
        resume (bool, str, optional): Automatically resume this run if run from the same machine,
            you can also pass a unique run_id
        allow_val_change (bool, optional): Passed through when `config` is applied to the
            run's config; forced to True when a SageMaker config is detected
        tensorboard (bool, optional): Deprecated alias of `sync_tensorboard`
        sync_tensorboard (bool, optional): Synchronize wandb logs to tensorboard or tensorboardX
        force (bool, optional): Force authentication with wandb, defaults to False
        magic (bool, dict, or str, optional): magic configuration as bool, dict, json string,
            yaml filename
        allow_anonymous (bool, optional): When truthy, its lowercased string form is
            exported through the allow-anonymous environment variable

    Returns:
        A wandb.run object for metric and config logging. If a run already
        exists in this process, or the "inited" environment variable is set
        (e.g. in a child process), the existing global run (possibly None)
        is returned without creating a new one.

    Raises:
        LaunchError: If the run directory cannot be created or the run mode
            is invalid.
    """
    # NOTE: this has to stay the first statement so that locals() contains
    # exactly the call arguments forwarded to the hook.
    trigger.call('on_init', **locals())
    global run
    global __stage_dir__

    # We allow re-initialization when we're in Jupyter or explicitly opt-in to it.
    in_jupyter = _get_python_type() != "python"
    if reinit or (in_jupyter and reinit != False):
        reset_env(exclude=env.immutable_keys())
        run = None

    # TODO: deprecate tensorboard
    # Parenthesized on purpose: `and` binds tighter than `or`, so without the
    # parentheses the "not patched yet" guard was skipped for `tensorboard`.
    if (tensorboard or sync_tensorboard) and len(patched["tensorboard"]) == 0:
        util.get_module("wandb.tensorboard").patch()

    sagemaker_config = util.parse_sm_config()
    tf_config = util.parse_tfjob_config()
    if group is None:
        group = os.getenv(env.RUN_GROUP)
    if job_type is None:
        job_type = os.getenv(env.JOB_TYPE)
    if sagemaker_config:
        # Set run_id and potentially grouping if we're in SageMaker
        run_id = os.getenv('TRAINING_JOB_NAME')
        if run_id:
            os.environ[env.RUN_ID] = '-'.join(
                [run_id,
                 os.getenv('CURRENT_HOST', socket.gethostname())])
        with open("/opt/ml/input/config/resourceconfig.json") as conf_file:
            conf = json.load(conf_file)
        if group is None and len(conf["hosts"]) > 1:
            group = os.getenv('TRAINING_JOB_NAME')
        # Set secret variables
        if os.path.exists("secrets.env"):
            with open("secrets.env", "r") as secrets_file:
                for line in secrets_file:
                    line = line.strip()
                    # Skip blank or malformed lines instead of failing on
                    # the KEY=VALUE unpacking below.
                    if '=' not in line:
                        continue
                    key, val = line.split('=', 1)
                    os.environ[key] = val
    elif tf_config:
        cluster = tf_config.get('cluster')
        job_name = tf_config.get('task', {}).get('type')
        task_index = tf_config.get('task', {}).get('index')
        if job_name is not None and task_index is not None:
            # TODO: set run_id for resuming?
            run_id = cluster[job_name][task_index].rsplit(":")[0]
            if job_type is None:
                job_type = job_name
            if group is None and len(cluster.get("worker", [])) > 0:
                group = cluster[job_name][0].rsplit("-" + job_name, 1)[0]
    image = util.image_id_from_k8s()
    if image:
        os.environ[env.DOCKER] = image
    if project:
        os.environ[env.PROJECT] = project
    if entity:
        os.environ[env.ENTITY] = entity
    if group:
        os.environ[env.RUN_GROUP] = group
    if job_type:
        os.environ[env.JOB_TYPE] = job_type
    if tags:
        os.environ[env.TAGS] = ",".join(tags)
    if id:
        os.environ[env.RUN_ID] = id
        if name is None:
            # We do this because of https://github.com/wandb/core/issues/2170
            # to ensure that the run's name is explicitly set to match its
            # id. If we don't do this and the id is eight characters long, the
            # backend will set the name to a generated human-friendly value.
            #
            # In any case, if the user is explicitly setting `id` but not
            # `name`, their id is probably a meaningful string that we can
            # use to label the run.
            name = os.environ.get(
                env.NAME,
                id)  # environment variable takes precedence over this.
    if name:
        os.environ[env.NAME] = name
    if notes:
        os.environ[env.NOTES] = notes
    if magic is not None and magic is not False:
        if isinstance(magic, dict):
            os.environ[env.MAGIC] = json.dumps(magic)
        elif isinstance(magic, str):
            os.environ[env.MAGIC] = magic
        elif isinstance(magic, bool):
            pass
        else:
            termwarn("wandb.init called with invalid magic parameter type",
                     repeat=False)
        from wandb import magic_impl
        magic_impl.magic_install()
    if dir:
        os.environ[env.DIR] = dir
        util.mkdir_exists_ok(wandb_dir())
    if allow_anonymous:
        os.environ[env.ALLOW_ANONYMOUS] = str(allow_anonymous).lower()

    resume_path = os.path.join(wandb_dir(), wandb_run.RESUME_FNAME)
    # `== True` is intentional: any value equal to True selects auto resume,
    # while other truthy values (e.g. a run id string) take the next branch.
    if resume == True:
        os.environ[env.RESUME] = "auto"
    elif resume:
        os.environ[env.RESUME] = os.environ.get(env.RESUME, "allow")
        # TODO: remove allowing resume as a string in the future
        os.environ[env.RUN_ID] = id or resume
    elif os.path.exists(resume_path):
        # Not resuming: discard the stale resume file.
        os.remove(resume_path)
    if os.environ.get(env.RESUME) == 'auto' and os.path.exists(resume_path):
        if not os.environ.get(env.RUN_ID):
            with open(resume_path) as resume_file:
                os.environ[env.RUN_ID] = json.load(resume_file)["run_id"]

    # the following line is useful to ensure that no W&B logging happens in the user
    # process that might interfere with what they do
    # logging.basicConfig(format='user process %(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # If a thread calls wandb.init() it will get the same Run object as
    # the parent. If a child process with distinct memory space calls
    # wandb.init(), it won't get an error, but it will get a result of
    # None.
    # This check ensures that a child process can safely call wandb.init()
    # after a parent has (only the parent will create the Run object).
    # This doesn't protect against the case where the parent doesn't call
    # wandb.init but two children do.
    if run or os.getenv(env.INITED):
        return run

    if __stage_dir__ is None:
        __stage_dir__ = "wandb"
        util.mkdir_exists_ok(wandb_dir())

    try:
        signal.signal(signal.SIGQUIT, _debugger)
    except AttributeError:
        # signal.SIGQUIT is not defined on every platform.
        pass

    try:
        run = wandb_run.Run.from_environment_or_defaults()
    except IOError as e:
        termerror('Failed to create run directory: {}'.format(e))
        raise LaunchError("Could not write to filesystem.")

    run.set_environment()

    def set_global_config(run):
        global config  # because we already have a local config
        config = run.config

    set_global_config(run)
    global summary
    summary = run.summary

    # set this immediately after setting the run and the config. if there is an
    # exception after this it'll probably break the user script anyway
    os.environ[env.INITED] = '1'

    # we do these checks after setting the run and the config because users scripts
    # may depend on those things
    if sys.platform == 'win32' and run.mode != 'clirun':
        termerror(
            'To use wandb on Windows, you need to run the command "wandb run python <your_train_script>.py"'
        )
        return run

    if in_jupyter:
        _init_jupyter(run)
    elif run.mode == 'clirun':
        pass
    elif run.mode == 'run':
        api = InternalApi()
        # let init_jupyter handle this itself
        if not in_jupyter and not api.api_key:
            termlog(
                "W&B is a tool that helps track and visualize machine learning experiments"
            )
            if force:
                # With force set, no prompt is shown and no headless process
                # is started; only the error is reported.
                termerror(
                    "No credentials found.  Run \"wandb login\" or \"wandb off\" to disable wandb"
                )
            else:
                if util.prompt_api_key(api):
                    _init_headless(run)
                else:
                    termlog(
                        "No credentials found.  Run \"wandb login\" to visualize your metrics"
                    )
                    run.mode = "dryrun"
                    _init_headless(run, False)
        else:
            _init_headless(run)
    elif run.mode == 'dryrun':
        termlog('Dry run mode, not syncing to the cloud.')
        _init_headless(run, False)
    else:
        termerror('Invalid run mode "%s". Please unset WANDB_MODE.' % run.mode)
        raise LaunchError("The WANDB_MODE environment variable is invalid.")

    # set the run directory in the config so it actually gets persisted
    run.config.set_run_dir(run.dir)

    if sagemaker_config:
        run.config.update(sagemaker_config)
        allow_val_change = True
    if config:
        run.config.update(config, allow_val_change=allow_val_change)

    # Access history to ensure resumed is set when resuming
    run.history
    # Load the summary to support resuming
    run.summary.load()

    atexit.register(run.close_files)

    return run