Ejemplo n.º 1
0
def test_settings(test_dir, mocker):
    """Yield a ``wandb.Settings`` object configured for tests.

    Before yielding, a few wandb module-level attributes are reset and a
    ``wandb`` directory is created under the current working directory.
    After the test, any run still registered as ``wandb.run`` is joined.
    """
    # TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None

    mkdir_exists_ok(os.path.join(os.getcwd(), "wandb"))
    # TODO: consider making a debuggable directory that stays around...

    settings_kwargs = {
        "_start_time": time.time(),
        "base_url": "http://localhost",
        "root_dir": os.getcwd(),
        "save_code": True,
        "project": "test",
        "console": "off",
        "host": "test",
        "api_key": DUMMY_API_KEY,
        "run_id": wandb.util.generate_id(),
        "_start_datetime": datetime.datetime.now(),
    }
    settings = wandb.Settings(**settings_kwargs)
    settings.setdefaults()
    yield settings

    # Just in case someone forgets to join in tests
    active_run = wandb.run
    if active_run is not None:
        active_run.join()
Ejemplo n.º 2
0
    def setup(self, kwargs):
        """Store ``kwargs`` and start building the login settings.

        The ``_settings`` entry is always removed from ``kwargs``; when it is
        truthy it is applied on top of a fresh ``wandb.Settings()``.
        """
        self.kwargs = kwargs

        # build up login settings
        login_settings: Settings = wandb.Settings()
        settings_param = kwargs.pop("_settings", None)
        if settings_param:
            login_settings._apply_settings(settings_param)
Ejemplo n.º 3
0
    def __init__(
        self,
        entity: str,
        project: str,
        queues: Iterable[str] = None,
        max_jobs: int = None,
    ):
        """Set up local bookkeeping and register the agent through the API.

        Args:
            entity: Entity passed to ``create_launch_agent``.
            project: Project passed to ``create_launch_agent``.
            queues: Queues passed to ``create_launch_agent`` as given; the
                locally stored list falls back to ``["default"]`` when falsy.
            max_jobs: Stored as ``_max_jobs``; falls back to 1 when falsy.
        """
        self._entity = entity
        self._project = project
        self._api = Api()
        self._settings = wandb.Settings()
        api_settings = self._api.settings()
        self._base_url = api_settings.get("base_url")
        self._jobs: Dict[Union[int, str], AbstractRun] = {}
        self._ticks = 0
        self._running = 0
        self._cwd = os.getcwd()
        self._namespace = wandb.util.generate_id()
        self._access = _convert_access("project")
        self._max_jobs = max_jobs if max_jobs else 1

        # serverside creation
        introspection = self._api.launch_agent_introspection()
        self.gorilla_supports_agents = introspection is not None
        create_response = self._api.create_launch_agent(
            entity,
            project,
            queues,
            self.gorilla_supports_agents,
        )
        self._id = create_response["launchAgentId"]
        # hacky: want to display this to the user but we don't get it back
        # from gql until polling starts. fix later
        self._name = ""
        self._queues = queues or ["default"]
Ejemplo n.º 4
0
 def update_session(self, key):
     """Apply ``key`` as the login API key, then refresh the user settings."""
     login_source = {"api_key": key}
     settings: Settings = wandb.Settings()
     settings._apply_source_login(login_source)
     self._wl._update(settings=settings)
     # The key has changed, so pull in the user settings from the server
     # again.
     self._wl._update_user_settings()
Ejemplo n.º 5
0
def test_settings(test_dir, mocker, live_mock_server):
    """Yield a ``wandb.Settings`` object rooted at ``test_dir`` for tests.

    The settings point at ``live_mock_server.base_url``. After the test, any
    run still registered as ``wandb.run`` is finished.
    """
    # TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None

    mkdir_exists_ok(os.path.join(test_dir, "wandb"))

    settings_kwargs = {
        "_start_datetime": datetime.datetime.now(),
        "_start_time": time.time(),
        "api_key": DUMMY_API_KEY,
        "base_url": live_mock_server.base_url,
        "console": "off",
        "host": "test",
        "project": "test",
        "root_dir": test_dir,
        "run_id": wandb.util.generate_id(),
        "save_code": False,
    }
    settings = wandb.Settings(**settings_kwargs)
    yield settings

    # Just in case someone forgets to finish in tests. ...well, please don't!
    active_run = wandb.run
    if active_run is not None:
        active_run.finish()
Ejemplo n.º 6
0
    def __init__(self,
                 log_dir,
                 log_name,
                 post_fix='',
                 tb_on=True,
                 wandb_on=True,
                 project_name='faster-rcnn',
                 resume=False):
        """Configure file logging and the optional tensorboard/wandb sinks.

        Args:
            log_dir: Directory holding the text log and the ``tensorboard``
                sub-directory.
            log_name: Used as both the wandb run id and the run name.
            post_fix: Suffix inserted into the log file name.
            tb_on: When true, create a tensorboard ``FileWriter``.
            wandb_on: When true, call ``wandb.init``.
            project_name: wandb project name.
            resume: When true the log file is opened in append mode,
                otherwise in write mode.
        """
        self.log_dir = log_dir
        self.log_name = log_name
        self.tb_on = tb_on
        self.wandb_on = wandb_on

        if resume:
            log_filemode = 'a'
        else:
            log_filemode = 'w'
        log_path = os.path.join(self.log_dir, f'log{post_fix}.txt')
        logging.basicConfig(level=logging.INFO,
                            filename=log_path,
                            filemode=log_filemode)
        self.logger = logging.getLogger("Train")

        if self.tb_on:
            tb_dir = os.path.join(log_dir, 'tensorboard')
            if not os.path.exists(tb_dir):
                os.mkdir(tb_dir)
            self.tbWriter = tb.writer.FileWriter(tb_dir)

        if self.wandb_on:
            quiet_settings = wandb.Settings(console='off')
            wandb.init(project=project_name,
                       settings=quiet_settings,
                       id=self.log_name,
                       name=self.log_name)
Ejemplo n.º 7
0
def agent(sweep_id, function=None, entity=None, project=None, count=None):
    """Generic agent entrypoint, used for CLI or jupyter.

    When not running under plain ``python``, the jupyter environment flag is
    set and a jupyter login is triggered if no API key is available. All
    arguments are then forwarded to ``run_agent``.

    Args:
        sweep_id: Sweep ID generated by CLI or sweep API
        function (func, optional): A function to call instead of the
            "program" specified in the config
        entity (str, optional): W&B Entity
        project (str, optional): W&B Project
        count (int, optional): the number of trials to run.
    """
    in_jupyter = wandb._get_python_type() != "python"
    if in_jupyter:
        os.environ[wandb.env.JUPYTER] = "true"
        jupyter_api = InternalApi()
        if not jupyter_api.api_key:
            wandb._jupyter_login(api=jupyter_api)

    _ = wandb.Settings()
    agent_kwargs = {
        "function": function,
        "in_jupyter": in_jupyter,
        "entity": entity,
        "project": project,
        "count": count,
    }
    return run_agent(sweep_id, **agent_kwargs)
Ejemplo n.º 8
0
def login(key, host, cloud, relogin, anonymously, no_offline=False):
    """Persist the ``base_url`` setting for ``host`` and log in to W&B.

    Args:
        key: Sequence of API keys; only the first element (if any) is used.
        host: Base URL to log in to, or ``None``.
        cloud: When true and ``host`` is ``None``, the stored ``base_url``
            is cleared.
        relogin: Forwarded to ``wandb.login``.
        anonymously: Selects anonymous mode ``"must"`` instead of ``"never"``.
        no_offline: Currently unused.

    Raises:
        ClickException: if ``host`` is given but does not start with "http".
    """
    # TODO: handle no_offline
    if anonymously:
        anon_mode = "must"
    else:
        anon_mode = "never"
    cli_settings = wandb.Settings(_cli_only_mode=True,
                                  anonymous=anon_mode,
                                  base_url=host)
    wandb.setup(settings=cli_settings)
    api = _get_cling_api()

    reset_base_url = host == "https://api.wandb.ai" or (host is None
                                                        and cloud)
    if reset_base_url:
        api.clear_setting("base_url", globally=True, persist=True)
        # To avoid writing an empty local settings file, we only clear if it exists
        if os.path.exists(Settings._local_path()):
            api.clear_setting("base_url", persist=True)
    elif host:
        if not host.startswith("http"):
            raise ClickException("host must start with http(s)://")
        normalized_host = host.strip("/")
        api.set_setting("base_url",
                        normalized_host,
                        globally=True,
                        persist=True)

    if len(key) > 0:
        key = key[0]
    else:
        key = None

    wandb.login(relogin=relogin,
                key=key,
                anonymous=anon_mode,
                host=host,
                force=True)
Ejemplo n.º 9
0
def test_multiproc_strict(live_mock_server, parse_ctx):
    """Check that child-process training fails when ``strict`` is enabled.

    Two child processes run ``train`` and are each expected to exit with a
    non-zero code; the summary must then only reflect the in-process
    ``train(0)`` call.
    """
    strict_settings = wandb.Settings(strict="true")
    run = wandb.init(settings=strict_settings)

    train(0)

    procs = [
        multiprocessing.Process(target=train, kwargs={"add_val": 100})
        for _ in range(2)
    ]

    try:
        for proc in procs:
            proc.start()
    finally:
        for proc in procs:
            proc.join()
            # expect fail
            assert proc.exitcode != 0

    run.finish()

    ctx_util = parse_ctx(live_mock_server.get_ctx())

    # Drop the internal (underscore-prefixed) summary keys before comparing.
    public_summary = {}
    for name, value in dict(ctx_util.summary).items():
        if name.startswith("_"):
            continue
        public_summary[name] = value
    assert {"val": 3, "val2": 1, "mystep": 3} == public_summary
Ejemplo n.º 10
0
def _get_cling_api():
    """Return the module-level internal API, creating it on first use.

    On first use ``wandb.setup`` is called with CLI-only settings before the
    ``InternalApi`` is instantiated and cached in ``_api``.
    """
    global _api
    if _api is not None:
        return _api

    # TODO(jhr): make a settings object that is better for non runs.
    cli_settings = wandb.Settings(_cli_only_mode=True)
    wandb.setup(settings=cli_settings)
    _api = InternalApi()
    return _api
def test_no_dirs(test_settings, runner):
    """A run started in ``disabled`` mode must not create a ``wandb`` dir."""
    with runner.isolated_filesystem():
        disabled_settings = wandb.Settings(mode="disabled")
        test_settings._apply_settings(disabled_settings)

        run = wandb.init(settings=test_settings)
        run.log(dict(acc=0.9))
        run.finish()

        wandb_dir_exists = os.path.isdir("wandb")
        assert not wandb_dir_exists
Ejemplo n.º 12
0
 def build_wandb(self):
     """Initialise wandb when a project name is configured.

     ``self.wandb`` is set to ``None`` when ``self.args['wandb']['name']`` is
     falsy; otherwise a run is started, the full ``self.args`` is pushed into
     its config and ``self.wandb`` is set to the ``wandb`` module.
     """
     project = self.args['wandb']['name']
     if not project:
         self.wandb = None
         return

     quiet_settings = wandb.Settings(console='off')
     wandb.init(project=project, settings=quiet_settings)
     wandb.config.update(self.args)
     self.wandb = wandb
Ejemplo n.º 13
0
def model_provider():
    """Build the model and, when an API key is available, start a wandb run.

    Returns:
        A ``GPT2Model`` when ``args.pipe_parallel_size`` is 0, otherwise a
        ``GPT2ModelPipe``.

    If ``wandb.init`` raises ``UsageError``, wandb is switched off for the
    rest of this call (and through ``set_use_wandb``), so the config update
    below is skipped instead of being attempted without a run.
    """

    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0,
                              parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within training.py
        # We need to call model.set_batch_fn after deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    ## Wandb
    use_wandb = get_wandb_api_key() is not None
    set_use_wandb(use_wandb)
    args_dict = vars(args)
    if use_wandb:
        # only display system stats from one worker per machine
        if is_local_main():
            wandb_settings = wandb.Settings()
        else:
            wandb_settings = wandb.Settings(_disable_stats=True)
        group_name = args_dict.get('wandb_group')
        name = f'{socket.gethostname()}-{local_rank()}' if group_name else None

        try:
            wandb.init(project="neox",
                       group=group_name,
                       name=name,
                       save_code=False,
                       force=False,
                       entity=args_dict.get('wandb_team'),
                       settings=wandb_settings)
        except UsageError as e:
            # Keep the local flag in sync with the global one so that the
            # config update below is not attempted without a run.
            use_wandb = False
            set_use_wandb(False)
            print(e)
            print(
                'Skipping wandb. Execute `wandb login` on local or main node machine to enable.'
            )

    if use_wandb:
        wandb.config.update(args_dict)

    return model
Ejemplo n.º 14
0
 def run_start(self, run_id):
     """Create the run directories for ``run_id`` and inform init.

     A fresh ``wandb.Settings`` is started and given ``run_id``; its
     ``files_dir`` and ``log_user`` paths are then created and the settings
     are passed to ``self._inform_init``.
     """
     settings = wandb.Settings()
     settings._start_run()
     settings.update(run_id=run_id)
     # Read each path just before creating it, in this order.
     for attr_name in ("files_dir", "log_user"):
         os.makedirs(getattr(settings, attr_name))
     self._inform_init(settings)
Ejemplo n.º 15
0
 def init_wandb_func():
     """Call ``wandb.init`` with the project, user and run identity from ``cfg``.

     The run id and name are both ``wandb_unique_id``; ``resume=True`` and
     tensorboard syncing are requested, and the ``fork`` start method is used.
     """
     init_kwargs = dict(
         project=cfg.wandb_project,
         entity=cfg.wandb_user,
         sync_tensorboard=True,
         id=wandb_unique_id,
         name=wandb_unique_id,
         group=wandb_group,
         job_type=cfg.wandb_job_type,
         tags=cfg.wandb_tags,
         resume=True,
         settings=wandb.Settings(start_method='fork'),
     )
     wandb.init(**init_kwargs)
Ejemplo n.º 16
0
    def setup(self, context):
        """Start (or resume) the wandb run for ``context`` and set up storage.

        A new run gets a generated id stored in ``context.metadata`` under
        ``"wandb_id"``, a name extended with two random words and that id,
        and the flattened context config. A run whose metadata already has a
        ``"wandb_id"`` is resumed with ``resume='allow'``.

        ``wandb.init`` is retried every 10 seconds, up to 100 attempts.

        Raises:
            RuntimeError: if ``wandb.init`` still fails on the last attempt
                (chained from the last underlying exception).
        """
        wandb_params = {
            'project': self.project_name,
            'settings': wandb.Settings(symlink=False),
        }

        if self.group_name is not None:
            wandb_params['group'] = self.group_name

        resuming_previous_run = 'wandb_id' in context.metadata
        if not resuming_previous_run:
            wandb_params['id'] = context.metadata[
                "wandb_id"] = wandb.util.generate_id()

            rw = RandomWords()
            context.name = f'{context.name}-{rw.random_word()}-{rw.random_word()}-{context.metadata["wandb_id"]}'
            wandb_params['name'] = context.name

            config = context.get_config()
            config = flatten_nested_dict(config)
            wandb_params['config'] = config
        else:
            wandb_params['id'] = context.metadata["wandb_id"]
            wandb_params['resume'] = 'allow'

        # Initialize directories for saving data
        if self.group_name is None:
            self.save_folder = os.path.join(self.logging_dir,
                                            self.project_name, context.name)
        else:
            self.save_folder = os.path.join(self.logging_dir,
                                            self.project_name, self.group_name,
                                            context.name)
        # exist_ok avoids the check-then-create race between processes.
        os.makedirs(self.save_folder, exist_ok=True)

        wandb_params['dir'] = self.save_folder

        max_attempts = 100
        for attempt in range(1, max_attempts + 1):
            try:
                wandb.init(**wandb_params)
                break
            except Exception as e:
                if attempt == max_attempts:
                    # Fail loudly here rather than later, on the first call
                    # that needs an initialised run.
                    raise RuntimeError(
                        f"wandb.init failed after {max_attempts} attempts"
                    ) from e
                print(f"wandb.init failed due to {e}\nRetrying in 10s...")
                time.sleep(10)

        wandb.define_metric("*", summary="max")
        wandb.define_metric("*", summary="min")
        wandb.define_metric("*", summary="mean")

        # Save code on first iteration
        if not resuming_previous_run:
            for file_path in context.file_paths:
                WandbLogger.wandb_save(file_path)

        print(str(context))
Ejemplo n.º 17
0
 def update_session(self, key):
     """Apply ``key`` as the login key, or switch to offline mode without one.

     User settings are pulled from the server afterwards unless the resulting
     settings are offline.
     """
     _logger = wandb.setup()._get_logger()
     settings: Settings = wandb.Settings()
     if key:
         login_settings = {"api_key": key}
     else:
         login_settings = {"mode": "offline"}
     settings._apply_source_login(login_settings, _logger=_logger)
     self._wl._update(settings=settings)

     if self._wl.settings._offline:
         return
     # The key has changed, so pull in the user settings from the server
     # again.
     self._wl._update_user_settings()
Ejemplo n.º 18
0
    def new(cls,
            entity: Optional[str] = None,
            project: Optional[str] = None) -> WandbLogger:
        """
        Starts a W&B run using the ``fork`` start method and creates a logger.

        :param entity: Entity forwarded to ``wandb.init``.
        :param project: Project forwarded to ``wandb.init``.
        :return: The logger.
        """
        fork_settings = wandb.Settings(start_method='fork')
        wandb.init(entity=entity, project=project, settings=fork_settings)
        return cls()
Ejemplo n.º 19
0
    def setup(self, kwargs):
        """Build login settings from ``kwargs`` and apply them globally.

        The ``_settings`` entry is removed from ``kwargs`` and, when truthy,
        applied first; the remaining ``kwargs`` are then applied as login
        settings.
        """
        self.kwargs = kwargs

        # build up login settings
        login_settings = wandb.Settings()
        overrides = kwargs.pop("_settings", None)
        if overrides:
            login_settings._apply_settings(overrides)
        login_settings._apply_login(kwargs)

        # make sure they are applied globally
        wl = wandb.setup(settings=login_settings)
        self._wl = wl
        self._settings = wl._settings
Ejemplo n.º 20
0
def test_settings_unexpected_args_telemetry(runner, live_mock_server,
                                            parse_ctx, capsys):
    """Unknown ``Settings`` arguments are warned about and recorded in telemetry."""
    with runner.isolated_filesystem():
        bogus_settings = wandb.Settings(blah=3)
        run = wandb.init(settings=bogus_settings)

        stderr_text = capsys.readouterr().err
        assert "Ignoring unexpected arguments: ['blah']" in stderr_text

        telemetry = parse_ctx(live_mock_server.get_ctx()).telemetry
        # TelemetryRecord field 11 is Issues,
        # whose field 2 corresponds to unexpected arguments in Settings
        issues = telemetry.get("11", [])
        assert 2 in issues
        run.finish()
    def train():
        """Start a run and check that it picked up the expected sweep id."""
        # Here we're in a different process. It's hard to communicate
        # back to the main process for assertions.

        # TODO: Fix this.
        # There is an issue here, the agent sets the environment variable
        # WANDB_SWEEP_ID and wandb.init() should pick that up. But it doesn't,
        # I think because the settings object has been frozen at some other time.
        run = wandb.init(
            settings=wandb.Settings(
                base_url="http://localhost",
                api_key=dummy_api_key,
            ))

        # If this assertion fails, the test will timeout (because we
        # never complete 1 agent run)
        assert run.sweep_id == 'test-sweep-id'
Ejemplo n.º 22
0
 def init(*args, **kwargs):
     """Call ``wandb.init`` offline against the mocked backend.

     Globals are always unset afterwards, whether or not init succeeds.
     """
     try:
         mocks_from_args(mocker, default_wandb_args(), mock_server)
         #  TODO: likely not the right thing to do, we shouldn't be setting this
         wandb._IS_INTERNAL_PROCESS = False
         #  We want to run setup every time in tests
         wandb.wandb_sdk.wandb_setup._WandbSetup._instance = None
         mocker.patch("wandb.wandb_sdk.wandb_init.Backend",
                      utils.BackendMock)
         offline_settings = wandb.Settings(console="off",
                                           mode="offline",
                                           _except_exit=False)
         return wandb.init(*args, settings=offline_settings, **kwargs)
     finally:
         unset_globals()
Ejemplo n.º 23
0
    def setup(self, kwargs):
        """Build login settings from ``kwargs`` and apply them globally.

        ``_settings`` (a ``Settings`` object or a dict) and ``relogin`` are
        removed from ``kwargs`` before the rest is applied as login settings.
        """
        self.kwargs = kwargs

        # build up login settings
        login_settings: Settings = wandb.Settings()
        settings_param = kwargs.pop("_settings", None)
        # note that these cases do not come up anywhere except for the tests;
        # a missing (None) value matches neither branch
        if isinstance(settings_param, Settings):
            login_settings._apply_settings(settings_param)
        elif isinstance(settings_param, dict):
            login_settings.update(settings_param, source=Source.LOGIN)

        _logger = wandb.setup()._get_logger()
        # Do not save relogin into settings as we just want to relogin once
        self._relogin = kwargs.pop("relogin", None)
        login_settings._apply_login(kwargs, _logger=_logger)

        # make sure they are applied globally
        wl = wandb.setup(settings=login_settings)
        self._wl = wl
        self._settings = wl.settings
Ejemplo n.º 24
0
        def wrapper(self, *args, settings=settings, **kwargs):
            """Run ``func`` inside a W&B run and record its inputs and outputs.

            ``settings`` is replaced by a fresh ``wandb.Settings()`` unless
            it already is a ``Settings`` instance. The run group and job type
            are coalesced with values derived from ``current``.
            """
            settings_module = wandb.sdk.wandb_settings
            if not isinstance(settings, settings_module.Settings):
                settings = wandb.Settings()

            default_group = f"{current.flow_name}/{current.run_id}"
            settings.update(
                run_group=coalesce(settings.run_group, default_group),
                source=settings_module.Source.INIT,
            )
            settings.update(
                run_job_type=coalesce(settings.run_job_type,
                                      current.step_name),
                source=settings_module.Source.INIT,
            )

            with wandb.init(settings=settings) as run:
                with wb_telemetry.context(run=run) as tel:
                    tel.feature.metaflow = True
                proxy = ArtifactProxy(self)
                run.config.update(proxy.params)
                func(proxy, *args, **kwargs)

                # Keyword arguments shared by the input and output calls.
                tracking_kwargs = dict(
                    datasets=datasets,
                    models=models,
                    others=others,
                    run=run,
                )

                for name, data in proxy.inputs.items():
                    wandb_use(name, data, **tracking_kwargs)

                for name, data in proxy.outputs.items():
                    wandb_track(name, data, **tracking_kwargs)
Ejemplo n.º 25
0
    def wandb_start_run(
        self,
        w_run_name: str | None = None,
        w_job_type: str | None = None,
        run_args: RunArgsType | None = None,
    ) -> Run:
        """Start a new W&B run and return it.

        Args:
            w_run_name: Run name forwarded to ``wandb.init``.
            w_job_type: Job type forwarded to ``wandb.init``.
            run_args: Passed to ``wandb.init`` as the run config.

        Returns:
            The ``Run`` object returned by ``wandb.init``.

        Raises:
            RuntimeError: if a run is already registered in ``wandb.run``, or
                if ``wandb.init`` returns something that is not a ``Run``.
        """
        if wandb.run is not None:
            raise RuntimeError(f"W&B has registerred run {wandb.run.name}")

        with switched_aws_cfg(self._s3_credentials_file):
            wandb_run = wandb.init(
                project=self._wab_project_name,
                name=w_run_name,
                job_type=w_job_type,
                settings=wandb.Settings(start_method="fork"),
                config=run_args,  # type: ignore
                tags=self._try_get_neuro_tags(),
            )
        if not isinstance(wandb_run, Run):
            # ``!r`` is the repr conversion; ``:r`` would be read as a format
            # spec and raise while building this message.
            raise RuntimeError(
                f"Failed to initialize W&B run, got: {wandb_run!r}")
        return wandb_run
Ejemplo n.º 26
0
def wandb_init_run(request, runner, mocker, mock_server):
    """Yield an offline wandb run backed by the mocked backend.

    Arguments come from ``default_wandb_args()``, updated with the kwargs of
    the test's ``wandb_args`` marker when present. The entries of
    ``args["env"]`` are set in the environment for the duration of the test
    and removed again afterwards.
    """
    marker = request.node.get_closest_marker('wandb_args')
    args = default_wandb_args()
    if marker:
        args.update(marker.kwargs)
    try:
        mocks_from_args(mocker, args, mock_server)
        for k, v in args["env"].items():
            os.environ[k] = v
        #  TODO: likely not the right thing to do, we shouldn't be setting this
        wandb._IS_INTERNAL_PROCESS = False
        #  We want to run setup every time in tests
        wandb.wandb_sdk.wandb_setup._WandbSetup._instance = None
        mocker.patch('wandb.wandb_sdk.wandb_init.Backend', utils.BackendMock)
        run = wandb.init(settings=wandb.Settings(console="off", mode="offline", _except_exit=False),
                         **args["wandb_init"])
        yield run
        wandb.join()
    finally:
        unset_globals()
        # pop() instead of del: if setup failed before the variables were
        # set (or the test removed one), cleanup must not raise KeyError and
        # hide the original error.
        for k in args["env"]:
            os.environ.pop(k, None)
Ejemplo n.º 27
0
def agent(sweep_id, function=None, entity=None, project=None, count=None):
    """Generic agent entrypoint, used for CLI or jupyter.

    When not running under plain ``python``, the jupyter environment flag is
    set and a jupyter login is triggered if no API key is available. All
    arguments are then forwarded unchanged to ``run_agent``.

    Args:
        sweep_id: Sweep ID generated by CLI or sweep API
        function (optional): Forwarded to ``run_agent`` as ``function``
        entity (str, optional): W&B Entity
        project (str, optional): W&B Project
        count (optional): Forwarded to ``run_agent`` as ``count``
    """
    in_jupyter = wandb._get_python_type() != "python"
    if in_jupyter:
        os.environ[wandb.env.JUPYTER] = "true"
        jupyter_api = InternalApi()
        if not jupyter_api.api_key:
            wandb._jupyter_login(api=jupyter_api)

    # The settings object is constructed but not used any further here.
    _ = wandb.Settings()
    agent_kwargs = {
        "function": function,
        "in_jupyter": in_jupyter,
        "entity": entity,
        "project": project,
        "count": count,
    }
    return run_agent(sweep_id, **agent_kwargs)
Ejemplo n.º 28
0
 def _send_tensorboard(self, tb_root, tb_logdirs, send_manager):
     """Send a run record and the given tensorboard logdirs via ``send_manager``.

     The entity is looked up from the API viewer when not already set. The
     run id and project fall back to generated values when unset.
     """
     if self._entity is None:
         viewer, _ = send_manager._api.viewer_server_info()
         self._entity = viewer.get("entity")

     proto_run = wandb_internal_pb2.RunRecord()
     proto_run.run_id = self._run_id or wandb.util.generate_id()
     proto_run.project = self._project or wandb.util.auto_project_name(None)
     proto_run.entity = self._entity

     entity_part = url_quote(proto_run.entity)
     project_part = url_quote(proto_run.project)
     run_part = url_quote(proto_run.run_id)
     url = f"{self._app_url}/{entity_part}/{project_part}/runs/{run_part}"
     print(f"Syncing: {url} ...")
     sys.stdout.flush()

     interface = send_manager._interface
     send_manager.send(interface._make_record(run=proto_run))

     settings = wandb.Settings(
         root_dir=TMPDIR.name,
         run_id=proto_run.run_id,
         _start_datetime=datetime.datetime.now(),
         _start_time=time.time(),
     )
     watcher = tb_watcher.TBWatcher(
         settings, proto_run, send_manager._interface, True
     )
     for logdir in tb_logdirs:
         watcher.add(logdir, True, tb_root)
         sys.stdout.flush()
     watcher.finish()

     # forward every record still waiting in the queue
     record_queue = send_manager._interface.record_q
     while not record_queue.empty():
         send_manager.send(record_queue.get(block=True))
     sys.stdout.flush()
     send_manager.finish()
Ejemplo n.º 29
0
def test_offline_compression(console_settings, capfd, runner):
    """Check that offline console output is compressed in the binary log.

    The run prints a progress bar, a large number of newlines, three marker
    lines and a set of cursor-control escape sequences. The synced log is
    then expected to hold a single output record per stream, only the final
    progress-bar state, and none of the overwritten marker lines.
    """
    with capfd.disabled():
        s = wandb.Settings(mode="offline")
        console_settings._apply_settings(s)

        run = wandb.init(settings=console_settings)

        for _ in tqdm.tqdm(range(100), ncols=139, ascii=" 123456789#"):
            time.sleep(0.05)

        print("\n" * 1000)

        print("QWERT")
        print("YUIOP")
        print("12345")

        print("\x1b[A\r\x1b[J\x1b[A\r\x1b[1J")

        time.sleep(1)

        run.finish()
        binary_log_file = (
            os.path.join(os.path.dirname(run.dir), "run-" + run.id) + ".wandb")
        binary_log = runner.invoke(
            cli.sync, ["--view", "--verbose", binary_log_file]).stdout

        # Only a single output record per stream is written when the run finishes
        assert binary_log.count("Record: output") == 2

        # Only final state of progress bar is logged; report the actual
        # count (not the bound method) when the assertion fails
        hash_count = binary_log.count("#")
        assert hash_count == 100, hash_count

        # Intermediate states are not logged
        assert "QWERT" not in binary_log
        assert "YUIOP" not in binary_log
        assert "12345" not in binary_log
        assert "UIOP" in binary_log
Ejemplo n.º 30
0
    def __init__(self, config, variant):
        """Resolve the logger configuration and start the wandb run.

        Args:
            config: Overrides applied on top of
                ``WandBLogger.get_default_config()``.
            variant: Copied and passed to ``wandb.init`` as the run config;
                a ``hostname`` entry is added when missing.
        """
        cfg = WandBLogger.get_default_config()
        self.config = cfg
        cfg.update(config)

        if cfg.experiment_id == '':
            cfg.experiment_id = uuid.uuid4().hex

        if cfg.prefix != '':
            cfg.project = f'{cfg.prefix}--{cfg.project}'

        if cfg.output_dir == '':
            cfg.output_dir = tempfile.mkdtemp()
        else:
            cfg.output_dir = os.path.join(cfg.output_dir, cfg.experiment_id)
            os.makedirs(cfg.output_dir, exist_ok=True)

        self._variant = copy(variant)

        if 'hostname' not in self._variant:
            self._variant['hostname'] = gethostname()

        if cfg.random_delay > 0:
            delay = np.random.uniform(0, cfg.random_delay)
            time.sleep(delay)

        if cfg.online:
            run_mode = 'online'
        else:
            run_mode = 'offline'
        run_settings = wandb.Settings(
            start_method="thread",
            _disable_stats=True,
        )
        wandb.init(
            config=self._variant,
            project=cfg.project,
            dir=cfg.output_dir,
            id=cfg.experiment_id,
            settings=run_settings,
            mode=run_mode,
        )