Esempio n. 1
0
    def _command_run(self, command):
        """Start a new run in response to a ``run`` command.

        Logs the requested configuration, creates a ``wandb_run.Run`` tied to
        this agent's sweep, copies the config values into it, and launches an
        ``AgentProcess`` that is registered in ``self._run_processes`` under
        the run's id.

        Args:
            command: mapping read for the keys ``args`` (name -> mapping that
                has a ``value`` entry), ``program`` (only when no function is
                set on the agent), and optionally ``run_id``,
                ``run_storage_id`` and ``env``.
        """
        args = command['args']
        run_id = command.get('run_id')

        # One tab-indented "name: value" line per config entry; the same text
        # is used for the log record and for the notebook printout.
        config_text = '\n'.join(
            '\t{}: {}'.format(name, entry['value'])
            for name, entry in args.items())
        logger.info('Agent starting run with config:\n' + config_text)
        if self._in_jupyter:
            print('wandb: Agent Starting Run: {} with config:\n'.format(run_id)
                  + config_text)

        run = wandb_run.Run(
            mode='run',
            sweep_id=self._sweep_id,
            storage_id=command.get('run_storage_id'),
            run_id=run_id,
        )

        # Persist the config so it reflects the state of the run that the
        # server generated.
        run.config.set_run_dir(run.dir)
        config_values = {}
        for name, entry in args.items():
            config_values[name] = entry['value']
        run.config.update(config_values)

        # Child environment: the current process environment overlaid with
        # whatever the command supplies under 'env'.
        env = dict(os.environ)
        env.update(command.get('env', {}))
        run.set_environment(env)

        flags = []
        for name, entry in args.items():
            flags.append("--{}={}".format(name, entry['value']))

        if not self._function:
            # '/usr/bin/env' is only prepended on non-Windows platforms.
            if platform.system() == "Windows":
                launcher = []
            else:
                launcher = ['/usr/bin/env']
            command_list = launcher + ['python', command['program']] + flags
            proc = AgentProcess(command=command_list, env=env)
        else:
            proc = AgentProcess(function=self._function,
                                env=env,
                                run_id=run_id,
                                in_jupyter=self._in_jupyter)
        self._run_processes[run.id] = proc

        # Track when SIGTERM was sent so the process gets a chance to handle
        # it before SIGKILL is sent on a later heartbeat.
        self._run_processes[run.id].last_sigterm_time = None
        self._last_report_time = None
Esempio n. 2
0
    def _command_run(self, command):
        """Start a new run in response to a ``run`` command.

        Logs the requested configuration, creates a ``wandb_run.Run`` tied to
        this agent's sweep, copies the config values into it, and launches an
        ``AgentProcess`` that is registered in ``self._run_processes`` under
        the run's id. When ``self._function`` is set the process wraps that
        function; otherwise a command line is assembled from
        ``self._sweep_command`` (or a default template) by expanding
        ``${...}`` macros.

        Supported macros: ``${env}``, ``${interpreter}`` (the historical
        misspelling ``${interpretter}`` is also accepted), ``${program}`` and
        ``${args}``. Unknown macros expand to nothing and are logged as a
        warning.

        Args:
            command: mapping read for the keys ``args`` (name -> mapping that
                has a ``value`` entry), ``program`` (only when no function is
                set on the agent), and optionally ``run_id``,
                ``run_storage_id`` and ``env``.
        """
        args = command['args']
        run_id = command.get('run_id')

        # Format the config once; it is used for both the log record and the
        # notebook printout.
        config_text = '\n'.join(
            '\t{}: {}'.format(name, entry['value'])
            for name, entry in args.items())
        logger.info('Agent starting run with config:\n' + config_text)
        if self._in_jupyter:
            print('wandb: Agent Starting Run: {} with config:\n'.format(run_id)
                  + config_text)

        run = wandb_run.Run(mode='run',
                            sweep_id=self._sweep_id,
                            storage_id=command.get('run_storage_id'),
                            run_id=run_id)

        # Persist the config so it reflects the state of the run that the
        # server generated.
        run.config.set_run_dir(run.dir)
        run.config.update(
            {name: entry['value'] for name, entry in args.items()})

        # Child environment: the current process environment overlaid with
        # whatever the command supplies under 'env'.
        env = dict(os.environ)
        env.update(command.get('env', {}))
        run.set_environment(env)

        if self._function:
            proc = AgentProcess(function=self._function, env=env,
                                run_id=run_id, in_jupyter=self._in_jupyter)
        else:
            # Flags are only needed when launching an external program.
            flags = ["--{}={}".format(name, entry['value'])
                     for name, entry in args.items()]
            interpreter = ["python"]
            sweep_vars = {
                "interpreter": interpreter,
                # Legacy misspelling, kept so existing sweep commands (and
                # the default template below) continue to work.
                "interpretter": interpreter,
                "program": [command['program']],
                "args": flags,
                # On Windows the env macro expands to nothing.
                "env": ([] if platform.system() == "Windows"
                        else ["/usr/bin/env"]),
            }
            sweep_command = self._sweep_command or [
                "${env}", "${interpretter}", "${program}", "${args}"]

            command_list = []
            for token in sweep_command:
                if token.startswith("${") and token.endswith("}"):
                    macro = token[2:-1]
                    if macro not in sweep_vars:
                        # Previously dropped without any trace; still
                        # dropped, but now visible in the log.
                        logger.warning(
                            'Ignoring unknown sweep command macro: {}'.format(
                                token))
                    command_list.extend(sweep_vars.get(macro, []))
                else:
                    command_list.append(token)
            logger.info(
                'About to run command: {}'.format(' '.join(command_list)))
            proc = AgentProcess(command=command_list, env=env)
        self._run_processes[run.id] = proc

        # Track when SIGTERM was sent so the process gets a chance to handle
        # it before SIGKILL is sent on a later heartbeat.
        self._run_processes[run.id].last_sigterm_time = None
        self._last_report_time = None