예제 #1
0
파일: runner.py 프로젝트: Mistobaan/studio
    def create_experiments(hyperparam_tuples):
        """Create one experiment per hyperparameter assignment.

        For each entry of ``hyperparam_tuples`` the original workspace is
        copied with ``rsync_cp`` into a fresh artifact-cache directory whose
        name carries a random suffix and the current timestamp. Parameters
        whose value is a numpy array are saved to ``.npy`` files and attached
        as immutable artifacts; every other parameter is substituted
        textually into the copied script.

        Args:
            hyperparam_tuples: iterable of mappings from parameter name to
                parameter value.

        Returns:
            List of the objects returned by ``model.create_experiment``, one
            per entry of ``hyperparam_tuples``.
        """
        experiments = []
        for hyperparam_tuple in hyperparam_tuples:
            experiment_name = experiment_name_base + "__opt__%s__%s" % (
                rand_string(32), int(time.time()))
            experiment_name = experiment_name.replace('.', '_')

            workspace_new = fs_tracker.get_artifact_cache(
                'workspace', experiment_name)

            # Shallow copy: only the 'workspace' entry is replaced, so the
            # shared ``artifacts`` mapping itself is left untouched.
            current_artifacts = artifacts.copy()
            current_artifacts['workspace'] = {
                'local': workspace_new,
                'mutable': True
            }

            rsync_cp(workspace_orig, workspace_new, ignore_arg, logger)

            script_path = os.path.join(workspace_new, exec_filename)
            # Loaded lazily so the script is only touched when at least one
            # non-array parameter has to be substituted into it.
            script_text = None

            # ``items()`` works on both Python 2 and Python 3 mappings.
            for param_name, param_value in hyperparam_tuple.items():
                if isinstance(param_value, np.ndarray):
                    array_filepath = '/tmp/%s.npy' % rand_string(32)
                    np.save(array_filepath, param_value)
                    # An array parameter must not shadow an existing artifact.
                    assert param_name not in current_artifacts
                    current_artifacts[param_name] = {'local': array_filepath,
                                                     'mutable': False}
                    continue

                if script_text is None:
                    # Text mode: a str pattern cannot be applied to the bytes
                    # returned by a binary read on Python 3.
                    with open(script_path, 'r') as f:
                        script_text = f.read()

                value_text = str(param_value)
                # Replace whole-word occurrences of the parameter name that
                # have no '=' between them and the end of their line. The
                # name is escaped and the replacement goes through a
                # callable so neither is interpreted as regex syntax.
                script_text = re.sub(
                    r'\b' + re.escape(param_name) + r'\b(?=[^=]*\n)',
                    lambda _match: value_text,
                    script_text)

            if script_text is not None:
                # Single write after all substitutions have been applied.
                with open(script_path, 'w') as f:
                    f.write(script_text)

            experiments.append(model.create_experiment(
                filename=exec_filename,
                args=other_args,
                experiment_name=experiment_name,
                project=project,
                artifacts=current_artifacts,
                resources_needed=resources_needed,
                metric=runner_args.metric))
        return experiments
예제 #2
0
파일: runner.py 프로젝트: DSLituiev/studio
    def create_experiments(hyperparam_tuples):
        """Create one experiment per hyperparameter assignment.

        The experiment name is the base name followed by
        ``__<param>__<value>`` for every parameter, with dots replaced by
        underscores; a ``_v<N>`` suffix (starting at ``_v2``) is appended
        when that name was already produced in this call. The workspace is
        copied with ``shutil.copytree`` into a fresh artifact-cache directory
        and every parameter is substituted textually into the copied script.

        Args:
            hyperparam_tuples: iterable of mappings from parameter name to
                parameter value.

        Returns:
            List of the objects returned by ``model.create_experiment``, one
            per entry of ``hyperparam_tuples``.
        """
        experiments = []
        used_names = set()

        # ``artifacts`` is never modified below (only copies are), so the
        # source workspace is the same for every experiment.
        workspace_orig = artifacts['workspace']['local'] \
            if 'workspace' in artifacts else '.'

        for hyperparam_tuple in hyperparam_tuples:
            experiment_name = experiment_name_base
            # ``items()`` works on both Python 2 and Python 3 mappings.
            for param_name, param_value in hyperparam_tuple.items():
                experiment_name = experiment_name + '__' + \
                    param_name + '__' + str(param_value)
            experiment_name = experiment_name.replace('.', '_')

            # If the name was used before, append _v2, _v3, ... until unique.
            name_stem = experiment_name
            counter = 1
            while experiment_name in used_names:
                counter += 1
                experiment_name = "%s_v%s" % (name_stem, counter)
            used_names.add(experiment_name)

            workspace_new = fs_tracker.get_artifact_cache(
                'workspace', experiment_name)

            # Shallow copy with only the 'workspace' entry replaced.
            current_artifacts = artifacts.copy()
            current_artifacts['workspace'] = {
                'local': workspace_new,
                'mutable': True
            }

            shutil.copytree(workspace_orig, workspace_new)

            script_path = os.path.join(workspace_new, exec_filename)
            with open(script_path, 'r') as f:
                script_text = f.read()

            for param_name, param_value in hyperparam_tuple.items():
                value_text = str(param_value)
                # Replace whole-word occurrences of the parameter name that
                # have no '=' between them and the end of their line. The
                # name is escaped and the replacement goes through a
                # callable so neither is interpreted as regex syntax.
                script_text = re.sub(
                    r'\b' + re.escape(param_name) + r'\b(?=[^=]*\n)',
                    lambda _match: value_text,
                    script_text)

            with open(script_path, 'w') as f:
                f.write(script_text)

            experiments.append(model.create_experiment(
                filename=exec_filename,
                args=other_args,
                experiment_name=experiment_name,
                project=project,
                artifacts=current_artifacts,
                resources_needed=resources_needed,
                metric=runner_args.metric))
        return experiments
예제 #3
0
파일: runner.py 프로젝트: Mistobaan/studio
def main(args=sys.argv):
    """Command-line entry point of the studio runner.

    Runner options are read from the arguments that precede the first
    argument ending in ``.py``; that argument is the script to execute and
    everything after it is passed through as script arguments. The function
    then builds the experiment(s), submits them and spins up workers.

    When ``--hyperparam`` values are supplied, either the ``grid`` optimizer
    creates all experiments at once, or an optimizer plugin is imported and
    driven in an ask/tell loop until ``optimizer.stop()`` returns true.

    Args:
        args: full argument vector (defaults to ``sys.argv``). Element 0 is
            never parsed as a runner option.

    Returns:
        None. The process exits early when no ``*.py`` argument is present,
        or with status 1 when run from a dirty git repository without
        ``--force-git``.
    """
    logger = logging.getLogger('studio-runner')
    parser = argparse.ArgumentParser(
        description='Studio runner. \
                     Usage: studio run <runner_arguments> \
                     script <script_arguments>')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--project', help='name of the project', default=None)
    parser.add_argument(
        '--experiment', '-e',
        help='Name of the experiment. If none provided, ' +
             'random uuid will be generated',
        default=None)

    parser.add_argument(
        '--guest',
        help='Guest mode (does not require db credentials)',
        action='store_true')

    parser.add_argument(
        '--force-git',
        help='If run in a git directory, force running the experiment ' +
             'even if changes are not commited',
        action='store_true')

    parser.add_argument(
        '--gpus',
        help='Number of gpus needed to run the experiment',
        type=int,
        default=None)

    parser.add_argument(
        '--cpus',
        help='Number of cpus needed to run the experiment' +
             ' (used to configure cloud instance)',
        type=int,
        default=None)

    parser.add_argument(
        '--ram',
        help='Amount of RAM needed to run the experiment' +
             ' (used to configure cloud instance), ex: 10G, 10GB',
        default=None)

    parser.add_argument(
        '--hdd',
        help='Amount of hard drive space needed to run the experiment' +
             ' (used to configure cloud instance), ex: 10G, 10GB',
        default=None)

    parser.add_argument(
        '--queue', '-q',
        help='Name of the remote execution queue',
        default=None)

    parser.add_argument(
        '--cloud',
        help='Cloud execution mode. Could be gcloud, gcspot, ec2 or ec2spot',
        default=None)

    parser.add_argument(
        '--bid',
        help='Spot instance price bid, specified in USD or in percentage ' +
             'of on-demand instance price. Default is %(default)s',
        default='100%')

    parser.add_argument(
        '--capture-once', '-co',
        help='Name of the immutable artifact to be captured. ' +
        'It will be captured once before the experiment is run',
        default=[], action='append')

    parser.add_argument(
        '--capture', '-c',
        help='Name of the mutable artifact to be captured continuously',
        default=[], action='append')

    parser.add_argument(
        '--reuse', '-r',
        help='Name of the artifact from another experiment to use',
        default=[], action='append')

    parser.add_argument(
        '--verbose', '-v',
        help='Verbosity level. Allowed values: ' +
             'debug, info, warn, error, crit ' +
             'or numerical value of logger levels.',
        default=None)

    parser.add_argument(
        '--metric', '-m',
        help='Metric to show in the summary of the experiment, ' +
             'and to base hyperparameter search on. ' +
             'Refers a scalar value in tensorboard log ' +
             'example: --metric=val_loss[:final | :min | :max] to report ' +
             'validation loss in the end of the keras experiment ' +
             '(or smallest or largest throughout the experiment for :min ' +
             'and :max respectively)',
        default=None)

    parser.add_argument(
        '--hyperparam', '-hp',
        help='Try out multiple values of a certain parameter. ' +
             'For example, --hyperparam=learning_rate:0.01:0.1:l10 ' +
             'will instantiate 10 versions of the script, replace ' +
             'learning_rate with a one of the 10 values for learning ' +
             'rate that lies on a log grid from 0.01 to 0.1, create '
             'experiments and place them in the queue.',
             default=[], action='append')

    parser.add_argument(
        '--num-workers',
        help='Number of local or cloud workers to spin up',
        type=int,
        default=None)

    parser.add_argument(
        '--python-pkg',
        help='Python package not present in the current environment ' +
             'that is needed for experiment. Only compatible with ' +
             'remote and cloud workers for now',
        default=[], action='append')

    parser.add_argument(
        '--ssh-keypair',
        help='Name of the SSH keypair used to access the EC2 ' +
             'instances directly',
        default=None)

    parser.add_argument(
        '--optimizer', '-opt',
        help='Name of optimizer to use, by default is grid search. ' +
        'The name of the optimizer must either be in ' +
        'studio/optimizer_plugins ' +
        'directory or the path to the optimizer source file ' +
        'must be supplied. ',
        default='grid')

    parser.add_argument(
        '--cloud-timeout',
        help="Time (in seconds) that cloud workers wait for messages. " +
             "If negative, " +
             "wait for the first message in the queue indefinitely " +
             "and shut down " +
             "as soon as no new messages are available. " +
             "If zero, don't wait at all." +
             "Default value is %(default)d",
        type=int,
        default=300)

    parser.add_argument(
        '--user-startup-script',
        help='Path of script to run before running the remote worker',
        default=None)

    parser.add_argument(
        '--branch',
        help='Branch of studioml to use when running remote worker, useful ' +
             'for debugging pull requests. Default is current',
        default=None)

    # detect which argument is the script filename
    # and attribute all arguments past that index as related to the script
    # NOTE(review): the scan includes args[0]; if the program name itself
    # ends in '.py' it is taken as the script - confirm this is intended.
    py_suffix_args = [i for i, arg in enumerate(args) if arg.endswith('.py')]
    if len(py_suffix_args) < 1:
        print('At least one argument should be a python script ' +
              '(end with *.py)')
        parser.print_help()
        # sys.exit() rather than the interactive-only ``exit`` helper that
        # the ``site`` module injects; the exit status is unchanged.
        sys.exit()

    script_index = py_suffix_args[0]
    runner_args = parser.parse_args(args[1:script_index])

    exec_filename, other_args = args[script_index], args[script_index + 1:]
    # TODO: Queue the job based on arguments and only then execute.

    config = model.get_config(runner_args.config)

    if runner_args.verbose:
        config['verbose'] = runner_args.verbose

    if runner_args.guest:
        config['database']['guest'] = True

    verbose = model.parse_verbosity(config['verbose'])
    logger.setLevel(verbose)

    if git_util.is_git() and not git_util.is_clean():
        logger.warning('Running from dirty git repo')
        if not runner_args.force_git:
            logger.error(
                'Specify --force-git to run experiment from dirty git repo')
            sys.exit(1)

    resources_needed = parse_hardware(runner_args, config['resources_needed'])
    logger.debug('resources requested: ')
    logger.debug(str(resources_needed))

    artifacts = {}
    artifacts.update(parse_artifacts(runner_args.capture, mutable=True))
    artifacts.update(parse_artifacts(runner_args.capture_once, mutable=False))
    with model.get_db_provider(config) as db:
        artifacts.update(parse_external_artifacts(runner_args.reuse, db))

    if runner_args.branch:
        config['cloud']['branch'] = runner_args.branch

    if runner_args.user_startup_script:
        config['cloud']['user_startup_script'] = \
            runner_args.user_startup_script

    if any(runner_args.hyperparam):
        # Compare by value: a string parsed from the command line is not
        # guaranteed to be the same object as the "grid" literal.
        if runner_args.optimizer == "grid":
            experiments = add_hyperparam_experiments(
                exec_filename,
                other_args,
                runner_args,
                artifacts,
                resources_needed,
                logger)

            queue_name = submit_experiments(
                experiments,
                config=config,
                logger=logger,
                cloud=runner_args.cloud)

            spin_up_workers(
                runner_args,
                config,
                resources_needed,
                queue_name=queue_name,
                verbose=verbose)
        else:
            # Look for a bundled plugin first, then fall back to treating
            # the option value as a path to the optimizer source file.
            opt_modulepath = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "optimizer_plugins",
                runner_args.optimizer + ".py")
            if not os.path.exists(opt_modulepath):
                opt_modulepath = os.path.abspath(
                    os.path.expanduser(runner_args.optimizer))
            logger.info('optimizer path: %s', opt_modulepath)

            assert os.path.exists(opt_modulepath)
            sys.path.append(os.path.dirname(opt_modulepath))
            # Strip only the file extension; a blanket replace of ".py"
            # would also remove that substring from inside the name.
            opt_module_name = os.path.splitext(
                os.path.basename(opt_modulepath))[0]
            opt_module = importlib.import_module(opt_module_name)

            h = HyperparameterParser(runner_args, logger)
            hyperparams = h.parse()
            optimizer = getattr(
                opt_module,
                "Optimizer")(
                hyperparams,
                config['optimizer'],
                logger)

            workers_started = False
            queue_name = None
            while not optimizer.stop():
                hyperparam_pop = optimizer.ask()
                hyperparam_tuples = h.convert_to_tuples(hyperparam_pop)

                experiments = add_hyperparam_experiments(
                    exec_filename,
                    other_args,
                    runner_args,
                    artifacts,
                    resources_needed,
                    logger,
                    optimizer=optimizer,
                    hyperparam_tuples=hyperparam_tuples)

                # The queue name returned by the first submission is reused
                # for every later generation.
                queue_name = submit_experiments(
                    experiments,
                    config=config,
                    logger=logger,
                    cloud=runner_args.cloud,
                    queue_name=queue_name)

                # Workers are started once, after the first submission.
                if not workers_started:
                    spin_up_workers(
                        runner_args,
                        config,
                        resources_needed,
                        queue_name=queue_name,
                        verbose=verbose)
                    workers_started = True

                fitnesses, behaviors = get_experiment_fitnesses(
                    experiments, optimizer, config, logger)

                # Fall back to the two-argument tell() if the three-argument
                # call fails. ``Exception`` (not ``BaseException``) so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                try:
                    optimizer.tell(hyperparam_pop, fitnesses, behaviors)
                except Exception:
                    optimizer.tell(hyperparam_pop, fitnesses)

                try:
                    optimizer.disp()
                except Exception:
                    logger.warning('Optimizer has no disp() method')
    else:
        experiments = [model.create_experiment(
            filename=exec_filename,
            args=other_args,
            experiment_name=runner_args.experiment,
            project=runner_args.project,
            artifacts=artifacts,
            resources_needed=resources_needed,
            metric=runner_args.metric)]

        queue_name = submit_experiments(
            experiments,
            config=config,
            logger=logger,
            cloud=runner_args.cloud)

        spin_up_workers(
            runner_args,
            config,
            resources_needed,
            queue_name=queue_name,
            verbose=verbose)

    return