Esempio n. 1
0
def hash_parameters(parameters):
    """Return `make_hash()` of the parameters described by `parameters`.

    `parameters` can be:
      - a falsy value (None, empty dict, ...): an empty dict is hashed,
      - a dict: it is hashed as-is,
      - anything else: treated as a path-like object exposing `.open()` and
        `.suffix`; the file is parsed as YAML (`.yaml`), CDE (`.cde`) or
        JSON (any other suffix) and the parsed content is hashed.
    """
    if not parameters:
        return make_hash({})
    if isinstance(parameters, dict):
        return make_hash(parameters)

    # Anything else is expected to behave like a pathlib.Path
    with parameters.open('r') as stream:
        extension = parameters.suffix
        if extension == '.yaml':
            loaded = yaml.load(stream, Loader=yaml.SafeLoader)
        elif extension == '.cde':
            # imported lazily: only needed for .cde files
            from cde import Config
            loaded = Config.loads(stream.read()).asdict()
        else:
            loaded = json.load(stream)
    return make_hash(loaded)
Esempio n. 2
0
def load_tuning_search(tuning_search, tuning_search_file):
  """Load a tuning search definition from an inline string or from a file.

  Args:
    tuning_search: JSON string given inline (--tuning-search), or a falsy value.
    tuning_search_file: path-like object exposing `exists()`, `open()` and
      `suffix` (--tuning-search-file), or a falsy value.

  Returns:
    A tuple `(tuning_search_dict, filetype)`. `filetype` is 'yaml' or 'cde'
    when the file has the matching suffix, and 'json' otherwise (including
    when no file is given). `tuning_search_dict` is None when neither input
    is provided.

  Prints an error on stderr and exits with status 1 when both inputs are
  given, or when the file does not exist.
  """
  if tuning_search and tuning_search_file:
    click.secho('Error: specify only one of --tuning-search or --tuning-search-file', fg='red', err=True)
    exit(1)

  if not tuning_search_file:
    # inline definitions are always parsed as JSON; we default to json
    tuning_search_dict = json.loads(tuning_search) if tuning_search else None
    return tuning_search_dict, 'json'

  if not tuning_search_file.exists():
    click.secho('Error: could not find the file specified by --tuning-search-file', fg='red', err=True)
    exit(1)

  # Read the whole file once; every parser below works on this string.
  with tuning_search_file.open('r') as f:
    content = f.read()

  suffix = tuning_search_file.suffix
  if suffix == '.yaml':
    tuning_search_dict = yaml.load(content, Loader=yaml.SafeLoader)
    filetype = 'yaml'
  elif suffix == '.cde':
    from cde import Config
    # Parse the already-read content: the file handle is closed at this point,
    # so reading from it again would raise ValueError.
    tuning_search_dict = Config.loads(content).asdict()
    filetype = 'cde'
  else:
    tuning_search_dict = json.loads(content)
    filetype = 'json'
  return tuning_search_dict, filetype
Esempio n. 3
0
def iter_parameters(tuning_search=None,
                    filetype='json',
                    extra_parameters=None):
    """Iterate over the tuning configurations described by `tuning_search`.

    Args:
        tuning_search: dict with a 'parameter_search' entry (a mapping, or a
            list of mappings that are each expanded in turn), a 'search_type'
            ('grid' or 'sampler') and optionally
            'search_options': {'n_iter': ...}. A falsy value means
            "a single configuration with no tuned parameter".
        filetype: 'json', 'yaml' or 'cde'; selects how each configuration is
            written to disk.
        extra_parameters: optional dict of parameters merged into every
            configuration; values from the search take precedence.

    Yields:
        Tuples `(params_file, params_hash, params)`. `params_file` is only
        written to disk when `params` is not empty.

    Raises:
        ValueError: if 'search_type' is neither 'grid' nor 'sampler' (only
            checked when 'parameter_search' is not empty).
    """
    extra_params = extra_parameters if extra_parameters else {}
    if not tuning_search:
        tuning_search = {
            'parameter_search': {},
            'search_type': 'grid',
        }

    parameter_search = tuning_search['parameter_search']
    if isinstance(parameter_search, list):
        # A list of searches: expand each of them with the same other settings.
        for param_search in parameter_search:
            yield from iter_parameters(
                tuning_search={**tuning_search, 'parameter_search': param_search},
                filetype=filetype,
                extra_parameters=extra_parameters)
        return

    # Support for functions/ranges as parameter values was removed - no one ever used them.

    n_iter = tuning_search.get('search_options', {}).get('n_iter')
    if not parameter_search:
        params_iterator = [{}]
    elif tuning_search['search_type'] == 'grid':
        # http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html
        from sklearn.model_selection import ParameterGrid
        params_iterator = ParameterGrid(parameter_search)
    elif tuning_search['search_type'] == 'sampler':
        # http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterSampler.html
        from sklearn.model_selection import ParameterSampler
        params_iterator = ParameterSampler(parameter_search, n_iter=n_iter)
    else:
        raise ValueError(
            f"Unknown search_type {tuning_search['search_type']!r}: expected 'grid' or 'sampler'")

    for counter, params_ in enumerate(params_iterator):
        # A positive n_iter caps the number of configurations, whatever the search type
        if n_iter and n_iter > 0 and counter >= n_iter:
            click.secho(
                f"Stopping tuning combination after {n_iter} iterations",
                fg='yellow',
                err=True)
            return
        # the search overrides the extra parameters specified earlier
        params = {**extra_params, **params_}
        # we sort to avoid ordering issues; we want a unique hash per tuning configuration
        params_s = json.dumps(params, sort_keys=True)
        params_hash = make_hash(params)

        working_directory = Path('.')  # can we do something smarter?
        params_file = working_directory / 'configurations' / 'tuning' / make_pretty_tuning_filename(
            params_s, filetype)

        if params:
            params_file.parent.mkdir(parents=True, exist_ok=True)
            with params_file.open('w') as f:
                if filetype == 'json':
                    f.write(params_s)
                elif filetype == 'yaml':
                    yaml.dump(params, f)
                elif filetype == 'cde':
                    from cde import Config
                    config = Config()
                    # Load the parameters of this configuration (this line used
                    # to reference an undefined name and raised NameError).
                    config.load_fromdict(params)
                    # NOTE(review): the content is still serialized as YAML and
                    # `config` is not used for the output - confirm whether the
                    # cde package offers a native serializer that should be
                    # used here instead.
                    yaml.dump(params, f)
        yield params_file, params_hash, params
Esempio n. 4
0
def cli(ctx, platform, configuration, label, tuning, tuning_filepath, dryrun, share, database, input_type, offline):
  """Entrypoint to running your algo, launching batchs..."""
  # NOTE(review): the docstring above is intentionally left untouched - if this
  # function is registered as a click command (decorators are not visible here),
  # click would display it as the --help text.
  #
  # This function fills `ctx.obj` with the settings of the run (project, user,
  # labels, platform, database, configuration(s), extra tuning parameters,
  # output directory prefix, ...), changes the working directory to
  # `root_qatools` when needed, and exports the environment variables
  # requested via {ENV: {VAR: value}}.

  # We want all paths to be relative to top-most qatools.yaml
  # it should be located at the root of the git repository
  if config_has_error:
    click.secho('Aborting: please first fix the configuration errors in qatools.yaml', fg='red', err=True, bold=True)
    exit(1)

  # Click passes `ctx.obj` to downstream commands, we can use it as a scratchpad
  # http://click.pocoo.org/6/complex/
  ctx.obj = {}

  will_show_help = '-h' in sys.argv or '--help' in sys.argv
  get_command = 'get' in sys.argv
  if root_qatools != Path().resolve() and not will_show_help and not get_command:
    ctx.obj['previous_cwd'] = os.getcwd()
    click.echo(click.style("Working directory changed to: ", fg='cyan') + click.style(str(root_qatools), fg='cyan', bold=True), err=True)
    os.chdir(root_qatools)

  # We want open permissions on outputs and artifacts
  # it makes collaboration among multiple users / automated tools so much easier...
  os.umask(0)

  ctx.obj['project'] = config['project']['name']
  ctx.obj['HOST'] = os.environ.get('HOST', os.environ.get('HOSTNAME'))
  ctx.obj['user'] = user
  ctx.obj['dryrun'] = dryrun
  ctx.obj['share'] = share
  ctx.obj['offline'] = offline

  ctx.obj['commit_ci_dir'] = commit_ci_dir
  # Note: to support multiple databases per project,
  # either use / as database, or somehow we need to hash the db in the output path.
  ctx.obj['raw_batch_label'] = label
  ctx.obj['batch_label'] = label if not share else f"@{user}| {label}"
  ctx.obj['platform'] = platform

  ctx.obj['input_type'] = input_type
  ctx.obj['inputs_settings'] = get_settings(input_type, config)
  ctx.obj['database'] = database if database else get_default_database(ctx.obj['inputs_settings'])
  ctx.obj['configuration'] = configuration if configuration else get_default_configuration(ctx.obj['inputs_settings'])
  ctx.obj['configurations'] = deserialize_config(ctx.obj['configuration'])

  # Extra parameters come either from an inline JSON string (`tuning`)
  # or from a YAML / CDE / JSON file (`tuning_filepath`).
  extra_parameters = {}
  if tuning:
    extra_parameters = json.loads(tuning)
  elif tuning_filepath:
    ctx.obj['tuning_filepath'] = tuning_filepath
    with tuning_filepath.open('r') as f:
      if tuning_filepath.suffix == '.yaml':
        extra_parameters = yaml.load(f, Loader=yaml.SafeLoader)
      elif tuning_filepath.suffix == '.cde':
        from cde import Config
        extra_parameters = Config.loads(f.read()).asdict()
      else:
        extra_parameters = json.load(f)
  # An empty document (e.g. an empty YAML file, or JSON `null`) parses to None:
  # normalize it to an empty dict so that the `'ENV' in ...` lookup below works.
  ctx.obj['extra_parameters'] = extra_parameters or {}

  # batch runs will override this since batches may have different configurations
  ctx.obj['prefix_output_dir'] = make_prefix_outputs_path(commit_ci_dir, ctx.obj['batch_label'], platform, ctx.obj['configuration'], ctx.obj['extra_parameters'] if tuning else tuning_filepath, share)

  # For convenience, we allow users to change environment variables using {ENV: {VAR: value}}
  # in configurations or tuning parameters
  environment_variables = {}
  for c in ctx.obj['configurations']:
    if isinstance(c, dict) and 'ENV' in c:
      environment_variables.update(c['ENV'])
  if 'ENV' in ctx.obj['extra_parameters']:
    environment_variables.update(ctx.obj['extra_parameters']['ENV'])
  # os.environ only accepts strings, while values parsed from YAML/JSON can be
  # numbers or booleans (e.g. ENV: {THREADS: 4}): convert them explicitly.
  os.environ.update({str(name): str(value) for name, value in environment_variables.items()})

  # we manage stripping ansi color codes ourselves since we redirect std streams
  # to both the original stream and a log file
  ctx.color = True
  # colors in log files will be interpreted in the UIs
  ctx.obj['color'] = is_ci or share