Code example #1
def _evolutionary_launch(spark, map_fun, args_dict, name="no-name"):
    """ Run the wrapper function with each hyperparameter combination as specified by the dictionary

    Args:
        :spark: SparkSession object
        :map_fun: The TensorFlow function to run
        :args_dict: (optional) A dictionary containing hyperparameter values to insert as arguments for each TensorFlow job
        :name: (optional) Name of the experiment, used to label the Spark job group

    Returns:
        The root directory of this experiment run
    """

    global run_id

    sc = spark.sparkContext
    app_id = str(sc.applicationId)

    arg_lists = list(args_dict.values())
    num_executions = len(arg_lists[0])

    #Each TF task should be run on 1 executor
    nodeRDD = sc.parallelize(range(num_executions), num_executions)

    global generation_id

    #Make SparkUI intuitive by grouping jobs
    sc.setJobGroup(os.environ['ML_ID'], "{} | Differential Evolution, Generation: {}".format(name, generation_id))

    #Force execution on the executors, since the GPUs are located there
    nodeRDD.foreachPartition(_prepare_func(app_id, generation_id, map_fun, args_dict, run_id, opt_key))

    generation_id += 1

    return experiment_utils._get_experiments_dir() + '/' + str(app_id) + "_" + str(run_id)
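
The launcher above relies on a one-task-per-executor pattern: an RDD with exactly num_executions partitions is created, and the wrapped training function is run once per partition via foreachPartition, while setJobGroup labels the generation in the Spark UI. Below is a minimal standalone sketch of that pattern, assuming only plain PySpark; evaluate_candidate and the candidate count are hypothetical stand-ins for _prepare_func and the hyperparameter combinations, and no Hopsworks utilities are involved.

from pyspark.sql import SparkSession

def evaluate_candidate(index_iterator):
    # Each partition holds exactly one index, so every candidate is evaluated
    # by its own task (and therefore its own executor slot).
    for index in index_iterator:
        print("evaluating candidate {}".format(index))

spark = SparkSession.builder.appName("de-launch-sketch").getOrCreate()
sc = spark.sparkContext

num_executions = 4  # one task per hyperparameter combination in this generation

# Group this generation's tasks under one label in the Spark UI,
# mirroring the setJobGroup call in _evolutionary_launch.
sc.setJobGroup("demo-experiment", "Differential Evolution, Generation: 0")

# range(num_executions) is split into num_executions partitions, so each index
# lands in its own partition and is processed by a separate task.
sc.parallelize(range(num_executions), num_executions).foreachPartition(evaluate_candidate)
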
Code example #2
def _get_return_file(param_string, app_id, generation_id, run_id):
    """

    Args:
        :param_string:
        :app_id:
        :generation_id:
        :run_id:

    Returns:

    """
    handle = hdfs.get()
    for i in range(generation_id):
        possible_result_path = experiment_utils._get_experiments_dir() + '/' + app_id + '_' \
                               + str(run_id) + '/generation.' + str(i) + '/' + param_string + '/.outputs.json'
        if handle.exists(possible_result_path):
            return_file_contents = hdfs.load(possible_result_path)
            return return_file_contents

    return None
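
This lookup simply walks the generation directories in order and returns the first .outputs.json it finds for the given parameter combination, following the layout <experiments_dir>/<app_id>_<run_id>/generation.<i>/<param_string>/.outputs.json. The sketch below reproduces the same convention on the local filesystem; experiments_dir is a hypothetical stand-in for experiment_utils._get_experiments_dir(), and plain os.path replaces the HDFS handle.

import json
import os

def get_return_file_local(experiments_dir, param_string, app_id, generation_id, run_id):
    # Probe generation.0 ... generation.<generation_id - 1> for this parameter
    # combination and return the first recorded output, mirroring _get_return_file.
    for i in range(generation_id):
        possible_result_path = os.path.join(
            experiments_dir,
            "{}_{}".format(app_id, run_id),
            "generation.{}".format(i),
            param_string,
            ".outputs.json")
        if os.path.exists(possible_result_path):
            with open(possible_result_path) as f:
                return f.read()
    return None

# Example: look up a (hypothetical) finished trial; prints its metric if the
# file exists, otherwise reports that the trial has not finished yet.
result = get_return_file_local("/tmp/experiments", "lr=0.01&batch_size=64", "app-123", 3, 1)
print(json.loads(result)["metric"] if result is not None else "not finished yet")
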
Code example #3
def _run(train_fn,
         search_dict,
         direction=Direction.MAX,
         generations=4,
         population=6,
         mutation=0.5,
         crossover=0.7,
         cleanup_generations=False,
         local_logdir=False,
         name="no-name",
         optimization_key=None):
    """

    Args:
        :train_fn:
        :search_dict:
        :direction:
        :generations:
        :population:
        :mutation:
        :crossover:
        :cleanup_generations:
        :local_logdir:
        :name:
        :optimization_key:

    Returns:

    """

    global run_id
    global local_logdir_bool
    local_logdir_bool = local_logdir

    global spark
    spark = util._find_spark()

    global objective_function
    objective_function = train_fn

    global cleanup
    cleanup = cleanup_generations

    global opt_key
    opt_key = optimization_key

    argcount = six.get_function_code(train_fn).co_argcount
    arg_names = six.get_function_code(train_fn).co_varnames

    ordered_arr = []

    app_id = spark.sparkContext.applicationId

    arg_lists = list(search_dict.values())
    for i in range(len(arg_lists)):
        if len(arg_lists[i]) != 2:
            raise ValueError(
                'Boundary list must contain exactly two elements, [lower_bound, upper_bound] for float/int '
                'or [category1, category2] in the case of strings')

    assert population > 3, 'population should be greater than 3'
    assert generations > 1, 'generations should be greater than 1'

    argIndex = 0
    while argcount != 0:
        ordered_arr.append(
            (arg_names[argIndex], search_dict[arg_names[argIndex]]))
        argcount = argcount - 1
        argIndex = argIndex + 1

    ordered_dict = OrderedDict(ordered_arr)

    bounds_list = []
    types_list = []

    for entry in ordered_dict:
        bounds_list.append((ordered_dict[entry][0], ordered_dict[entry][1]))

        if isinstance(ordered_dict[entry][0], int):
            types_list.append('int')
        elif isinstance(ordered_dict[entry][0], float):
            types_list.append('float')
        else:
            types_list.append('cat')

    global diff_evo
    diff_evo = DifferentialEvolution(_execute_all,
                                     bounds_list,
                                     types_list,
                                     ordered_dict,
                                     direction=direction,
                                     generations=generations,
                                     population=population,
                                     crossover=crossover,
                                     mutation=mutation,
                                     name=name)

    root_dir = experiment_utils._get_experiments_dir() + "/" + str(
        app_id) + "_" + str(run_id)

    best_param, best_metric = diff_evo._solve(root_dir)

    param_string = ''
    for hp in best_param:
        param_string = param_string + hp + '&'
    param_string = param_string[:-1]

    best_exp_logdir, return_dict = _get_best(str(root_dir), direction)

    print('Finished Experiment \n')

    return best_exp_logdir, experiment_utils._get_params_dict(
        best_exp_logdir), best_metric, return_dict
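
Much of the bookkeeping in _run maps the unordered search_dict onto the positional arguments of train_fn and infers a type tag per hyperparameter from its lower bound before handing everything to DifferentialEvolution. The following self-contained sketch isolates that step; the train_fn and search space shown here are purely illustrative, and the optimizer construction itself is left out.

from collections import OrderedDict

import six

def order_search_space(train_fn, search_dict):
    # Order the boundary lists by the positional arguments of train_fn and infer
    # a type tag ('int', 'float' or 'cat') from each lower bound, as _run does.
    argcount = six.get_function_code(train_fn).co_argcount
    arg_names = six.get_function_code(train_fn).co_varnames[:argcount]

    ordered_dict = OrderedDict((name, search_dict[name]) for name in arg_names)

    bounds_list = [(bounds[0], bounds[1]) for bounds in ordered_dict.values()]
    types_list = []
    for bounds in ordered_dict.values():
        if isinstance(bounds[0], int):
            types_list.append('int')
        elif isinstance(bounds[0], float):
            types_list.append('float')
        else:
            types_list.append('cat')
    return ordered_dict, bounds_list, types_list

# Illustrative training function and search space; the argument names must
# match the keys of the search dictionary exactly.
def train_fn(learning_rate, batch_size, optimizer):
    pass

search_dict = {'batch_size': [32, 128],
               'optimizer': ['adam', 'sgd'],
               'learning_rate': [0.001, 0.1]}

print(order_search_space(train_fn, search_dict))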