def _evolutionary_launch(spark, map_fun, args_dict, name="no-name"):
    """ Run the wrapper function with each hyperparameter combination as specified by the dictionary

    Args:
        :spark: SparkSession object
        :map_fun: the TensorFlow function to run
        :args_dict: a dictionary containing hyperparameter values to insert as arguments for each TensorFlow job
        :name: name of the experiment

    Returns:
        The root directory of this run's experiment results
    """
    global run_id
    global generation_id

    sc = spark.sparkContext
    app_id = str(sc.applicationId)

    arg_lists = list(args_dict.values())
    num_executions = len(arg_lists[0])

    # Each TF task should be run on 1 executor
    nodeRDD = sc.parallelize(range(num_executions), num_executions)

    # Make the SparkUI intuitive by grouping jobs
    sc.setJobGroup(os.environ['ML_ID'], "{} | Differential Evolution, Generation: {}".format(name, generation_id))

    # Force execution on the executors, since the GPUs are located there
    nodeRDD.foreachPartition(_prepare_func(app_id, generation_id, map_fun, args_dict, run_id, opt_key))

    generation_id += 1

    return experiment_utils._get_experiments_dir() + '/' + str(app_id) + "_" + str(run_id)
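# Illustrative sketch (an assumption for clarity, not part of the original module):
# for a population of two candidates in a generation, args_dict holds one value per
# execution, e.g.
#
#   args_dict = {'learning_rate': [0.01, 0.05],
#                'dropout':       [0.2,  0.6]}
#
# num_executions is then len(args_dict['learning_rate']) == 2, so the RDD is
# parallelized into 2 partitions and each executor evaluates one candidate.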
def _get_return_file(param_string, app_id, generation_id, run_id):
    """ Look up the recorded output for a hyperparameter combination in any
    previously run generation.

    Args:
        :param_string: string identifying the hyperparameter combination
        :app_id: the Spark application id
        :generation_id: number of generations run so far
        :run_id: the id of the current experiment run

    Returns:
        The contents of the .outputs.json file for this combination if it was
        evaluated in an earlier generation, otherwise None
    """
    handle = hdfs.get()
    for i in range(generation_id):
        possible_result_path = experiment_utils._get_experiments_dir() + '/' + app_id + '_' \
                               + str(run_id) + '/generation.' + str(i) + '/' + param_string + '/.outputs.json'
        if handle.exists(possible_result_path):
            return hdfs.load(possible_result_path)
    return None
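# A sketch of the on-disk layout that _get_return_file scans (the concrete
# param_string format shown below is an assumption for illustration):
#
#   <experiments_dir>/<app_id>_<run_id>/generation.0/learning_rate=0.01&dropout=0.2/.outputs.json
#   <experiments_dir>/<app_id>_<run_id>/generation.1/learning_rate=0.05&dropout=0.6/.outputs.json
#
# Scanning earlier generations lets a combination that reappears in a later
# generation reuse its recorded result rather than being evaluated again.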
def _run(train_fn, search_dict, direction=Direction.MAX, generations=4, population=6, mutation=0.5, crossover=0.7,
         cleanup_generations=False, local_logdir=False, name="no-name", optimization_key=None):
    """ Run differential evolution over the given search space.

    Args:
        :train_fn: the function to run, must return the metric to optimize (or a dict containing it)
        :search_dict: dictionary mapping each of train_fn's arguments to a [lower_bound, upper_bound] list
                      (or [category1, category2] for strings)
        :direction: Direction.MAX or Direction.MIN
        :generations: number of generations to evolve, must be greater than 1
        :population: size of the population, must be greater than 3
        :mutation: mutation rate
        :crossover: crossover rate
        :cleanup_generations: whether to clean up the results of intermediate generations
        :local_logdir: whether to use a local logdir instead of HDFS
        :name: name of the experiment
        :optimization_key: key in the dict returned by train_fn that holds the metric to optimize

    Returns:
        The logdir of the best trial, its hyperparameters, its metric and its return dict
    """
    global run_id

    global local_logdir_bool
    local_logdir_bool = local_logdir

    global spark
    spark = util._find_spark()

    global objective_function
    objective_function = train_fn

    global cleanup
    cleanup = cleanup_generations

    global opt_key
    opt_key = optimization_key

    argcount = six.get_function_code(train_fn).co_argcount
    arg_names = six.get_function_code(train_fn).co_varnames

    app_id = spark.sparkContext.applicationId

    arg_lists = list(search_dict.values())
    for i in range(len(arg_lists)):
        if len(arg_lists[i]) != 2:
            raise ValueError('Boundary list must contain exactly two elements, [lower_bound, upper_bound] for '
                             'float/int or [category1, category2] in the case of strings')

    assert population > 3, 'population should be greater than 3'
    assert generations > 1, 'generations should be greater than 1'

    # Pair each of train_fn's arguments with its boundary list, in argument order
    ordered_arr = []
    argIndex = 0
    while argcount != 0:
        ordered_arr.append((arg_names[argIndex], search_dict[arg_names[argIndex]]))
        argcount = argcount - 1
        argIndex = argIndex + 1

    ordered_dict = OrderedDict(ordered_arr)

    # Infer the type of each hyperparameter from its lower bound
    bounds_list = []
    types_list = []
    for entry in ordered_dict:
        bounds_list.append((ordered_dict[entry][0], ordered_dict[entry][1]))

        if isinstance(ordered_dict[entry][0], int):
            types_list.append('int')
        elif isinstance(ordered_dict[entry][0], float):
            types_list.append('float')
        else:
            types_list.append('cat')

    global diff_evo
    diff_evo = DifferentialEvolution(_execute_all, bounds_list, types_list, ordered_dict, direction=direction,
                                     generations=generations, population=population, crossover=crossover,
                                     mutation=mutation, name=name)

    root_dir = experiment_utils._get_experiments_dir() + "/" + str(app_id) + "_" + str(run_id)

    best_param, best_metric = diff_evo._solve(root_dir)

    param_string = ''
    for hp in best_param:
        param_string = param_string + hp + '&'
    param_string = param_string[:-1]

    best_exp_logdir, return_dict = _get_best(str(root_dir), direction)

    print('Finished Experiment \n')

    return best_exp_logdir, experiment_utils._get_params_dict(best_exp_logdir), best_metric, return_dict
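# Hypothetical usage sketch (the train function and search space below are invented
# for illustration; assumes a Spark session and the surrounding experiment
# environment are available):
#
#   def train_fn(learning_rate, dropout):
#       # ...train a model, then return the metric to optimize...
#       return accuracy
#
#   search_dict = {'learning_rate': [0.001, 0.1],  # [lower_bound, upper_bound]
#                  'dropout':       [0.1,   0.9]}
#
#   logdir, best_params, best_metric, return_dict = _run(
#       train_fn, search_dict, direction=Direction.MAX,
#       generations=4, population=6, mutation=0.5, crossover=0.7)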