# Example: optimize a simple 2-D function with SmartSearch.
# Assumed import path for the GCP-HPO library; adjust to your install.
from gcp_hpo.smart_search import SmartSearch


def main():
    ### Set parameters ###
    parameters = {'x': ['float', [0, 15]],
                  'y': ['float', [0, 15]]}
    nugget = 1.e-10  # kept from the original example; not passed to SmartSearch here
    n_clusters = 1
    cluster_evol = 'constant'
    corr_kernel = 'squared_exponential'
    mapWithNoise = False
    model_noise = None
    sampling_model = 'GCP'
    n_candidates = 300
    n_random_init = 15
    n_iter = 100
    nb_iter_final = 0
    acquisition_function = 'UCB'

    # scoring_function is the objective to optimize; it must be defined
    # elsewhere in the script (see the sketch below).
    search = SmartSearch(parameters,
                         estimator=scoring_function,
                         corr_kernel=corr_kernel,
                         acquisition_function=acquisition_function,
                         GCP_mapWithNoise=mapWithNoise,
                         model_noise=model_noise,
                         model=sampling_model,
                         n_candidates=n_candidates,
                         n_iter=n_iter,
                         n_init=n_random_init,
                         n_final_iter=nb_iter_final,
                         n_clusters=n_clusters,
                         cluster_evol=cluster_evol,
                         verbose=2,
                         detailed_res=0)
    search._fit()
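# The example above assumes a scoring_function defined in the same script.
# A minimal, hypothetical sketch (not the library's own test function):
# SmartSearch passes a dict of sampled parameters to the estimator and
# expects a list of observed scores back, as in get_cv_res further below.
import math


def scoring_function(p_dict):
    x, y = p_dict['x'], p_dict['y']
    # Toy smooth landscape with a single optimum at (7.5, 7.5);
    # replace with the real objective.
    return [math.exp(-((x - 7.5) ** 2 + (y - 7.5) ** 2) / 20.)]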
def hpo_custom_function():
    # Mixed search space: a categorical kernel, an integer degree
    # and a float regularization parameter.
    parameters = {'kernel': ['cat', ['rbf', 'poly']],
                  'd': ['int', [1, 3]],
                  'C': ['float', [1, 10]]}

    def scoring_function(x):
        # Dummy objective that always returns the same score
        return [0.5]

    search = SmartSearch(parameters, model='GP',
                         estimator=scoring_function, n_iter=20)
    search._fit()
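# In practice the estimator receives one sampled value per key, e.g.
# {'kernel': 'rbf', 'd': 2, 'C': 3.7}. A slightly more realistic sketch,
# hypothetical: cross-validate an SVC with the sampled settings and return
# the list of fold scores (mirroring the stored 5-fold results used below).
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC


def svc_scoring_function(p_dict):
    X, y = load_iris(return_X_y=True)
    clf = SVC(kernel=p_dict['kernel'], degree=p_dict['d'], C=p_dict['C'])
    return list(cross_val_score(clf, X, y, cv=3))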
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier


def hpo_sklearn_pipeline():
    digits = load_digits()  # renamed from 'iris': the data loaded is the digits set
    X, y = digits.data, digits.target
    clf = RandomForestClassifier(n_estimators=20)

    # Specify parameters and distributions to sample from.
    # Note: recent scikit-learn requires min_samples_split >= 2,
    # so its lower bound is raised from the original 1.
    parameters = {"max_depth": ['int', [3, 3]],
                  "max_features": ['int', [1, 11]],
                  "min_samples_split": ['int', [2, 11]],
                  "min_samples_leaf": ['int', [1, 11]],
                  "bootstrap": ['cat', [True, False]],
                  "criterion": ['cat', ["gini", "entropy"]]}

    search = SmartSearch(parameters, estimator=clf, X=X, y=y, n_iter=20)
    search._fit()
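# When given a scikit-learn estimator together with X and y, SmartSearch is
# assumed to score each sampled configuration by cross-validation. A rough,
# hypothetical sketch of one such evaluation (not the library's internal code):
from sklearn.model_selection import cross_val_score


def evaluate_candidate(clf, X, y, p_dict):
    # Apply the sampled hyperparameters, then cross-validate
    clf.set_params(**p_dict)
    return list(cross_val_score(clf, X, y, cv=5))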
import sys

import numpy as np

if __name__ == '__main__':
    # get_data, scoring_function and parameters are defined earlier
    # in the original script.
    n_test = int(sys.argv[1])
    sub_size = int(sys.argv[2])
    print('Running', n_test, 'tests with a subsample size of', sub_size)

    data, clean_reviews = get_data()

    def scoring(parameters):
        return scoring_function(parameters, data=clean_reviews,
                                Y=data["sentiment"], sub_size_=sub_size)

    search = SmartSearch(estimator=scoring, parameters=parameters,
                         model='rand', n_init=n_test, n_iter=n_test,
                         n_final_iter=0, detailed_res=1)
    tested_params, outputs = search._fit()

    # Persist the raw outputs and tested parameters for later offline replays
    with open("scoring_function/" + str(sub_size) + "_output.csv", 'w') as f:
        for line in outputs:
            print(line, file=f)
    np.savetxt("scoring_function/" + str(sub_size) + "_params.csv",
               tested_params, delimiter=",")
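# The script leaves two files per subsample size, which runExperiment below
# reads back (file names follow the code above; the values are illustrative):
#   scoring_function/<sub_size>_output.csv - one list of CV scores per tested
#       point, written as its Python repr, e.g. "[0.81, 0.79, 0.83, 0.80, 0.82]"
#   scoring_function/<sub_size>_params.csv - the matching parameter vectors,
#       one comma-separated row per tested point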
import os

import numpy as np
from sklearn.neighbors import NearestNeighbors


def runExperiment(first_exp, n_exp, dir_, parameters, subsize,
                  model='GCP', n_random_init=10, n_total_iter=30,
                  n_candidates=500, corr_kernel='squared_exponential',
                  acquisition_function='UCB', n_clusters=1,
                  cluster_evol='constant', GCP_mapWithNoise=False,
                  GCP_useAllNoisyY=False, model_noise=None):
    last_exp = first_exp + n_exp
    print('Run experiment', model, first_exp, 'to', last_exp)

    # Load the pre-computed scores of the scoring function
    output = []
    with open(dir_ + 'scoring_function/' + str(subsize) + '_output.csv', 'r') as f:
        for l in f:
            l = l[1:-3]  # strip the brackets and line ending, as in the stored format
            output.append([float(i) for i in l.split(',')])
    print('Loaded output file,', len(output), 'rows')

    params = np.genfromtxt(dir_ + 'scoring_function/' + str(subsize) + '_params.csv',
                           delimiter=',')
    print('Loaded parameters file, shape :', params.shape)

    # Nearest-neighbor index mapping a queried parameter vector
    # to the closest pre-computed one
    KNN = NearestNeighbors()
    KNN.fit(params)

    keys = sorted(parameters.keys())

    def get_cv_res(p_dict):
        """Retrieve a performance evaluation from the stored results."""
        # Convert the parameter dict into an array of numbers
        p = np.zeros(len(parameters))
        for k in range(len(keys)):
            if parameters[keys[k]][0] != 'cat':
                p[k] = p_dict[keys[k]]
            else:
                p[k] = parameters[keys[k]][1].index(p_dict[keys[k]])
        idx = int(KNN.kneighbors(p.reshape(1, -1), 1,
                                 return_distance=False)[0][0])
        all_o = output[idx]
        # If several 5-fold blocks are stored for this point,
        # return a randomly chosen block of five scores
        r = 0
        if len(all_o) > 5:
            r = np.random.randint(len(all_o) // 5)
        return all_o[(5 * r):(5 * r + 5)]

    model_dir = model + '/' + str(subsize)

    ### Run experiment ###
    if not os.path.exists(dir_ + 'exp_results'):
        os.mkdir(dir_ + 'exp_results')
    if not os.path.exists(dir_ + 'exp_results/' + model):
        os.mkdir(dir_ + 'exp_results/' + model)
    if not os.path.exists(dir_ + 'exp_results/' + model_dir):
        os.mkdir(dir_ + 'exp_results/' + model_dir)

    # exp_id renamed from the original n_exp, which shadowed the parameter
    for exp_id in range(first_exp, last_exp):
        print(' **** Run exp', exp_id, ' ****')

        ### set directory
        exp_dir = dir_ + 'exp_results/' + model_dir + '/exp' + str(exp_id)
        if not os.path.exists(exp_dir):
            os.mkdir(exp_dir)
        else:
            print('Warning : directory already exists')

        search = SmartSearch(parameters,
                             estimator=get_cv_res,
                             corr_kernel=corr_kernel,
                             GCP_mapWithNoise=GCP_mapWithNoise,
                             GCP_useAllNoisyY=GCP_useAllNoisyY,
                             model_noise=model_noise,
                             model=model,
                             n_candidates=n_candidates,
                             n_iter=n_total_iter,
                             n_init=n_random_init,
                             n_clusters=n_clusters,
                             cluster_evol=cluster_evol,
                             verbose=2,
                             acquisition_function=acquisition_function,
                             detailed_res=2)
        all_parameters, all_search_path, all_raw_outputs, all_mean_outputs = search._fit()

        ## save experiment's data
        with open(exp_dir + '/output.csv', 'w') as f:
            for line in all_raw_outputs:
                print(line, file=f)
        np.savetxt(exp_dir + '/param.csv', all_parameters, delimiter=',')
        np.savetxt(exp_dir + '/param_path.csv', all_search_path, delimiter=',')

        print(' **** End experiment', exp_id, ' ****\n')
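# Hypothetical driver for the utility above: replay experiments 0-4 against
# the scores stored for a subsample of 5000 (the directory, parameter names
# and bounds are illustrative and must match the stored params file).
if __name__ == '__main__':
    parameters = {'param1': ['float', [0, 1]],
                  'param2': ['int', [1, 10]]}
    runExperiment(first_exp=0, n_exp=5, dir_='./', parameters=parameters,
                  subsize=5000, model='GCP', n_random_init=10, n_total_iter=30)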