Example #1

# SmartSearch comes from the GCP-HPO library; the exact import path below is
# an assumption about the package layout
from gcp_hpo.smart_search import SmartSearch

def main():
    ### Set parameters ###
    parameters = {'x': ['float', [0, 15]],
                  'y': ['float', [0, 15]]}
    nugget = 1.e-10  # defined here but not passed on in this snippet
    n_clusters = 1
    cluster_evol = 'constant'
    corr_kernel = 'squared_exponential'
    mapWithNoise = False
    model_noise = None
    sampling_model = 'GCP'
    n_candidates = 300
    n_random_init = 15
    n_iter = 100
    nb_iter_final = 0
    acquisition_function = 'UCB'

    # scoring_function must be a callable mapping a parameter dict to a list
    # of raw scores; see the sketch below
    search = SmartSearch(parameters,
                         estimator=scoring_function,
                         corr_kernel=corr_kernel,
                         acquisition_function=acquisition_function,
                         GCP_mapWithNoise=mapWithNoise,
                         model_noise=model_noise,
                         model=sampling_model,
                         n_candidates=n_candidates,
                         n_iter=n_iter,
                         n_init=n_random_init,
                         n_final_iter=nb_iter_final,
                         n_clusters=n_clusters,
                         cluster_evol=cluster_evol,
                         verbose=2,
                         detailed_res=0)

    search._fit()
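
SmartSearch calls the `estimator` with a dict of sampled parameter values and expects a list of one or more raw scores back (Examples #2 and #4 follow the same convention). A minimal sketch of a compatible `scoring_function` for the space above; the quadratic objective is purely illustrative, not from the original code:

def scoring_function(p_dict):
    # toy objective over the {'x', 'y'} space; peaks at (7.5, 7.5)
    x, y = p_dict['x'], p_dict['y']
    return [-(x - 7.5) ** 2 - (y - 7.5) ** 2]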
Example #2

def hpo_custom_function():
    parameters = {'kernel': ['cat', ['rbf', 'poly']],
                  'd': ['int', [1, 3]],
                  'C': ['float', [1, 10]]}

    def scoring_function(x):
        return [0.5]

    search = SmartSearch(parameters, model='GP', estimator=scoring_function, n_iter=20)
    search._fit()
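
The `model` argument selects the sampling strategy; this listing shows 'GP' (a plain Gaussian process, as here), 'GCP' (the Gaussian Copula Process used in Examples #1 and #5) and 'rand' (pure random sampling, as in Example #4). A hypothetical variant of the same search using random sampling:

search = SmartSearch(parameters,
                     model='rand',
                     estimator=scoring_function,
                     n_init=20,
                     n_iter=20,
                     n_final_iter=0)
search._fit()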
Example #3

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

def hpo_sklearn_pipeline():
    digits = load_digits()
    X, y = digits.data, digits.target
    clf = RandomForestClassifier(n_estimators=20)

    # specify parameters and distributions to sample from
    parameters = {"max_depth": ['int', [3, 3]],
                  "max_features": ['int', [1, 11]],
                  "min_samples_split": ['int', [1, 11]],
                  "min_samples_leaf": ['int', [1, 11]],
                  "bootstrap": ['cat', [True, False]],
                  "criterion": ['cat', ["gini", "entropy"]]}

    search = SmartSearch(parameters, estimator=clf, X=X, y=y, n_iter=20)
    search._fit()
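
This example mirrors scikit-learn's classic random-search demo: when an estimator plus X and y are passed instead of a custom scoring callable, SmartSearch evaluates each sampled configuration itself (presumably by cross-validation). For comparison, a sketch of the same space as a plain scikit-learn random search; RandomizedSearchCV and scipy.stats.randint are standard APIs, and only the translation of the ranges is ours:

from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

param_dist = {"max_depth": [3],
              "max_features": randint(1, 11),
              "min_samples_split": randint(2, 11),
              "min_samples_leaf": randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

random_search = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=20)
random_search.fit(X, y)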
Example #4

import sys
import numpy as np

# get_data, scoring_function and `parameters` are defined elsewhere in the
# original script

if __name__ == '__main__':
    n_test = int(sys.argv[1])
    sub_size = int(sys.argv[2])

    print('Running', n_test, 'tests with a subsample size of', sub_size)
    data, clean_reviews = get_data()

    def scoring(parameters):
        return scoring_function(parameters,
                                data=clean_reviews,
                                Y=data["sentiment"],
                                sub_size_=sub_size)

    search = SmartSearch(estimator=scoring,
                         parameters=parameters,
                         model='rand',
                         n_init=n_test,
                         n_iter=n_test,
                         n_final_iter=0,
                         detailed_res=1)

    tested_params, outputs = search._fit()

    f = open(("scoring_function/" + str(sub_size) + "_output.csv"), 'w')
    for line in outputs:
        print >> f, line

    np.savetxt("scoring_function/" + str(sub_size) + "_params.csv",
               tested_params,
               delimiter=",")
Example #5

import os
import numpy as np
from sklearn.neighbors import NearestNeighbors

def runExperiment(first_exp,
                  n_exp,
                  dir_,
                  parameters,
                  subsize,
                  model='GCP',
                  n_random_init=10,
                  n_total_iter=30,
                  n_candidates=500,
                  corr_kernel='squared_exponential',
                  acquisition_function='UCB',
                  n_clusters=1,
                  cluster_evol='constant',
                  GCP_mapWithNoise=False,
                  GCP_useAllNoisyY=False,
                  model_noise=None):

    last_exp = first_exp + n_exp
    print('Run experiment', model, first_exp, 'to', last_exp)

    # Load the raw outputs written by the script in Example #4
    output = []
    with open(dir_ + 'scoring_function/' + str(subsize) + '_output.csv', 'r') as f:
        for l in f:
            # each line is the repr of a list, e.g. "[0.8, 0.7, ...]"
            l = l.strip().lstrip('[').rstrip(']')
            output.append([float(i) for i in l.split(',')])
    print('Loaded output file,', len(output), 'rows')

    params = np.genfromtxt(dir_ + 'scoring_function/' + str(subsize) + '_params.csv',
                           delimiter=',')
    print('Loaded parameters file, shape:', params.shape)

    # nearest-neighbour index over the evaluated parameter vectors
    KNN = NearestNeighbors()
    KNN.fit(params)

    keys = sorted(parameters.keys())

    # Retrieve a stored performance evaluation for the requested parameters
    def get_cv_res(p_dict):
        # convert the parameter dict into a numeric vector, encoding each
        # categorical value by its index in the list of choices
        p = np.zeros(len(parameters))
        for k, key in enumerate(keys):
            if parameters[key][0] != 'cat':
                p[k] = p_dict[key]
            else:
                p[k] = parameters[key][1].index(p_dict[key])

        # look up the closest evaluated point and return five of its raw outputs
        idx = KNN.kneighbors([p], 1, return_distance=False)[0][0]
        all_o = output[idx]
        r = 0
        if len(all_o) > 5:
            r = np.random.randint(len(all_o) // 5)
        return all_o[(5 * r):(5 * r + 5)]


    model_dir = model + '/' + str(subsize)
    ###  Run experiment  ###
    os.makedirs(dir_ + 'exp_results/' + model_dir, exist_ok=True)

    for n_exp in range(first_exp, last_exp):
        print(' ****   Run exp', n_exp, '  ****')
        ### set directory
        exp_dir = dir_ + 'exp_results/' + model_dir + '/exp' + str(n_exp)
        if not os.path.exists(exp_dir):
            os.mkdir(exp_dir)
        else:
            print('Warning: directory already exists')

        search = SmartSearch(parameters,
                             estimator=get_cv_res,
                             corr_kernel=corr_kernel,
                             GCP_mapWithNoise=GCP_mapWithNoise,
                             GCP_useAllNoisyY=GCP_useAllNoisyY,
                             model_noise=model_noise,
                             model=model,
                             n_candidates=n_candidates,
                             n_iter=n_total_iter,
                             n_init=n_random_init,
                             n_clusters=n_clusters,
                             cluster_evol=cluster_evol,
                             verbose=2,
                             acquisition_function=acquisition_function,
                             detailed_res=2)

        all_parameters, all_search_path, all_raw_outputs, all_mean_outputs = search._fit()

        ## save experiment's data
        with open(exp_dir + '/output.csv', 'w') as f:
            for line in all_raw_outputs:
                print(line, file=f)
        np.savetxt(exp_dir + '/param.csv', all_parameters, delimiter=',')
        np.savetxt(exp_dir + '/param_path.csv', all_search_path, delimiter=',')

        print(' ****   End experiment', n_exp, '  ****\n')
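
A hypothetical driver for runExperiment, assuming outputs were generated by Example #4's script with sub_size = 64 and that `parameters` describes the same search space scored there (the space below is purely illustrative):

parameters = {'x': ['float', [0, 15]],
              'y': ['float', [0, 15]]}
runExperiment(first_exp=0,
              n_exp=5,
              dir_='./',
              parameters=parameters,
              subsize=64,
              model='GCP')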

