Example #1
def main(argv=None):

    # use chocolate to tune the hyperparameters
    space = create_space()
    conn = choco.SQLiteConnection(url="sqlite:///db2.db")
    cv = choco.Repeat(repetitions=3, reduce=np.mean, rep_col="_repetition_id")
    sampler = choco.Bayes(conn, space, crossvalidation=cv)
    #train(x_train, y_train, vocab_processor, x_dev, y_dev)
    token, params = sampler.next()
    print(type(token))
    print(token)
    x_train, y_train, x_dev, y_dev = preprocess(
        params["eps"], params["dev_sample_percentage"])
    loss = train(x_train, y_train, x_dev, y_dev, params)
    print(loss)
    sampler.update(token, loss)
    results = conn.results_as_dataframe()
    print(results)
    results = pd.melt(results,
                      id_vars=["_loss"],
                      value_name='value',
                      var_name="variable")
    sns.lmplot(x="value",
               y="_loss",
               data=results,
               col="variable",
               col_wrap=3,
               sharex=False)
    plt.show()
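
# A minimal sketch (an assumption, not part of the original snippet): Example #1
# performs a single next()/update() step per invocation; for a self-contained
# sequential run the same pattern is simply looped, given some user-supplied
# score(params) objective:
import chocolate as choco

def tune_loop(score, space, n_trials=30):
    conn = choco.SQLiteConnection("sqlite:///loop.db")
    sampler = choco.Bayes(conn, space)
    for _ in range(n_trials):
        token, params = sampler.next()
        loss = score(params)  # chocolate minimizes the reported loss
        sampler.update(token, loss)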
Example #2
def example_run_bayesian():
    # initialisation
    mean = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    variances = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5]
    x1, y1, x2, y2 = 60, 342, 726, 725
    dimension = len(mean)
    number = 20
    space = {}
    for x in range(dimension):
        space["{}".format(x)] = choco.uniform(mean[x] - variances[x],mean[x] + variances[x])

    # sqlite3 ships with the Python standard library; the TEST.db file
    # is created automatically on first connection
    conn = choco.SQLiteConnection("sqlite:///TEST.db")
    conn.lock()
    bay = choco.Bayes(conn, space, clear_db=True)
    (token, point_next) = bay.next()
    point = format_next(point_next)

    all_pos = []
    all_score = []
    for x in range(number):
        loss = extract_score(x, x1, y1, x2, y2, point)
        bay.update(token, loss)
        (token, point_next) = bay.next()
        point = format_next(point_next)
        print("\rProgress : {}%".format(100*x//number), end="")
        all_pos.append(point)
        all_score.append(1-loss)

    np.savetxt("Score_list", all_score)
    np.savetxt("Point_list", all_pos)

    return True
Example #3
def readTune():
    # Establish a connection to a SQLite local database
    conn = choco.SQLiteConnection("sqlite:///hpTuning.db")
    results = conn.results_as_dataframe()
    results = pd.melt(results, id_vars=["_loss"], value_name='value', var_name="variable")

    sns.lmplot(x="value", y="loss", data=results, col="variable", col_wrap=3, sharex=False)

    plt.show()
Example #4
    def __init__(self, algorithm_name, search_space):
        self.conn = choco.SQLiteConnection(DB_ADDRESS)
        self.search_space = search_space
        self.chocolate_optimizer = None
        self.create_optimizer(algorithm_name)
        # created_trials is the list of dicts with all created trial
        # assignments, loss, and trial name: _chocolate_id is the trial ID,
        # assignment names are encoded, _loss is the target metric, and
        # _trial_name is the Trial name.
        # One row example:
        # {'_chocolate_id': 0, 'LS1scg==': 0.001, 'LS1udW0tZXBvY2hz': 1, 'LS1udW0tbGF5ZXJz': 2, "_loss": "0.97", "_trial_name": "grid-example-hsdvfdwl"}
        self.created_trials = []
        self.recorded_trials_names = []
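
# The encoded assignment names above look like plain base64 of the original
# flag names: 'LS1scg==' decodes to '--lr' and 'LS1udW0tZXBvY2hz' to
# '--num-epochs'. A minimal sketch of such an encode/decode pair, assuming
# this is indeed the scheme behind BaseChocolateService.encode (not shown):
import base64

def encode(name):
    return base64.b64encode(name.encode("utf-8")).decode("ascii")

def decode(key):
    return base64.b64decode(key.encode("ascii")).decode("utf-8")

assert encode("--lr") == "LS1scg=="
assert decode("LS1udW0tZXBvY2hz") == "--num-epochs"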
Example #5
def init_choco_sampler(args):
    conn = choco.SQLiteConnection(args['sqlite_dbase'])
    space = {
        "lr": choco.log(low=-5, high=-3, base=10),
        "lr_decay": choco.uniform(high=1, low=0),
        "reg_scale": choco.uniform(low=0, high=1),
        "last_reg_scale": choco.uniform(low=0, high=1),
        "weight": choco.uniform(low=1, high=50),
        "weight_decay": choco.uniform(low=0, high=1),
        "contrast": choco.uniform(low=-100, high=100)
    }
    sampler = choco.CMAES(conn, space)
    return (sampler, conn)
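
# A usage sketch for the sampler returned above, assuming args['sqlite_dbase']
# holds a SQLite URL and a hypothetical run_trial(params) computes the loss:
sampler, conn = init_choco_sampler({"sqlite_dbase": "sqlite:///cmaes.db"})
for _ in range(100):
    token, params = sampler.next()
    loss = run_trial(params)  # hypothetical objective; lower is better
    sampler.update(token, loss)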
Example #6
def main():
    X, y = load_boston(return_X_y=True)  # note: load_boston was removed in scikit-learn 1.2

    # Connect to sqlite database in current directory
    conn = choco.SQLiteConnection(url="sqlite:///gbt-boston.db")
    s = {"learning_rate": choco.uniform(0.001, 0.1),
         "n_estimators": choco.quantized_uniform(25, 525, 1),
         "max_depth": choco.quantized_uniform(2, 25, 1),
         "subsample": choco.uniform(0.7, 1.0)}

    sampler = choco.QuasiRandom(conn, s, random_state=110, skip=3)
    token, params = sampler.next()
    loss = score_gbt(X, y, params)
    sampler.update(token, loss)
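
# score_gbt is not shown in this snippet. A minimal sketch of what it might
# look like, assuming a scikit-learn GradientBoostingRegressor scored by
# cross-validated mean squared error (returned positive so chocolate can
# minimize it):
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

def score_gbt(X, y, params):
    # quantized_uniform can yield floats; the tree parameters must be ints
    params = dict(params,
                  n_estimators=int(params["n_estimators"]),
                  max_depth=int(params["max_depth"]))
    model = GradientBoostingRegressor(**params)
    scores = cross_val_score(model, X, y,
                             scoring="neg_mean_squared_error", cv=3)
    return -scores.mean()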
Example #7
def main():
    conn = choco.SQLiteConnection("sqlite:///my_db.db")
    # results = conn.results_as_dataframe()

    space = {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    sampler = choco.QuasiRandom(conn, space, clear_db=True)
    #sampler = choco.MOCMAES(conn, space, mu=0.1, clear_db=True)

    # Token: {'_chocolate_id': 0}
    # Params: {'y': 1.4641226269602674, 'x': 2.5223111999723393}
    token, params = sampler.next()
    loss = himmelblau(**params)
    sampler.update(token, loss)
    print("Token: {}, loss: {}".format(token, loss))
    """
Example #8
    def func(data):
        trn_x, trn_y, tst_x, tst_y, dbid = data
        conn = choco.SQLiteConnection(url="sqlite:///hpo/hpo_%s.db" % str(dbid))
        sampler = choco.Random(conn, space)
        searcher = choco.Bayes(conn, space)
        print('START %s' % dbid)

        for _ in range(nseed):
            token, params = sampler.next()
            # print('START % 4d %s' % (i, params['model']))
            loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
            sampler.update(token, loss)
            # print('DONE  % 4d %s' % (i, params['model']))
        for _ in range(nruns):
            token, params = searcher.next()
            # print('START % 4d %s' % (i, params['model']))
            loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
            searcher.update(token, loss)
Example #9
def run_chocolate(X_train, X_test, Y_train, Y_test, y_scaler_nl, run, space, iterations, clear_db):

    # CREATE and connect to a sqlite database in the current directory
    conn = choco.SQLiteConnection(url="sqlite:///" + run + ".db")

    # repeat each model run three times and take the average
    # cv = choco.Repeat(repetitions=3, reduce=np.mean, rep_col="_repetition_id")

    # search strategy: Bayes attempts to "learn" patterns from ALL previous runs
    sampler = choco.Bayes(conn, space, clear_db=False)  # , crossvalidation=cv
    # sampler = choco.Grid(conn, space, clear_db=clear_db)
    # run `iterations` times and see whether we get a better answer
    for i in range(iterations):
        # examine the db and pick the next experiment
        token, params = sampler.next()
        # run the experiment
        loss = _score(X_train, X_test, Y_train, Y_test, y_scaler_nl, params)
        # print("finished iteration", str(i), "loss", str(loss))
        # add the new result to the database
        sampler.update(token, loss)
Example #10
    def best(self, tune_db=None, connection=None):
        """Get (current) best set of hyper-parameters

        Parameters
        ----------
        connection : chocolate.SQLiteConnection, optional
            Existing connection to SQLite database.
        tune_db : str, optional
            Path to SQLite database where trial results will be stored. Has no
            effect when `connection` is provided.

        At least one of `tune_db` or `connection` must be provided.

        Returns
        -------
        status : dict
            ['loss'] (`float`) best loss so far
            ['params'] (`dict`) corresponding set of hyper-parameters
            ['n_trials'] (`int`) total number of trials
        """

        if connection is None:
            # start connection to SQLite database
            # (this is where trials are stored)
            connection = chocolate.SQLiteConnection(f'sqlite:///{tune_db}')

        # get current best set of hyper-parameter (and its loss)
        trials = connection.results_as_dataframe()
        best_params = dict(trials.iloc[trials['_loss'].idxmin()])
        best_loss = best_params.pop('_loss')
        best_params = {
            # np.asscalar was removed in NumPy 1.23; .item() is the replacement
            name: np.asarray(value).item()
            for name, value in best_params.items()
        }

        return {
            'loss': best_loss,
            'params': best_params,
            'n_trials': len(trials)
        }
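
# A usage sketch for best(), assuming an instance (here called `tuner`,
# hypothetical) and the trial database used elsewhere in these examples:
status = tuner.best(tune_db="hpTuning.db")
print("best loss:", status["loss"], "over", status["n_trials"], "trials")
print("best params:", status["params"])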
Example #11
    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with chocolate algorithm.
        """

        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in search_space.params:
            key = BaseChocolateService.encode(param.name)
            if param.type == INTEGER:
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max), 1)
            elif param.type == DOUBLE:
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min), float(param.max), float(param.step))
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])

        conn = choco.SQLiteConnection("sqlite:///my_db.db")
        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if self.algorithm_name == "grid":
            sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
        # hyperopt-random is the default option in katib.
        elif self.algorithm_name == "chocolate-random":
            sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "chocolate-quasirandom":
            sampler = choco.QuasiRandom(conn,
                                        chocolate_search_space,
                                        clear_db=True)
        elif self.algorithm_name == "chocolate-bayesian-optimization":
            sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
        # elif self.algorithm_name == "chocolate-CMAES":
        #     sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "chocolate-MOCMAES":
            mu = 1
            sampler = choco.MOCMAES(conn,
                                    chocolate_search_space,
                                    mu=mu,
                                    clear_db=True)
        else:
            raise Exception("Failed to create the algorithm: {}".format(
                self.algorithm_name))

        for index, trial in enumerate(trials):
            loss_for_choco = float(trial.target_metric.value)
            if search_space.goal == MAX_GOAL:
                loss_for_choco = -1 * loss_for_choco

            entry = {"_chocolate_id": index, "_loss": loss_for_choco}
            for param in search_space.params:
                param_assignment = None
                for assignment in trial.assignments:
                    if param.name == assignment.name:
                        param_assignment = assignment.value
                        break
                if param.type == INTEGER:
                    param_assignment = int(param_assignment)
                elif param.type == DOUBLE:
                    param_assignment = float(param_assignment)
                entry.update({
                    BaseChocolateService.encode(param.name):
                    param_assignment
                })
            logger.info(entry)
            # Should not use sampler.update(token, loss), because we will create
            # a new BaseChocolateService instance for every request. Thus we need
            # to insert all previous trials every time.
            conn.insert_result(entry)

        list_of_assignments = []

        for i in range(request_number):
            try:
                token, chocolate_params = sampler.next()
                list_of_assignments.append(
                    BaseChocolateService.convert(search_space,
                                                 chocolate_params))
            except StopIteration:
                logger.info(
                    "Chocolate db is exhausted, increase Search Space or decrease maxTrialCount!"
                )
        return list_of_assignments
Example #12
    def tune_iter(self, tune_db, protocol, subset='development', sampler=None):
        """Tune pipeline forever

        Parameters
        ----------
        tune_db : str
            Path to SQLite database where trial results will be stored.
        protocol : pyannote.database.Protocol
            Protocol on which to tune the pipeline.
        subset : {'train', 'development', 'test'}, optional
            Subset on which to tune the pipeline. Defaults to 'development'.
        sampler : chocolate sampler, optional
            Defaults to chocolate.CMAES

        Yields
        ------
        status : dict
            ['latest']['loss'] (`float`) loss obtained by the latest trial
            ['latest']['params'] (`dict`) corresponding set of hyper-parameters
            ['latest']['n_trials'] (`int`) total number of trials in this session
            ['new_best']['loss'] (`float`) best loss so far
            ['new_best']['params'] (`dict`) corresponding set of hyper-parameters
            ['new_best']['n_trials'] (`int`) total number of trials
        """

        # start connection to SQLite database
        # (this is where trials are stored)
        connection = chocolate.SQLiteConnection(f'sqlite:///{tune_db}')

        # get hyper-parameter space
        space = self.get_tune_space()

        # instantiate sampler
        if sampler is None:
            sampler = chocolate.CMAES
        sampler = sampler(connection, space)
        # TODO add option to use another sampler

        i = 0
        best = {'loss': np.inf}

        while True:
            i += 1

            # get next set of hyper-parameters to try
            token, params = sampler.next()

            # instantiate pipeline with this set of parameters
            # and compute the objective function
            loss = self.with_params(**params).objective(protocol,
                                                        subset=subset,
                                                        learning=True)

            latest = {'loss': loss, 'params': params, 'n_trials': i}

            # tell the sampler what was the result
            sampler.update(token, loss)

            if loss < best['loss'] or i == 1:
                # if loss is better than previous known best
                # check in the database what is the current best
                best = self.best(connection=connection)
                yield {'latest': latest, 'new_best': best}
            else:
                yield {'latest': latest}
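
# tune_iter is an infinite generator, so the caller decides when to stop.
# A usage sketch, assuming a `pipeline` object exposing the method and a
# pyannote.database protocol:
for status in pipeline.tune_iter("trials.db", protocol):
    if "new_best" in status:
        print("new best loss:", status["new_best"]["loss"])
    if status["latest"]["n_trials"] >= 100:  # stop after 100 trials
        break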
Example #13
    def get_new_suggestions(self, study, trials=[], number=1):
        """
        Get the new suggested trials with Chocolate algorithm.
        """

        # 1. Construct search space
        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        # study = Study.objects.get(name=study_name)
        study_configuration_json = json.loads(study.study_configuration)
        params = study_configuration_json["params"]

        for param in params:
            param_name = param["parameterName"]

            if param["type"] == "INTEGER":
                # TODO: Support int type of search space)
                pass

            elif param["type"] == "DOUBLE":
                chocolate_search_space[param_name] = choco.uniform(
                    param["minValue"], param["maxValue"])

            elif param["type"] == "DISCRETE" or param["type"] == "CATEGORICAL":
                feasible_point_list = [
                    value.strip()
                    for value in param["feasiblePoints"].split(",")
                ]
                chocolate_search_space[param_name] = choco.choice(
                    feasible_point_list)

        conn = choco.SQLiteConnection("sqlite:///my_db.db")

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if self.algorithm_name == "Grid":
            sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "Random":
            sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "QuasiRandom":
            sampler = choco.QuasiRandom(conn,
                                        chocolate_search_space,
                                        clear_db=True)
        elif self.algorithm_name == "Bayes":
            sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "CMAES":
            sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "MOCMAES":
            mu = 1
            sampler = choco.MOCMAES(conn,
                                    chocolate_search_space,
                                    mu=mu,
                                    clear_db=True)

        # 2. Update with completed advisor trials
        # completed_advisor_trials = Trial.objects.filter(
        #    study_name=study_name, status="Completed")
        completed_advisor_trials = [
            i for i in trials if i.status == "Completed"
        ]

        for index, advisor_trial in enumerate(completed_advisor_trials):
            parameter_values_json = json.loads(advisor_trial.parameter_values)

            loss = advisor_trial.objective_value
            if study_configuration_json["goal"] == "MAXIMIZE":
                loss = -1 * loss

            entry = {"_chocolate_id": index, "_loss": loss}
            entry.update(parameter_values_json)
            # Should not use sampler.update(token, loss)
            conn.insert_result(entry)

        # 3. Run algorithm and construct return advisor trials
        return_trial_list = []

        for i in range(number):

            # Example: {'_chocolate_id': 1}
            # Example: {u'hidden2': u'32', u'learning_rate': 0.07122424534644338, u'l1_normalization': 0.8402644688674471, u'optimizer': u'adam'}
            token, chocolate_params = sampler.next()

            parameter_values_json = {}

            for param in params:

                if (param["type"] == "INTEGER" or param["type"] == "DOUBLE"
                        or param["type"] == "CATEGORICAL"):
                    parameter_values_json[
                        param["parameterName"]] = chocolate_params[
                            param["parameterName"]]
                elif param["type"] == "DISCRETE":
                    parameter_values_json[param["parameterName"]] = int(
                        chocolate_params[param["parameterName"]])

            new_advisor_trial = Trial.create(study.name, "ChocolateTrial")
            new_advisor_trial.parameter_values = json.dumps(
                parameter_values_json)
            # new_advisor_trial.save()
            return_trial_list.append(new_advisor_trial)

        return return_trial_list
Example #14
        self.validation_error = 100000.0

if __name__ == '__main__':
    if len(sys.argv) == 1:
        # original params from article
        model = LatentAttention(frac_train=0.99, n_z=20, batchsize=100,
                                learning_rate=0.001, max_epochs=10,
                                e_h1=16, e_h2=32, d_h1=32, d_h2=16, run_id=-1)
        model.train()
        print("loss={}".format(float(model.validation_error)))
        exit(0)

    # Params from optimizer
    search_space = {
        "n_z": choco.quantized_uniform(5, 100, 1),
        "learning_rate": choco.log(-20, -8, 2),
        "max_epochs": choco.quantized_uniform(5, 200, 1),
        "e_h1": choco.quantized_uniform(16, 256, 1),
        "e_h2": choco.quantized_uniform(16, 256, 1),
        "d_h1": choco.quantized_uniform(16, 256, 1),
        "d_h2": choco.quantized_uniform(16, 256, 1),
    }
    connection = choco.SQLiteConnection("sqlite:///no_labels_results.sqlite3")
    sampler = choco.Bayes(connection, search_space)
    token, sample = sampler.next()
    print("Parameters: {} Token: {}".format(sample, token))
    run_id = token['_chocolate_id']
    model = LatentAttention(0.99, batchsize=150, run_id=run_id, **sample)
    model.train()
    sampler.update(token, float(model.validation_error))
Example #15
    'XGBClassifier': XGBClassifier,
    'RandomForestClassifier': RandomForestClassifier,
    'GaussianNB': GaussianNB,
    'KNeighborsClassifier': KNeighborsClassifier,
}

if __name__ == "__main__":
    datafn = 'HOUR_00024.csv'
    dbid = datafn.split('_')[1].split('.')[0]
    # dbid = datetime.datetime.now().strftime('%m%d%y%H%M%S')
    # dbid = 1

    N_RUNS = 1024
    N_PROC = 8

    datafn = os.path.join(DATA_PATH, 'hour', datafn)
    ids_fn = os.path.join(RAW_PATH, 'd_ids_split.pickle')

    trn_x, trn_y, tst_x, tst_y = load_or_gen_data(datafn, ids_fn)

    conn = choco.SQLiteConnection(url="sqlite:///hpo/hpo_%s.db" % str(dbid))
    # searcher = choco.Random(conn, space)
    searcher = choco.Bayes(conn, space)

    f = getProcFunc(conn, searcher)
    with mp.Pool(processes=N_PROC) as pool:
        pool.map(f, range(N_RUNS))

    df = conn.results_as_dataframe()
    df.to_csv("hpo/hpo_%s.csv" % str(dbid))
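
# getProcFunc is not shown in this snippet, though the nested func(data) in
# Example #8 (apparently from the same project) suggests its shape. A
# hypothetical reconstruction, assuming f1_score_model as in Example #8 and a
# picklable module-level worker (required by mp.Pool):
import functools

def _run_one(conn, searcher, i):
    # one trial per call: sample, evaluate, report back to the shared db
    token, params = searcher.next()
    loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
    searcher.update(token, loss)

def getProcFunc(conn, searcher):
    return functools.partial(_run_one, conn, searcher)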
Example #16
def main(argv):
    # parse arguments
    args = parse_all_args()

    # Chocolate code
    # define the search space
    space = {
        "lr": choco.uniform(low=.001, high=.1)
    }

    # Establish a connection to a SQLite local database
    conn = choco.SQLiteConnection("sqlite:///hpTuning.db")

    # Construct the optimizer
    sampler = choco.Bayes(conn, space)

    # Sample the next point
    token, params = sampler.next()

    train_set = PrototypicalDataset(args.input_path, args.train_path, n_support=args.support, 
            n_query=args.query)
    dev_set = PrototypicalDataset(args.input_path, args.dev_path, apply_enhancements=False, 
            n_support=args.support, n_query=args.query)

    # Use the same minibatch size to make each dataset use the same episode size
    train_loader = torch.utils.data.DataLoader(train_set, shuffle=True,
            drop_last=False, batch_size=args.mb, num_workers=0, pin_memory=True,
            collate_fn=protoCollate)
    dev_loader = torch.utils.data.DataLoader(dev_set, shuffle=True,
            drop_last=False, batch_size=args.mb, num_workers=0, pin_memory=True,
            collate_fn=protoCollate)

    Filter_specs = parse_filter_specs(args.filter_specs)
    Pre_trained_filters = None
    if args.pre_trained is not None:
        Pre_trained_filters = torch.load(args.pre_trained)
    
    model = ConvNeuralNet(args.embed_dim, args.f1, train_set.image_shape, Filter_specs=Filter_specs, Pre_trained_filters=Pre_trained_filters)
    if (args.checkpoint_path):
        state = torch.load(args.checkpoint_path)
        model.load_state_dict(state)
        print("Loaded checkpoint %s" % (args.checkpoint_path))
        # torch saves the device the model was on, so we don't need to re-load to CUDA if it was saved from CUDA
    else:
        if (torch.cuda.is_available()):
            model = model.cuda()

    train_out = AggregatePerformanceRecord("train",args.out_path,dbg=args.print_reports)
    dev_out = AggregatePerformanceRecord("dev",args.out_path,dbg=args.print_reports)
    # test_out = PerformanceRecord("test",args.out_path,dbg=args.print_reports)

    N = len(train_set)
    # Calculate the loss for the sampled point (minimized)
    # This would be your training code
    loss = train(model,train_loader,dev_loader,train_out,dev_out,N,args,**params)

    # Add the loss to the database
    sampler.update(token, loss)
    
    # # Get test set performance
    # test_set = PrototypicalDataset(args.input_path, args.test_path, apply_enhancements=False, n_support=args.support, n_query=args.query)
    # test_loader = torch.utils.data.DataLoader(test_set, shuffle=True,
    #         drop_last=False, batch_size=args.mb, num_workers=0, pin_memory=True,
    #         collate_fn=protoCollate)
    
    # # we should make sure this fn works, but we should not run this on the actual test set even once before we are completely done training
    # # evaluate_test(model, test_loader, test_out, args)

    readTune()
Example #17
def hyperparameter_job_(train_methyl_array,
						val_methyl_array,
						interest_col,
						n_bins,
						custom_loss,
						torque,
						search_strategy,
						total_time,
						delay_time,
						gpu,
						additional_command,
						additional_options,
						update,
						n_epochs,
						job,
						survival,
						optimize_time,
						random_state,
						capsule_choice,
						custom_capsule_file,
						retrain_top_job,
						batch_size,
						output_top_job_params,
						limited_capsule_names_file,
						min_capsule_len_low_bound,
						gsea_superset,
						tissue,
						number_sets,
						use_set,
						gene_context,
						select_subtypes,
						custom_hyperparameters,
						min_capsules,
						fit_spw,
						l1_l2):

	additional_params=dict(train_methyl_array=train_methyl_array,
							val_methyl_array=val_methyl_array,
							interest_col=interest_col,
							n_bins=n_bins,
							custom_loss=custom_loss,
							job=job,
							batch_size=batch_size,
							number_sets=number_sets,
							min_capsules=min_capsules
							)

	if n_epochs:
		additional_params['n_epochs']=n_epochs

	if gsea_superset:
		additional_params['gsea_superset']=gsea_superset

	if l1_l2:
		additional_params['l1_l2']=l1_l2

	if tissue:
		additional_params['tissue']=tissue

	if custom_capsule_file:
		additional_params['custom_capsule_file']=custom_capsule_file

	if output_top_job_params:
		retrain_top_job=True

	if limited_capsule_names_file:
		additional_params['limited_capsule_names_file']=limited_capsule_names_file

	if update and not (retrain_top_job and output_top_job_params):
		additional_params['capsule_choice']=capsule_choice
		select_subtypes=list(filter(None,select_subtypes))
		if select_subtypes:
			additional_params['select_subtypes']=select_subtypes
		if use_set:
			additional_params['use_set']=use_set
		if gene_context:
			additional_params['gene_context']=gene_context
		if fit_spw:
			additional_params['fit_spw']=fit_spw
	else:
		select_subtypes=list(filter(None,select_subtypes))
		if select_subtypes:
			additional_params['select_subtypes']=' -ss '.join(list(filter(None,select_subtypes)))
		additional_params['capsule_choice']=' -cc '.join(list(filter(None,capsule_choice)))
		if use_set:
			additional_params['use_set']=''
		if gene_context:
			additional_params['gene_context']=''
		if fit_spw:
			additional_params['fit_spw']=''

	if not survival:
		additional_params['gamma2']=1e-2

	def score_loss(params):
		#job=np.random.randint(0,1000000)
		start_time=time.time()

		params['hidden_topology']=','.join([str(int(params['el{}s'.format(j)])) for j in range(params['nehl']+1)])
		params['decoder_topology']=','.join([str(int(params['dl{}s'.format(j)])) for j in range(params['ndhl']+1)])

		del_params=['el{}s'.format(j) for j in range(params['nehl']+1)]+['dl{}s'.format(j) for j in range(params['ndhl']+1)]

		del_params=set(del_params+[k for k in params if k.startswith('el') or k.startswith('dl')])
		# for k in list(params.keys()):
		# 	if k.endswith('_size'):
		# 		del params[k]
		# print(params)
		# print(params['nehl'],params['ndhl'])
		# print(del_params)
		for param in del_params:
			del params[param]

		del params['nehl'], params['ndhl']

		params.update(additional_params)

		print(params)

		command='{} methylcaps-model model_capsnet {} || methylcaps-model report_loss -j {}'.format('CUDA_VISIBLE_DEVICES=0' if gpu and not torque else '',' '.join(['--{} {}'.format(k,v) for k,v in params.items() if v or k=='use_set']),params['job'])#,'&' if not torque else '')

		if output_top_job_params and retrain_top_job:
			print('Top params command: ')
			print('{} --predict'.format(command.split('||')[0]))
			exit()
		elif output_top_job_params:
			print('Continuing training of random parameters, please specify retrain_top_job.')

		if update:

			val_loss = model_capsnet_(**params)

		else:

			val_loss = return_val_loss(command, torque, total_time, delay_time, job, gpu, additional_command, additional_options)

		end_time=time.time()

		if optimize_time:
			# second objective: elapsed wall time (end - start), to be minimized
			return val_loss, end_time-start_time
		else:
			return val_loss

	grid=dict(n_epochs=dict(low=10, high=50, step=10),
				bin_len=dict(low=500000, high=1000000, step=100000),
				min_capsule_len=dict(low=min_capsule_len_low_bound, high=500, step=25),
				primary_caps_out_len=dict(low=10, high=100, step=5),
				caps_out_len=dict(low=10, high=100, step=5),
				nehl=dict(low=10,high=300,step=10,n_layers=3),
				ndhl=dict(low=100,high=300,step=10,n_layers=3),
				learning_rate=dict(low=-5,high=-1,step=1,base=10),
				gamma=dict(low=-5,high=-1,step=1,base=10),
				gamma2=dict(low=-5,high=-1,step=1,base=10),
				overlap=dict(low=0., high=.5, step=.1),
				routing_iterations=dict(low=2, high=4, step=1))

	if os.path.exists(custom_hyperparameters):
		from ruamel.yaml import safe_load as load
		with open(custom_hyperparameters) as f:
			new_grid = load(f)
		print(new_grid)
		for k in new_grid:
			for k2 in new_grid[k]:
				grid[k][k2]=new_grid[k][k2]


	n_layers=dict(encoder=grid['nehl'].pop('n_layers'),decoder=grid['ndhl'].pop('n_layers'))


	grid=dict(n_epochs=choco.quantized_uniform(**grid['n_epochs']),
				bin_len=choco.quantized_uniform(**grid['bin_len']),
				min_capsule_len=choco.quantized_uniform(**grid['min_capsule_len']),
				primary_caps_out_len=choco.quantized_uniform(**grid['primary_caps_out_len']),
				caps_out_len=choco.quantized_uniform(**grid['caps_out_len']),
				nehl={i: {'el{}s'.format(j):choco.quantized_uniform(**grid['nehl']) for j in range(i+1)} for i in range(n_layers['encoder'])},
				gamma=choco.quantized_log(**grid['gamma']),
				ndhl={i: {'dl{}s'.format(j):choco.quantized_uniform(**grid['ndhl']) for j in range(i+1)} for i in range(n_layers['decoder'])},
				learning_rate=choco.quantized_log(**grid['learning_rate']),
				routing_iterations=choco.quantized_uniform(**grid['routing_iterations']),
				overlap=choco.quantized_uniform(**grid['overlap']),
				gamma2=choco.quantized_log(**grid['gamma2'])
			) # ADD BATCH SIZE

	if n_epochs:
		grid.pop('n_epochs')

	if not survival:
		grid.pop('gamma2')

	if 'genomic_binned' not in list(capsule_choice):
		for k in ['overlap','bin_len']:
			grid.pop(k)

	if retrain_top_job:

		conn=choco.SQLiteConnection('sqlite:///hyperparameter_scan.db')
		results=conn.results_as_dataframe()
		results=results[~results['_loss'].isnull()]
		params=dict(results.iloc[np.argmin(results['_loss'].values)])
		for k in ['bin_len','caps_out_len','min_capsule_len','ndhl','nehl','primary_caps_out_len','routing_iterations']:
			if k in params:
				params[k]=int(params[k])

		del params['_loss']

		top_loss=score_loss(params)

		pickle.dump(top_loss,open('top_loss.pkl','wb'))

	else:

		optimization_method = search_strategy  # e.g. 'bayes'
		optimization_methods=['random','quasi','bayes']

		sampler_opts={}

		if optimization_method in ['random']:
			sampler_opts['n_bootstrap']=10000
			#sampler_opts['random_state']=random_state
		elif optimization_method in ['quasi']:
			sampler_opts['seed']=random_state
			sampler_opts['skip']=3
		elif optimization_method in ['bayes']:
			sampler_opts['n_bootstrap']=35
			sampler_opts['utility_function']='ei'
			sampler_opts['xi']=0.1
			#sampler_opts['random_state']=42

		#print(optimization_method)
		# note: 'random' also maps to choco.Bayes; with n_bootstrap=10000 the
		# sampler behaves as random search (Bayes draws randomly until the
		# bootstrap budget is exhausted)
		optimizer = dict(random=choco.Bayes, quasi=choco.QuasiRandom, bayes=choco.Bayes)[optimization_method]

		hyp_conn = choco.SQLiteConnection(url="sqlite:///hyperparameter_scan.db")

		sampler = optimizer(hyp_conn, grid, **sampler_opts)

		#print(sampler)

		if 0 and optimization_method in ['bayes']:
			sampler.random_state=np.random.RandomState(42)

		token,params=sampler.next()

		loss=score_loss(params)

		if (loss if not optimize_time else loss[0])>=0:
			sampler.update(token, loss)
Example #18
 file_name = "../validate_time/params_data_tpe/" + path + "/logs.txt"
 data_file_name = "../validate_time/params_data_tpe/" + path + "/data.txt"
 plot_data_path = "../validate_time/params_data_tpe/" + path + "/plot_data.csv"
 data_dict_file = "./data_dict/" + path + "/t_tpe.csv"
 t_main_3(data_manager, file_name, data_file_name, plot_data_path,
          data_dict_file)
 print("第" + str(i) + "次实验:" + "加载第" + str(j) + "个数据集***TPE***方法结束")
 # CMAES 方法
 print("第" + str(i) + "次实验:" + "加载第" + str(j) + "个数据集***CMAES***方法开始")
 n = 1
 os.mkdir("../validate_time/params_data_cmaes/" + str(i) + "_cmaes_" +
          str(j))
 path = str(i) + "_cmaes_" + str(j)
 file_name = "../validate_time/params_data_cmaes/" + path + "/data.csv"
 url_path = "sqlite:///" + "../validate_time/params_data_cmaes/" + path + "/mnistdb.db"
 conn = choco.SQLiteConnection(url=url_path)
 CMAES(data_manager, n, file_name, conn)
 print("第" + str(i) + "次实验:" + "加载第" + str(j) + "个数据集***CMAES***方法结束")
 # # agent加引导  手写数字
 # os.mkdir("../validate_time/params_data_agent(chen)/digits_bp_" + str(i))
 # os.mkdir("./data_dict/digits_bp_" + str(i))
 # path = "digits_bp_" + str(i)
 # plot_time_reward = "../validate_time/params_data_agent(chen)/" + path + "/plot_time_data.csv"
 # t_main_bp(data_manager_digits, plot_time_reward)
 # # agent with prediction, handwritten digits
 # os.mkdir("../validate_time/params_data_agent(chen)/digits_pre_" + str(i))
 # os.mkdir("./data_dict/digits_pre_" + str(i))
 # path = "digits_pre_" + str(i)
 # plot_time_reward = "../validate_time/params_data_agent(chen)/" + path + "/plot_time_data.csv"
 # t_main_pre(data_manager_digits, plot_time_reward)
 # Bayesian-optimization based, handwritten digits
Example #19
			"mean": choco.choice([0.]),
			"decision_eps": choco.choice([1.]),
			"theta" : choco.choice([1.]),
			"invert_actions" : choco.choice([False]),
			"lr": choco.choice([1e-3, 5e-4, 1e-4, 5e-5, 1e-5]),
			},
		}

	# initialize chocolate
	i = 0
	sampler = None
	while sampler is None:
	    try:
	        # connect
	        database_dir = "sqlite:///roper_experiments" + "_".join(params["tunable_params"].keys()) + str(i) + ".db"
	        conn = choco.SQLiteConnection(database_dir)
	        sampler = choco.QuasiRandom(conn, params["hyperparam_mutations"], seed=2)
	        print("saving results in", database_dir)
	    except Exception:
	        # creating/locking this database failed: try the next index
	        i += 1

	k = 1
	for i in range(calculate_space_size(params)):
	    # Get one hyperparameter configuration from the space
	    token, next_params = sampler.next()
	    # Combine Tunable and Fixed Hyperparameters.
	    hyperparams = {**params, **next_params}
	    print("params", hyperparams)
	    # Create autoencoder model.
	    model = Trainable(hyperparams,token['_chocolate_id'], ...) # TODO: complete parameters