Ejemplo n.º 1
0
def example_run_bayesian():
    """Run a short Chocolate Bayes search and save the visited points and scores.

    A 10-dimensional uniform search space is built, with dimension ``i``
    keyed ``"i"`` and spanning ``mean[i] - variances[i]`` to
    ``mean[i] + variances[i]``. Twenty points are then sampled, scored with
    ``extract_score`` and reported back to the optimizer. The scored points
    are written to ``Point_list`` and ``1 - loss`` for each of them to
    ``Score_list`` with ``np.savetxt``; row ``i`` of one file corresponds to
    row ``i`` of the other.

    Returns:
        True once both files have been written.
    """
    # initialisation
    mean = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    variances = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5]
    x1, y1, x2, y2 = 60, 342, 726, 725
    number = 20
    space = {
        str(idx): choco.uniform(centre - spread, centre + spread)
        for idx, (centre, spread) in enumerate(zip(mean, variances))
    }

    # The results live in a local SQLite file; inspect it with: sqlite3 TEST.db
    conn = choco.SQLiteConnection("sqlite:///TEST.db")
    # NOTE(review): Chocolate's lock() appears to be a context manager, so
    # this bare call likely has no effect -- confirm before removing it.
    conn.lock()
    bay = choco.Bayes(conn, space, clear_db=True)

    all_pos = []
    all_score = []
    for step in range(number):
        # Sample, score and report inside the same iteration so that every
        # token handed out by the optimizer receives a loss.
        token, point_next = bay.next()
        point = format_next(point_next)
        loss = extract_score(step, x1, y1, x2, y2, point)
        bay.update(token, loss)
        # Record the point that was actually scored, next to its own score.
        all_pos.append(point)
        all_score.append(1 - loss)
        print("\rProgress : {}%".format(100 * step // number), end="")

    np.savetxt("Score_list", all_score)
    np.savetxt("Point_list", all_pos)

    return True
Ejemplo n.º 2
0
def main(argv=None):
    """Run one Chocolate Bayes trial, store its loss and plot the stored results.

    A single parameter set is sampled, used to preprocess the data and train
    a model, and the resulting loss is written back to ``db2.db``. All stored
    results are then melted into long form and drawn as one ``_loss``
    regression panel per remaining column.

    Args:
        argv: accepted for the usual entry-point signature; not used.
    """
    # Optimizer set-up: every sampled point is repeated three times and the
    # repetitions are reduced with the mean.
    search_space = create_space()
    connection = choco.SQLiteConnection(url="sqlite:///db2.db")
    repeat_cv = choco.Repeat(repetitions=3,
                             reduce=np.mean,
                             rep_col="_repetition_id")
    optimizer = choco.Bayes(connection, search_space, crossvalidation=repeat_cv)

    # Draw one candidate and show the token that identifies it.
    token, params = optimizer.next()
    print(type(token))
    print(token)

    # Train with the sampled parameters and report the loss.
    eps = params["eps"]
    dev_fraction = params["dev_sample_percentage"]
    x_train, y_train, x_dev, y_dev = preprocess(eps, dev_fraction)
    loss = train(x_train, y_train, x_dev, y_dev, params)
    print(loss)
    optimizer.update(token, loss)

    # Plot the loss against every other stored column.
    wide_results = connection.results_as_dataframe()
    print(wide_results)
    long_results = pd.melt(wide_results,
                           id_vars=["_loss"],
                           value_name='value',
                           var_name="variable")
    sns.lmplot(x="value",
               y="_loss",
               data=long_results,
               col="variable",
               col_wrap=3,
               sharex=False)
    plt.show()
Ejemplo n.º 3
0
    def create_optimizer(self, algorithm_name):
        """Build the Chocolate search space and store the matching optimizer.

        Every parameter in ``self.search_space.params`` is converted to a
        Chocolate distribution keyed by its encoded name, then the optimizer
        selected by ``algorithm_name`` is created on ``self.conn`` with
        ``clear_db=True`` and stored in ``self.chocolate_optimizer``.

        Args:
            algorithm_name: "grid", "random", "quasirandom",
                "bayesianoptimization" or "mocmaes", or a key of
                ``DEPRECATED_ALGORITHM_NAME`` (which is mapped to its
                replacement after a ``DeprecationWarning``).

        Raises:
            Exception: if the (possibly remapped) name is not supported.
        """
        # Search Space example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        space = {}

        for param in self.search_space.params:
            key = BaseChocolateService.encode(param.name)
            kind = param.type
            if kind == INTEGER or kind == DOUBLE:
                cast = int if kind == INTEGER else float
                low, top, step = (cast(param.min), cast(param.max),
                                  cast(param.step))
                # Chocolate quantized_uniform distribution uses half-open
                # interval: [low, high), hence the extra step on the top.
                space[key] = choco.quantized_uniform(low, top + step, step)
            elif kind in (CATEGORICAL, DISCRETE):
                # For Categorical and Discrete insert indexes to DB from list of values
                space[key] = choco.choice(
                    [position for position, _value in enumerate(param.list)])

        if algorithm_name in DEPRECATED_ALGORITHM_NAME:
            current_name = DEPRECATED_ALGORITHM_NAME[algorithm_name]
            warnings.warn(
                "Algorithm name '{}' is deprecated. Please use '{}'.".format(
                    algorithm_name, current_name),
                DeprecationWarning,
            )
            algorithm_name = current_name

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        # Maps the algorithm name to the Chocolate class name and the extra
        # keyword arguments it needs. CMAES is intentionally not offered.
        optimizer_specs = {
            "grid": ("Grid", {}),
            # hyperopt-random is the default option in katib.
            "random": ("Random", {}),
            "quasirandom": ("QuasiRandom", {}),
            "bayesianoptimization": ("Bayes", {}),
            "mocmaes": ("MOCMAES", {"mu": 1}),
        }
        if algorithm_name not in optimizer_specs:
            raise Exception(
                '"Failed to create Chocolate optimizer for the algorithm: {}'.
                format(algorithm_name))

        class_name, extra_kwargs = optimizer_specs[algorithm_name]
        optimizer_class = getattr(choco, class_name)
        self.chocolate_optimizer = optimizer_class(self.conn,
                                                   space,
                                                   clear_db=True,
                                                   **extra_kwargs)
Ejemplo n.º 4
0
    def create_optimizer(self, algorithm_name):
        """Translate ``self.search_space`` into Chocolate distributions and create the optimizer.

        The optimizer is created on ``self.conn`` with ``clear_db=True`` and
        stored in ``self.chocolate_optimizer``.

        Args:
            algorithm_name: "grid", "chocolate-random",
                "chocolate-quasirandom", "chocolate-bayesian-optimization"
                or "chocolate-mocmaes".

        Raises:
            Exception: if ``algorithm_name`` is none of the names above.
        """
        # Search Space example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in self.search_space.params:
            key = BaseChocolateService.encode(param.name)
            # Chocolate's quantized_uniform samples the half-open interval
            # [low, high). The upper bound is therefore widened by one step,
            # otherwise param.max itself could never be suggested.
            if param.type == INTEGER:
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min),
                    int(param.max) + int(param.step), int(param.step))
            elif param.type == DOUBLE:
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min),
                    float(param.max) + float(param.step), float(param.step))
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                # Any remaining type is treated as a list of numeric choices.
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if algorithm_name == "grid":
            self.chocolate_optimizer = choco.Grid(self.conn,
                                                  chocolate_search_space,
                                                  clear_db=True)
        # hyperopt-random is the default option in katib.
        elif algorithm_name == "chocolate-random":
            self.chocolate_optimizer = choco.Random(self.conn,
                                                    chocolate_search_space,
                                                    clear_db=True)
        elif algorithm_name == "chocolate-quasirandom":
            self.chocolate_optimizer = choco.QuasiRandom(
                self.conn, chocolate_search_space, clear_db=True)
        elif algorithm_name == "chocolate-bayesian-optimization":
            self.chocolate_optimizer = choco.Bayes(self.conn,
                                                   chocolate_search_space,
                                                   clear_db=True)
        # CMAES is intentionally not offered here.
        elif algorithm_name == "chocolate-mocmaes":
            mu = 1
            self.chocolate_optimizer = choco.MOCMAES(self.conn,
                                                     chocolate_search_space,
                                                     mu=mu,
                                                     clear_db=True)
        else:
            raise Exception(
                '"Failed to create Chocolate optimizer for the algorithm: {}'.
                format(algorithm_name))
Ejemplo n.º 5
0
    def func(data):
        """Run ``nseed`` random trials followed by ``nruns`` Bayes trials for one dataset.

        ``data`` unpacks to ``(trn_x, trn_y, tst_x, tst_y, dbid)``. Both
        optimizers share the SQLite file ``hpo/hpo_<dbid>.db``. ``space``,
        ``nseed`` and ``nruns`` are read from an outer scope.
        """
        trn_x, trn_y, tst_x, tst_y, dbid = data
        db_url = "sqlite:///hpo/hpo_%s.db" % str(dbid)
        conn = choco.SQLiteConnection(url=db_url)
        sampler = choco.Random(conn, space)
        searcher = choco.Bayes(conn, space)
        print('START %s' % dbid)

        def run_trials(optimizer, count):
            # Sample, score and report `count` parameter sets with `optimizer`.
            for _ in range(count):
                token, params = optimizer.next()
                loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
                optimizer.update(token, loss)

        # Random trials first, then the Bayes trials on the same database.
        run_trials(sampler, nseed)
        run_trials(searcher, nruns)
def run_chocolate(X_train, X_test, Y_train, Y_test, y_scaler_nl, run, space,
                  iterations, clear_db):
    """Run ``iterations`` Chocolate Bayes trials against the database ``<run>.db``.

    Each trial samples a parameter set, scores it with ``_score`` and writes
    the loss back to the SQLite database in the current directory.

    Args:
        X_train, X_test, Y_train, Y_test, y_scaler_nl: forwarded unchanged
            to ``_score``.
        run: database name without the ``.db`` suffix.
        space: Chocolate search space.
        iterations: number of trials to run.
        clear_db: accepted but currently unused; the sampler is always
            created with ``clear_db=False`` so earlier results are kept.
    """
    # Create / connect to the SQLite database in the current directory.
    database_url = "sqlite:///" + run + ".db"
    conn = choco.SQLiteConnection(url=database_url)

    # Search strategy: Bayes attempts to "learn" patterns from ALL previous
    # runs. Repetition-based cross-validation is not enabled.
    sampler = choco.Bayes(conn, space, clear_db=False)

    for _ in range(iterations):
        # Examine the database and pick the next experiment.
        token, params = sampler.next()
        # Run the experiment.
        loss = _score(X_train, X_test, Y_train, Y_test, y_scaler_nl, params)
        # Add the new result to the database.
        sampler.update(token, loss)
Ejemplo n.º 7
0
    def get_new_suggestions(self, study, trials=None, number=1):
        """
        Get the new suggested trials with Chocolate algorithm.

        Args:
            study: object exposing ``name`` and ``study_configuration``, a
                JSON string with a "params" list and a "goal".
            trials: existing trials; those whose ``status`` is "Completed"
                are inserted into the Chocolate database before sampling.
                Defaults to no trials.
            number: how many new trials to suggest.

        Returns:
            A list of ``number`` trials made with ``Trial.create`` whose
            ``parameter_values`` is the JSON-encoded sampled parameter set.

        Raises:
            ValueError: if ``self.algorithm_name`` is not supported.
        """
        # A None default avoids sharing one list object between calls.
        if trials is None:
            trials = []

        # 1. Construct search space
        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        study_configuration_json = json.loads(study.study_configuration)
        params = study_configuration_json["params"]

        for param in params:
            param_name = param["parameterName"]

            if param["type"] == "INTEGER":
                # quantized_uniform samples the half-open interval
                # [low, high), so one step is added to keep maxValue
                # reachable. Assumes INTEGER params carry the same
                # minValue/maxValue keys as DOUBLE ones.
                chocolate_search_space[param_name] = choco.quantized_uniform(
                    int(param["minValue"]), int(param["maxValue"]) + 1, 1)

            elif param["type"] == "DOUBLE":
                chocolate_search_space[param_name] = choco.uniform(
                    param["minValue"], param["maxValue"])

            elif param["type"] == "DISCRETE" or param["type"] == "CATEGORICAL":
                feasible_point_list = [
                    value.strip()
                    for value in param["feasiblePoints"].split(",")
                ]
                chocolate_search_space[param_name] = choco.choice(
                    feasible_point_list)

        conn = choco.SQLiteConnection("sqlite:///my_db.db")

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if self.algorithm_name == "Grid":
            sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "Random":
            sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "QuasiRandom":
            sampler = choco.QuasiRandom(conn,
                                        chocolate_search_space,
                                        clear_db=True)
        elif self.algorithm_name == "Bayes":
            sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "CMAES":
            sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "MOCMAES":
            mu = 1
            sampler = choco.MOCMAES(conn,
                                    chocolate_search_space,
                                    mu=mu,
                                    clear_db=True)
        else:
            # Fail here with a clear message instead of hitting an unbound
            # `sampler` further down.
            raise ValueError("Unsupported Chocolate algorithm: {}".format(
                self.algorithm_name))

        # 2. Update with completed advisor trials
        completed_advisor_trials = [
            trial for trial in trials if trial.status == "Completed"
        ]

        for index, advisor_trial in enumerate(completed_advisor_trials):
            parameter_values_json = json.loads(advisor_trial.parameter_values)

            # Chocolate minimizes, so a maximization objective is negated.
            loss = advisor_trial.objective_value
            if study_configuration_json["goal"] == "MAXIMIZE":
                loss = -1 * loss

            entry = {"_chocolate_id": index, "_loss": loss}
            entry.update(parameter_values_json)
            # Should not use sampler.update(token, loss)
            conn.insert_result(entry)

        # 3. Run algorithm and construct return advisor trials
        return_trial_list = []

        for _ in range(number):

            # Example: {'_chocolate_id': 1}
            # Example: {u'hidden2': u'32', u'learning_rate': 0.07122424534644338, u'l1_normalization': 0.8402644688674471, u'optimizer': u'adam'}
            token, chocolate_params = sampler.next()

            parameter_values_json = {}

            for param in params:
                name = param["parameterName"]
                kind = param["type"]

                if kind == "DOUBLE" or kind == "CATEGORICAL":
                    parameter_values_json[name] = chocolate_params[name]
                elif kind == "INTEGER":
                    # The quantized sample may come back as a float.
                    parameter_values_json[name] = int(
                        round(chocolate_params[name]))
                elif kind == "DISCRETE":
                    parameter_values_json[name] = int(chocolate_params[name])

            new_advisor_trial = Trial.create(study.name, "ChocolateTrial")
            new_advisor_trial.parameter_values = json.dumps(
                parameter_values_json)
            # The trial is returned unsaved; persisting it is left to the caller.
            return_trial_list.append(new_advisor_trial)

        return return_trial_list
Ejemplo n.º 8
0
def main(argv):
    """Train one ConvNeuralNet with parameters sampled by Chocolate and record the loss.

    One point is drawn from a Bayes sampler over the ``lr`` range stored in
    ``hpTuning.db``. The train and dev datasets, loaders and model are then
    built from the parsed command-line arguments, ``train`` is run with the
    sampled parameters, the resulting loss is reported to the sampler and
    ``readTune`` is called.

    Args:
        argv: accepted for the entry-point signature; arguments are actually
            obtained from ``parse_all_args()``.
    """
    # parse arguments
    args = parse_all_args()

    # ---- Chocolate: search space, local SQLite connection, optimizer ----
    search_space = {"lr": choco.uniform(low=.001, high=.1)}
    connection = choco.SQLiteConnection("sqlite:///hpTuning.db")
    optimizer = choco.Bayes(connection, search_space)
    # Sample the next point
    token, params = optimizer.next()

    # ---- Data ----
    train_set = PrototypicalDataset(args.input_path,
                                    args.train_path,
                                    n_support=args.support,
                                    n_query=args.query)
    dev_set = PrototypicalDataset(args.input_path,
                                  args.dev_path,
                                  apply_enhancements=False,
                                  n_support=args.support,
                                  n_query=args.query)

    # Use the same minibatch size to make each dataset use the same episode size
    train_loader = torch.utils.data.DataLoader(train_set,
                                               shuffle=True,
                                               drop_last=False,
                                               batch_size=args.mb,
                                               num_workers=0,
                                               pin_memory=True,
                                               collate_fn=protoCollate)
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             shuffle=True,
                                             drop_last=False,
                                             batch_size=args.mb,
                                             num_workers=0,
                                             pin_memory=True,
                                             collate_fn=protoCollate)

    # ---- Model ----
    filter_specs = parse_filter_specs(args.filter_specs)
    if args.pre_trained is not None:
        pretrained_filters = torch.load(args.pre_trained)
    else:
        pretrained_filters = None

    model = ConvNeuralNet(args.embed_dim,
                          args.f1,
                          train_set.image_shape,
                          Filter_specs=filter_specs,
                          Pre_trained_filters=pretrained_filters)
    if args.checkpoint_path:
        # torch saves the device the model was on, so we don't need to
        # re-load to CUDA if it was saved from CUDA
        model.load_state_dict(torch.load(args.checkpoint_path))
        print("Loaded checkpoint %s" % (args.checkpoint_path))
    elif torch.cuda.is_available():
        model = model.cuda()

    train_out = AggregatePerformanceRecord("train",
                                           args.out_path,
                                           dbg=args.print_reports)
    dev_out = AggregatePerformanceRecord("dev",
                                         args.out_path,
                                         dbg=args.print_reports)

    # ---- Train with the sampled point and add its loss to the database ----
    N = len(train_set)
    loss = train(model, train_loader, dev_loader, train_out, dev_out, N, args,
                 **params)
    optimizer.update(token, loss)

    # The test set is deliberately not evaluated here; it should stay
    # untouched until training is completely finished.

    readTune()
Ejemplo n.º 9
0
    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with chocolate algorithm.

        Args:
            search_space: object exposing ``params`` (each with ``name``,
                ``type``, ``min``, ``max``, ``step`` and ``list``) and
                ``goal``.
            trials: finished trials; each is inserted into the Chocolate
                database with its target metric as the loss (negated when
                the goal is ``MAX_GOAL``).
            request_number: how many new assignments to request.

        Returns:
            A list of converted assignments. It holds fewer than
            ``request_number`` entries when the sampler is exhausted.

        Raises:
            Exception: if ``self.algorithm_name`` is not supported.
        """

        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in search_space.params:
            key = BaseChocolateService.encode(param.name)
            # Chocolate's quantized_uniform samples the half-open interval
            # [low, high). The upper bound is therefore widened by one step,
            # otherwise param.max itself could never be suggested.
            if param.type == INTEGER:
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max) + 1, 1)
            elif param.type == DOUBLE:
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min),
                    float(param.max) + float(param.step), float(param.step))
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                # Any remaining type is treated as a list of numeric choices.
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])

        conn = choco.SQLiteConnection("sqlite:///my_db.db")
        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if self.algorithm_name == "grid":
            sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
        # hyperopt-random is the default option in katib.
        elif self.algorithm_name == "chocolate-random":
            sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "chocolate-quasirandom":
            sampler = choco.QuasiRandom(conn,
                                        chocolate_search_space,
                                        clear_db=True)
        elif self.algorithm_name == "chocolate-bayesian-optimization":
            sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
        # CMAES is intentionally not offered here.
        elif self.algorithm_name == "chocolate-MOCMAES":
            mu = 1
            sampler = choco.MOCMAES(conn,
                                    chocolate_search_space,
                                    mu=mu,
                                    clear_db=True)
        else:
            raise Exception('"Failed to create the algortihm: {}'.format(
                self.algorithm_name))

        for index, trial in enumerate(trials):
            # Chocolate minimizes, so a maximization metric is negated.
            loss_for_choco = float(trial.target_metric.value)
            if search_space.goal == MAX_GOAL:
                loss_for_choco = -1 * loss_for_choco

            entry = {"_chocolate_id": index, "_loss": loss_for_choco}
            for param in search_space.params:
                # Find the value this trial assigned to the parameter.
                param_assignment = None
                for assignment in trial.assignments:
                    if param.name == assignment.name:
                        param_assignment = assignment.value
                        break
                if param.type == INTEGER:
                    param_assignment = int(param_assignment)
                elif param.type == DOUBLE:
                    param_assignment = float(param_assignment)
                entry.update({
                    BaseChocolateService.encode(param.name):
                    param_assignment
                })
            logger.info(entry)
            # Should not use sampler.update(token, loss), because we will create
            # a new BaseChocolateService instance for every request. Thus we need
            # to insert all previous trials every time.
            conn.insert_result(entry)

        list_of_assignments = []

        for _ in range(request_number):
            try:
                token, chocolate_params = sampler.next()
            except StopIteration:
                logger.info(
                    "Chocolate db is exhausted, increase Search Space or decrease maxTrialCount!"
                )
                # Stop asking: an exhausted sampler has nothing more to give
                # and would only repeat this log line for each request.
                break
            list_of_assignments.append(
                BaseChocolateService.convert(search_space, chocolate_params))
        return list_of_assignments
Ejemplo n.º 10
0
        X, y, stratify=y, test_size=config.train_val_split_pct)
    return x_train, y_train, x_val, y_val


# Script entry point: load the data, keep only the numeric columns,
# standardize them, and run a single Chocolate Bayes trial whose loss is
# written back to the MongoDB-backed results store.
if __name__ == '__main__':
    # Columns kept as model inputs.
    num_cols = [
        'amount_tsh', 'gps_height', 'population', 'total_missing', 'longitude',
        'latitude'
    ]
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    # NOTE(review): test_x is read from config.d_xtrain, the same source as
    # train_x -- presumably a test-set path was intended; confirm.
    test_x = read_data(config.d_xtrain)

    train_x = train_x[num_cols]
    test_x = test_x[num_cols]

    # The scaler is fitted on the training features only, then applied to both.
    scaler = StandardScaler(with_mean=True, with_std=True)
    scaler.fit(train_x)

    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    x_train, y_train, x_val, y_val = split(train_x, train_y)

    # clear_db=True: the sampler is asked to start from an empty database.
    conn = choco.MongoDBConnection("mongodb://localhost:27017/")
    sampler = choco.Bayes(conn, param_grid, clear_db=True)
    token, params = sampler.next()

    # Evaluate the sampled parameters and report the loss for this token.
    loss = model_train(x_train, y_train, x_val, y_val, params)
    sampler.update(token, loss)
            self.validation_error = 100000.0

# Script entry point. With no command-line arguments a single baseline model
# is trained and its loss printed; otherwise one parameter set is sampled
# from the Chocolate Bayes optimizer, trained, and its validation error is
# written back to the results database.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # original params from article
        model = LatentAttention(frac_train=0.99, n_z=20, batchsize=100,
                                learning_rate=0.001, max_epochs=10,
                                e_h1=16, e_h2=32, d_h1=32, d_h2=16, run_id=-1)
        model.train()
        print("loss={}".format(float(model.validation_error)))
        # sys.exit is used instead of the exit() helper, which is installed
        # by the site module for interactive use and may be missing.
        sys.exit(0)

    # Params from optimizer
    search_space = {
        "n_z": choco.quantized_uniform(5, 100, 1),
        "learning_rate": choco.log(-20, -8, 2),
        "max_epochs": choco.quantized_uniform(5, 200, 1),
        "e_h1": choco.quantized_uniform(16, 256, 1),
        "e_h2": choco.quantized_uniform(16, 256, 1),
        "d_h1": choco.quantized_uniform(16, 256, 1),
        "d_h2": choco.quantized_uniform(16, 256, 1),
    }
    connection = choco.SQLiteConnection("sqlite:///no_labels_results.sqlite3")
    sampler = choco.Bayes(connection, search_space)
    token, sample = sampler.next()
    print("Parameters: {} Token: {}".format(sample, token))
    # The Chocolate id of this sample doubles as the model's run id.
    run_id = token['_chocolate_id']
    model = LatentAttention(0.99, batchsize=150, run_id=run_id, **sample)
    model.train()
    sampler.update(token, float(model.validation_error))
Ejemplo n.º 12
0
    'XGBClassifier': XGBClassifier,
    'RandomForestClassifier': RandomForestClassifier,
    'GaussianNB': GaussianNB,
    'KNeighborsClassifier': KNeighborsClassifier,
}

# Script entry point: run N_RUNS optimizer evaluations for one data file
# across a pool of N_PROC worker processes, then export every stored result
# to a CSV file.
if __name__ == "__main__":
    datafn = 'HOUR_00024.csv'
    # The database id is the part of the file name between '_' and '.',
    # i.e. '00024' for the file above.
    dbid = datafn.split('_')[1].split('.')[0]
    # dbid = datetime.datetime.now().strftime('%m%d%y%H%M%S')
    # dbid = 1

    N_RUNS = 1024
    N_PROC = 8

    datafn = os.path.join(DATA_PATH, 'hour', datafn)
    ids_fn = os.path.join(RAW_PATH, 'd_ids_split.pickle')

    # NOTE(review): these four names are not referenced again in this block;
    # presumably the worker function reads them as module globals -- verify.
    trn_x, trn_y, tst_x, tst_y = load_or_gen_data(datafn, ids_fn)

    conn = choco.SQLiteConnection(url="sqlite:///hpo/hpo_%s.db" % str(dbid))
    # searcher = choco.Random(conn, space)
    searcher = choco.Bayes(conn, space)

    # One call of `f` per run index, spread over the worker pool.
    f = getProcFunc(conn, searcher)
    with mp.Pool(processes=N_PROC) as pool:
        pool.map(f, range(N_RUNS))

    # Dump everything stored in the database next to it as CSV.
    df = conn.results_as_dataframe()
    df.to_csv("hpo/hpo_%s.csv" % str(dbid))