GDSC_drug_id,
                            GDSC_drug_response_df,
                            normalized_data_df[source_data_key])

y_values = y.values.astype(np.float32)
X_values = X.values.astype(np.float32)

## Cross-validation
# Splitting data
split_folds = KFold(n_splits=5, shuffle=True, random_state=random_state)
split_folds.get_n_splits()


# Import parameters
param_dict = load(open('./params/sakellaropoulos_param_grid.pkl', 'rb'))
parameter_grid = ParameterGrid(param_dict)

if GDSC_drug_name not in os.listdir(output_folder):
    os.mkdir(output_folder + GDSC_drug_name)
output_folder += GDSC_drug_name + '/'

# Cross-validation for each parameter in the grid
for param_idx, param in enumerate(parameter_grid):
    print('START PARAM NUMBER %s'%(param_idx))

    param['n_input'] = X_values.shape[1]
    network_folder = make_figure_folder(output_folder, param)

    y_predict_nn = np.zeros(y_values.shape[0])

    for split_idx, (train_index, valid_index) in enumerate(split_folds.split(X_values, y_values)):
Example #2
    def GridSearch(self,
                   batch_size_,
                   num_epochs,
                   random_state,
                   gmout="GridSearchResult"):
        """
        Run GridSearch to find best parameters.
        """
        # train_keys, test_keys = MDCTrainTestSplit(self.target, 0)
        # train_keys, test_keys = DISCTrainTestSplit(self.target)
        train_keys, test_keys = TrainTestSplit(list(self.target.keys()),
                                               test_size=0.20,
                                               random_state=random_state)
        print("Train set size: %d Test set size %d" %
              (len(train_keys), len(test_keys)))

        # train_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
        # train_generator = self.DataGenerator(train_keys, batch_size_)

        x_train, y_train = self.GenData(train_keys)

        # This is unstable
        # test_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
        # test_generator = self.DataGenerator(test_keys, batch_size_)
        # This is more stable
        x_test, y_test = self.GenData(test_keys)

        # PARAMETER DEFINITIONS
        # simple architecture

        param = {}
        param["nunits"] = [200, 400, 800]
        param["ndense_layers"] = [2, 4, 6]
        # param["dropout"] = ["on", "off"]
        param["dropout"] = ["on"]
        # param["activation"] = ["relu", "leakyrelu"]
        param["activation"] = ["relu"]
        """
        # resnet architecture
        param = {}
        param["nunits"] = [200, 400,  600, 800]
        param["ndense_layers"] = [2, 4, 6]
        """

        all_combo = list(ParameterGrid(param))
        print("Evaluating %d combinations of parameters" % (len(all_combo)))

        already_computed_combo = []
        if Path(gmout).is_file():
            fi = open(gmout, "r")
            for line in fi:
                v = str.split(line.strip(), " ")
                # simple architecture
                units = v[0]
                layers = v[1]
                act = v[2]
                drop = v[3]
                s = ("%s-%s-%s-%s" % (units, layers, act, drop))
                """

                # resnet architecture
                units = v[0]
                layers = v[1]
                s = ("%s-%s" % (units, layers))
                """

                already_computed_combo.append(s)
            fi.close()

        for c in all_combo:
            # simple architecture
            s = ("%s-%s-%s-%s" % (c["nunits"], c["ndense_layers"],
                                  c["activation"], c["dropout"]))
            """
            # resnet architecture
            s = ("%s-%s" % (c["nunits"], c["ndense_layers"]))
            """

            if s in already_computed_combo:
                print("%s already computed... skip..." % (s))
            else:
                model = build_gridsearch_model(self.nfeatures,
                                               c["ndense_layers"], c["nunits"],
                                               c["activation"], c["dropout"])
                """
                model = build_dnn_resnet_model(self.nfeatures,
                                               c["nunits"],
                                               c["ndense_layers"])
                """

                print(model.summary())
                b = batch_size_
                """
                model_name = ("#b%d_#e%d_#u%d_#dl%d_act-%s_dp-%s" % (b,
                                                                     num_epochs,
                                                                     c["nunits"],
                                                                     c["ndense_layers"],
                                                                     c["activation"],
                                                                     c["dropout"]))
                """

                model_name = ("#b%d_#e%d_#u%d_#dl%d" %
                              (b, num_epochs, c["nunits"], c["ndense_layers"]))
                log_dir_ = ("./logs/%s" % (model_name))

                log_dir_ += time.strftime("%Y%m%d%H%M%S")

                model_output = "%s.h5" % (model_name)
                callbacks_ = [
                    TensorBoard(log_dir=log_dir_,
                                histogram_freq=0,
                                write_graph=False,
                                write_images=False),
                    ModelCheckpoint(model_output,
                                    monitor='val_loss',
                                    verbose=0,
                                    save_best_only=True)
                ]
                """
                callbacks_ = [TensorBoard(log_dir=log_dir_,
                                          histogram_freq=0,
                                          write_graph=False,
                                          write_images=False),
                              EarlyStopping(monitor='val_loss',
                                            min_delta=0,
                                            patience=50,
                                            verbose=0,
                                            mode='auto')]
                """

                model.fit(x_train,
                          y_train,
                          epochs=num_epochs,
                          batch_size=b,
                          verbose=self.verbose,
                          validation_data=(x_test, y_test),
                          callbacks=callbacks_)

                bestmodel = GetLoadModelFnc()(model_output)

                yrecalc_train = bestmodel.predict(x_train)
                """

                model.fit_generator(train_generator,
                                    steps_per_epoch=train_steps_per_epoch,
                                    epochs=num_epochs,
                                    verbose=1,
                                    validation_data=(x_test, y_test),
                                    # validation_data=test_generator,
                                    # validation_steps=test_steps_per_epoch,
                                    callbacks=callbacks_)


                yrecalc_train = []
                y_train = []
                for key in train_keys:
                    a = np.array([self.X_raw[key]])
                    yrecalc_train.extend(model.predict(a))
                    y_train.append(self.target[key])
                """
                ypred_test = bestmodel.predict(x_test)
                r2 = RSQ(y_train, yrecalc_train)
                mse_train = MSE(y_train, yrecalc_train)
                mae_train = MAE(y_train, yrecalc_train)
                q2 = RSQ(y_test, ypred_test)
                mse_test = MSE(y_test, ypred_test)
                mae_test = MAE(y_test, ypred_test)
                print("R2: %.4f  Q2: %.4f" % (r2, q2))

                fo = open("%s" % (gmout), "a")
                """
                # simple architecture
                fo.write("%d %d %s %s %f %f %f %f %f %f %f %f\n" % (c["nunits"],
                                                                    c["ndense_layers"],
                                                                    c["activation"],
                                                                    c["dropout"],
                                                                    mse_train,
                                                                    mae_train,
                                                                    r2,
                                                                    train_score,
                                                                    mse_test,
                                                                    mae_test,
                                                                    q2,
                                                                    test_score))
                """
                # simple architecture (kept consistent with the resume parsing above)
                fo.write("%d %d %s %s %f %f %f %f %f %f\n" %
                         (c["nunits"], c["ndense_layers"], c["activation"],
                          c["dropout"], mse_train, mae_train, r2,
                          mse_test, mae_test, q2))
                fo.close()
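
The resume-by-skipping pattern used above (parse the results file, rebuild the already-computed keys, and skip them) can be sketched in isolation; the file name, column layout and grid below are hypothetical:

from pathlib import Path
from sklearn.model_selection import ParameterGrid

results_file = "GridSearchResult"  # hypothetical results file, one evaluated combination per line
param = {"nunits": [200, 400, 800], "ndense_layers": [2, 4, 6]}

# Rebuild the set of combinations that a previous run already wrote out.
already_computed = set()
if Path(results_file).is_file():
    with open(results_file) as fi:
        for line in fi:
            v = line.strip().split(" ")
            already_computed.add("%s-%s" % (v[0], v[1]))

for c in ParameterGrid(param):
    key = "%s-%s" % (c["nunits"], c["ndense_layers"])
    if key in already_computed:
        print("%s already computed... skip..." % key)
        continue
    print("evaluating", key)  # placeholder for building, training and logging the model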
Example #3
# Necessary to add cwd to path when script run
# by SLURM (since it executes a copy)
sys.path.append(os.getcwd())

df = pd.read_csv('notes.csv') #2,083,180 notes
df = df[~(df['iserror'] == 1)] #886 notes with errors

termsToRemove = ['Admission Date:', 'Discharge Date:', 'Service:', 'ADDENDUM:', 'Dictated By:', 'Completed by:', 'D:', 'T:', 'JOB#:', '\?\?\?\?\?\?', 'INTERPRETATION:', 'Findings', 'Attending Physician:', 'Referral date:']

texts = df['text'].str.replace(r'(%s)' % ('|'.join(termsToRemove)), '', regex=True).str.replace(r'\[\*\*.*?\*\*\]', '', regex=True)
toks = texts.str.lower().apply(nltk.word_tokenize).values
print(sum(map(len, toks)), 'tokens')

#param_grid = {'size': [100, 200, 300],
#        'min_count': [5, 10, 50],
#        'window': [1, 5, 20]}

param_grid = {'size': [200, 300],
              'min_count': [2, 3, 4],
              'window': [20]}

for i in ParameterGrid(param_grid):
    #skipgrams
    model = gensim.models.Word2Vec(toks, sg=1, workers=80, seed=42, **i)
    pickle.dump(model, open('embeddings/%sd_%sc_%sw'%(i['size'], i['min_count'], i['window']), 'wb'))



Example #4
        # },
        # {
        #     "h_sizes": [80, 80, 80],
        #     "dense_size": 40,
        #     "pos_emb_size": 50,
        #     "cnn_win_size": 7,
        #     "suffix_prefix_dims": 70,
        #     "suffix_prefix_buckets": 3000,
        #     "target_emb_dim": 25,
        #     "mention_emb_dim": 25,
        # }
    ],
    "learning_rate": [0.0001, 0.001, 0.00001],
    "learning_rate_decay": [0.998]  # 0.991
}


def flatten(params):
    new_params = {}
    for key, val in params.items():
        if type(val) is dict:
            new_params.update(val)
        else:
            new_params[key] = val
    return new_params


cnn_params = list(map(flatten, ParameterGrid(cnn_params_grids)))

att_params = list(map(flatten, ParameterGrid(att_params_grids)))
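
As a small hedged sketch of why flatten() is paired with ParameterGrid here: grid axes whose options are themselves dicts get merged into the top-level parameter set (the grid below is made up for illustration):

from sklearn.model_selection import ParameterGrid

demo_grid = {
    "architecture": [{"h_sizes": [80, 80, 80], "dense_size": 40}],  # hypothetical dict-valued axis
    "learning_rate": [0.0001, 0.001],
}

for p in map(flatten, ParameterGrid(demo_grid)):
    # flatten() merges the chosen architecture dict into the flat parameter set,
    # e.g. {'h_sizes': [80, 80, 80], 'dense_size': 40, 'learning_rate': 0.0001}
    print(p)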
Example #5
    def _get_param_iterator(self):
        return ParameterGrid(self.param_grid)
from utils import df2mapk

EPOCH_NUM = 1

PARAM_SEARCHED = [
    {
        'a': [0.01, 0.03, 0.1],
        'b': [1.0],  # based on convention
        'l1': [8.0, 10.0, 13.0],
        'l2': [1.0],
        'n': [2**29],
        'interaction': [True],
        'epoch': [1],
    },
]
FTRL_PARAM = list(ParameterGrid(PARAM_SEARCHED))
#     {'a': 0.1, 'b': 1., 'l1': 1., 'l2': 0.001, 'n': 2**24, 'epoch': EPOCH_NUM, 'interaction': False}


def main():
    data_fname_base = sys.argv[1]
    subtrain_fname = '../subtrain_' + data_fname_base
    subtrain_id_fname = subtrain_fname + '.id'
    validation_fname = '../validation_' + data_fname_base
    validation_id_fname = validation_fname + '.id'
    test_fname = '../test_' + data_fname_base
    test_id_fname = test_fname + '.id'

    print('Load subtrain data...')
    x_subtrain, y_subtrain = load_svmlight_file(subtrain_fname)
    print('Load validation data...')
Example #7
    iImage = iImage.resize(Resize)
    iImage = numpy.array(iImage)
    iImage = iImage.astype("float32")
    iImage = iImage / 255
    Valid["Image"].append(iImage)
Valid["Image"] = numpy.array(Valid["Image"])
Valid["EncodeLabelCode"] = to_categorical(Valid["Label"])
Valid["Variable"] = Valid["Table"][Variable]
################################################################################
##  Parameter control
Parameter = {}
Parameter["Batch"] = [16, 16, 16]
Parameter["Epoch"] = [50]
Parameter["LearnRate"] = [0.0001,0.0001]
Parameter["Optimizer"] = ["Adam"]
Parameter = list(ParameterGrid(Parameter))
##
##  Tune result
TuneResult = {}
TuneResult["Batch"] = []
TuneResult["Epoch"] = []
TuneResult["LearnRate"] = []
TuneResult["TrainAUC"] = []
TuneResult["TrainAccuracy"] = []
TuneResult["ValidAUC"] = []
TuneResult["ValidAccuracy"] = []
##
##  The better model
TheBetterModel = "Empty"
print("Prepare!")
################################################################################
def optimize_coefficients(num_coeff=3,
                          loss_func=None,
                          phi=1.0,
                          max_cost=2.0,
                          search_per_coeff=4,
                          sort_by_loss=False,
                          save_coeff=True,
                          tol=None,
                          verbose=True):
    """
    Computes the possible values of any number of coefficients,
    given a cost function, phi and max cost permissible.

    Takes into account the search space per coefficient
    so that the subsequent grid search does not become
    prohibitively large.

    # Arguments:
        num_coeff: number of coefficients that must be optimized.
        loss_func: coefficient cost function that is minimised to
            satisfy the least squares solution. The function can
            be user defined, in which case it must accept a numpy
            vector of length `num_coeff` defined above. It is
            suggested to use MSE against a pre-defined `max_cost`.
        phi: The base power of the parameters. Kept as 1
            for the initial search of base parameters.
        max_cost: The maximum permissible cost. User-defined
            constant, generally set to 2.
        search_per_coeff: int declaring the number of values tried
            per coefficient. Constructs a search space of size
            `search_per_coeff` ^ `num_coeff`.
        sort_by_loss: bool. Whether to sort the result set by its loss
            value, in order of lowest loss first.
        save_coeff: bool, whether to save the resulting coefficients
            into the file `param_coeff.npy` in current working dir.
        tol: float tolerance of error in the cost function. Used to
            select candidates which have a cost less than the tolerance.
        verbose: bool, whether to print messages during execution.

    # Returns:
        A numpy array of shape [search_per_coeff ^ num_coeff, num_coeff],
        each row defining the value of the coefficients which minimise
        the cost function satisfactorily (to some machine precision).
    """
    phi = float(phi)
    max_cost = float(max_cost)
    search_per_coeff = int(search_per_coeff)

    # if user defined cost function is not provided, use the one from
    # the paper in reference.
    if loss_func is None:
        loss_func = get_compound_coeff_func(phi, max_cost)

    # prepare inequality constraints
    ineq_constraints = {'type': 'ineq', 'fun': lambda x: x - 1.}

    # Prepare a matrix to store results
    num_samples = search_per_coeff**num_coeff
    param_range = [num_samples, num_coeff]

    # Sorted by ParameterGrid according to its key value, assuring sorted
    # behaviour for Python < 3.7.
    grid = {
        i: np.linspace(1.0, max_cost, num=search_per_coeff)
        for i in range(num_coeff)
    }

    if verbose:
        print("Preparing parameter grid...")
        print("Number of parameter combinations :", num_samples)

    param_grid = ParameterGrid(grid)

    if _joblib_available:
        with Parallel(n_jobs=-1, verbose=10 if verbose else 0) as parallel:
            param_set = parallel(
                delayed(_joblib_optimize)(param, loss_func, num_coeff,
                                          ineq_constraints)
                for param in param_grid)

        param_set = np.asarray(param_set)
    else:
        if verbose and num_samples > 1000:
            print("Consider using `joblib` library to speed up sequential "
                  "computation of {} combinations of parameters".format(
                      num_samples))

        param_set = np.zeros(param_range)
        param_set = _sequential_optimize(param_grid,
                                         param_set,
                                         loss_func,
                                         num_coeff=num_coeff,
                                         ineq_constraints=ineq_constraints,
                                         verbose=verbose)

    # compute a minimum tolerance of the cost function
    # to select it in the candidate list.
    if tol is not None:
        if verbose:
            print("Filtering out samples below tolerance threshold...")

        tol = float(tol)
        cost_scores = np.asarray([loss_func(xi) for xi in param_set])
        param_set = param_set[np.where(cost_scores <= tol)]
    else:
        cost_scores = None

    # sort by lowest loss first
    if sort_by_loss:
        if verbose:
            print("Sorting by loss...")

        if cost_scores is None:
            cost_scores = ([loss_func(xi) for xi in param_set])
        else:
            cost_scores = cost_scores.tolist()

        cost_scores_id = [(idx, loss) for idx, loss in enumerate(cost_scores)]
        cost_scores_id = sorted(cost_scores_id, key=lambda x: x[1])

        ids = np.asarray([idx for idx, loss in cost_scores_id])
        # reorder the original param set
        param_set = param_set[ids, ...]

    if save_coeff:
        np.save('param_coeff.npy', param_set)

    return param_set
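
A minimal call sketch, assuming only the signature and defaults documented above (the concrete values are illustrative, and the default cost function from get_compound_coeff_func is assumed to be importable alongside optimize_coefficients):

# Hypothetical usage: 4 values per coefficient for 3 coefficients gives a
# 4 ** 3 = 64 point search space; keep candidates within tolerance, sort by
# loss, and skip writing param_coeff.npy.
coeffs = optimize_coefficients(num_coeff=3,
                               phi=1.0,
                               max_cost=2.0,
                               search_per_coeff=4,
                               sort_by_loss=True,
                               save_coeff=False,
                               tol=1e-6)
print(coeffs.shape)  # at most (64, 3) after the tolerance filter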
Example #9
def optimize_coefficients(num_coeff=3, cost_func=None, phi=1.0, max_cost=2.0,
                          search_per_coeff=4, save_coeff=True, tol=None):
    """
    Computes the possible values of any number of coefficients,
    given a cost function, phi and max cost permissible.

    Takes into account the search space per coefficient
    so that the subsequent grid search does not become
    prohibitively large.

    # Arguments:
        num_coeff: number of coefficients that must be optimized.
        cost_func: coefficient cost function that is minimised to
            satisfy the least squares solution. The function can
            be user defined, in which case it must accept a numpy
            vector of length `num_coeff` defined above. It is
            suggested to use MSE against a pre-defined `max_cost`.
        phi: The base power of the parameters. Kept as 1
            for the initial search of base parameters.
        max_cost: The maximum permissible cost. User-defined
            constant, generally set to 2.
        search_per_coeff: int declaring the number of values tried
            per coefficient. Constructs a search space of size
            `search_per_coeff` ^ `num_coeff`.
        save_coeff: bool, whether to save the resulting coefficients
            into the file `param_coeff.npy` in current working dir.
        tol: float tolerance of error in the cost function. Used to
            select candidates which have a cost less than the tolerance.

    # Returns:
        A numpy array of shape [search_per_coeff ^ num_coeff, num_coeff],
        each row defining the value of the coefficients which minimise
        the cost function satisfactorily (to some machine precision).
    """
    phi = float(phi)
    max_cost = float(max_cost)
    search_per_coeff = int(search_per_coeff)

    # if user defined cost function is not provided, use the one from
    # the paper in reference.
    if cost_func is None:
        cost_func = get_compound_coeff_func(phi, max_cost)

    # prepare inequality constraints
    ineq_constraints = {
        'type': 'ineq',
        'fun': lambda x: x - 1.
    }

    # Prepare a matrix to store results
    param_range = [search_per_coeff ** num_coeff, num_coeff]
    param_set = np.zeros(param_range)

    # Sorted by ParameterGrid according to its key value, assuring sorted
    # behaviour for Python < 3.7.
    grid = {i: np.linspace(1.0, max_cost, num=search_per_coeff)
            for i in range(num_coeff)}

    param_grid = ParameterGrid(grid)
    for ix, param in enumerate(param_grid):
        # create a vector for the cost function and minimise using SLSQP
        x0 = np.array([param[i] for i in range(num_coeff)])
        res = minimize(cost_func, x0, method='SLSQP', constraints=ineq_constraints)
        param_set[ix] = res.x

    # compute a minimum tolerance of the cost function
    # to select it in the candidate list.
    if tol is not None:
        tol = float(tol)
        cost_scores = np.array([cost_func(xi) for xi in param_set])
        param_set = param_set[np.where(cost_scores <= tol)]

    if save_coeff:
        np.save('param_coeff.npy', param_set)

    return param_set
    show_decision_boundary(rp.clf, 'Decision boundary for LogisticRegression (+rankpruning) trained with noisy labels.\n Test Accuracy: ' + str(round(rp_score, 3)))
except:
    print("Plotting is only supported in an iPython interface.")


# In[5]:


param_grid = {
    "prune_method": ["prune_by_noise_rate", "prune_by_class", "both"],
    "converge_latent_estimates": [True, False],
}

# Fit LearningWithNoisyLabels across all parameter settings.
from sklearn.model_selection import ParameterGrid
params = ParameterGrid(param_grid)
scores = []
for param in params:
    clf = LogisticRegression(solver = 'lbfgs', multi_class = 'auto')
    rp = LearningWithNoisyLabels(clf = clf, **param)
    _ = rp.fit(X_train, s) # s is the noisy y_train labels
    scores.append(accuracy_score(rp.predict(X_test), y_test))

# Print results sorted from best to least
for i in np.argsort(scores)[::-1]:
    print("Param settings:", params[i])
    print(
        "Accuracy (using confident learning):\t", 
        round(scores[i], 2),
        "\n"
    )
def tune(x_train, y_train_orig, cols):
    logger.info('{}'.format(cols))
    x_train = x_train[cols].values  # [:, FEATURE]
    if IS_LOG:
        y_train = np.log1p(y_train_orig)
    else:
        y_train = y_train_orig

    logger.info('x_shape: {}'.format(x_train.shape))
    # x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=4242)
    all_params = {
        'eta': [0.05],
        'max_depth': [5],
        'subsample': [0.7],
        'colsample_bytree': [0.7],
        'objective': ['reg:linear'],
        #'eval_metric': [['rmse', rmsel_metric]],
        'silent': [1]
    }
    min_score = (100, 100, 100)
    min_score2 = (100, 100, 100)
    min_params = None
    use_score = 0
    cv = np.arange(x_train.shape[0])

    for params in list(ParameterGrid(all_params)):
        #cv = TimeSeriesSplit(n_splits=5).split(x_train)
        cnt = 0
        list_score = []
        list_score2 = []
        list_best_iter = []
        all_pred = np.zeros(y_train.shape[0])
        for train, test in [[cv[:-VALID_NUM], cv[-VALID_NUM:]]]:
            trn_x = x_train[train]
            val_x = x_train[test]

            trn_y = y_train[train]
            val_y = y_train[test]

            dtrain = xgb.DMatrix(trn_x, trn_y)
            dtest = xgb.DMatrix(val_x, val_y)

            clf = xgb.train(
                params,
                dtrain,
                feval=rmsel_metric,
                evals=[(dtest, 'val')],
                num_boost_round=1000,  # 384,
                early_stopping_rounds=100)
            pred = clf.predict(dtest)
            all_pred[test] = pred

            _score = rmsel(val_y, pred)
            _score2 = rmse(
                val_y,
                pred)  # np.exp(pred) - 1)  # - roc_auc_score(val_y, pred)
            # logger.debug('   _score: %s' % _score)
            list_score.append(_score)
            list_score2.append(_score2)
            if clf.best_iteration != -1:
                list_best_iter.append(clf.best_iteration)
            else:
                list_best_iter.append(params['n_estimators'])

        # with open('tfidf_all_pred2_7.pkl', 'wb') as f:
        #    pickle.dump(all_pred, f, -1)

        logger.info('trees: {}'.format(list_best_iter))
        params['n_estimators'] = np.mean(list_best_iter, dtype=int)
        score = (np.mean(list_score), np.min(list_score), np.max(list_score))
        score2 = (np.mean(list_score2), np.min(list_score2),
                  np.max(list_score2))

        logger.info('param: %s' % (params))
        logger.info('loss: {} (avg min max {})'.format(score[use_score],
                                                       score))
        logger.info('score: {} (avg min max {})'.format(
            score2[use_score], score2))
        if min_score[use_score] > score[use_score]:
            min_score = score
            min_score2 = score2
            min_params = params
        logger.info('best score: {} {}'.format(min_score[use_score],
                                               min_score))
        logger.info('best score2: {} {}'.format(min_score2[use_score],
                                                min_score2))
        logger.info('best_param: {}'.format(min_params))

    gc.collect()
    return min_score[use_score]
Example #12
    'DROPOUT': [0.2],
    'ACTIVATION2': ['relu'],
    'OPTIMIZER': ['Adam']
}

counter = 0
grid_results = pd.DataFrame(columns=[
    'counter', 'col_selection', 'HISTORY', 'STEP_FACTOR', 'LEVEL', 'WAVELET',
    'FILTERS1', 'KERNEL_SIZE1', 'PADDING1', 'DILATION_RATE1', 'ACTIVATION1',
    'CONV2', 'POOL_SIZE1', 'CONV_POOL2', 'DENSE_UNITS', 'DROPOUT',
    'ACTIVATION2', 'OPTIMIZER', 'mse', 'mae', 'train_samples', 'test_samples',
    'train_time'
])
output_path = results_path + 'gridsearch_results.csv'

param_grid = ParameterGrid(parameters)
for dict_ in param_grid:
    start = time()
    conv_mse, mae, train_shape, test_shape, predictions = grid_search(
        df, dict_['col_selection'], dict_['HISTORY'], dict_['STEP_FACTOR'],
        dict_['LEVEL'], dict_['WAVELET'], dict_['FILTERS1'],
        dict_['KERNEL_SIZE1'], dict_['PADDING1'], dict_['DILATION_RATE1'],
        dict_['ACTIVATION1'], dict_['CONV2'], dict_['POOL_SIZE1'],
        dict_['CONV_POOL2'], dict_['DENSE_UNITS'], dict_['DROPOUT'],
        dict_['ACTIVATION2'], dict_['OPTIMIZER'], counter)
    train_time = round(time() - start, 2)
    grid_results = grid_results.append(
        {
            'counter': counter,
            'col_selection': dict_['col_selection'],
            'HISTORY': dict_['HISTORY'],
    def find_best_params_GridSearchCV(self, gridsearch, X_local, y_local):
        """
        Find best set of parameters given a grid-search round 
        across various folds.
        """
        """ Generates all combination of hyperparameters """
        hyperparam_space = list(ParameterGrid(gridsearch))

        if len(hyperparam_space) == 1:
            return hyperparam_space[0]
        """ Splits local folds """
        kf = KFold(n_splits=self.cross_validation)
        kf.get_n_splits(self.X_train)

        folds = []
        for train_index, test_index in kf.split(X_local):
            folds.append([pd.Series(train_index), pd.Series(test_index)])

        if self.verbose:
            n_folds = len(folds)
            n_combinations = len(hyperparam_space)

            msg = "Fitting %s models...\n"
            print(msg % (n_combinations))
        """ Performs gridsearch """
        #Stores performance, Stores classification reports
        performance = []
        for params_it, params in enumerate(hyperparam_space):
            time_start = time.time()

            #Evaluation rounds
            local_results = []
            for fold_it, fold in enumerate(folds):
                X_train = X_local.iloc[fold[0]]
                X_test = X_local.iloc[fold[1]]
                y_train = y_local.iloc[fold[0]]
                y_test = y_local.iloc[fold[1]]

                params['eval_set'] = [(X_test, y_test)]

                alg = xgbw.XGBWrapper(**params)
                alg.fit(X_train, y_train)

                pred_test = alg.predict(X_test)

                local_report = class_report(y_true=y_test, y_pred=pred_test)

                local_results.append(local_report)

            #Stores performance evaluation given the performance-policy
            metric, statistic = self.performance_politic.split('__')
            local_performance = []

            for local_report in local_results:
                local_report = local_report.drop('Global')
                metric_results = local_report[metric]

                if statistic == 'min':
                    metric_stat = metric_results.min()
                elif statistic == 'max':
                    metric_stat = metric_results.max()
                elif statistic == 'mean':
                    metric_stat = metric_results.mean()

                local_performance.append(metric_stat)

            local_performance = pd.Series(local_performance)
            performance.append(local_performance)

            time_end = time.time()
            elapsed_time = (time_end - time_start)

            if self.gridsearch_verbose:
                msg = "%s of %s - %s: %s  - %s s" % (
                    (params_it + 1), n_combinations, self.performance_politic,
                    round(local_performance.mean(), 4), round(elapsed_time, 2))

                print(msg)

                for param_name in params.keys():
                    if param_name != 'eval_set':
                        msg = "\t%s: %r" % (param_name, params[param_name])
                        if self.verbose:
                            print(msg)
                print('')

        performance = pd.DataFrame(performance)

        mean_performance = performance.mean(axis=1)
        idx_best = mean_performance.idxmax()
        best_parameters = hyperparam_space[idx_best]

        return best_parameters
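
The select-by-mean-fold-score idea above, stripped of the class machinery, as a hedged sketch using a plain scikit-learn estimator instead of the XGBWrapper/class_report helpers:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, ParameterGrid

X, y = make_classification(n_samples=200, random_state=0)
hyperparam_space = list(ParameterGrid({"C": [0.1, 1.0, 10.0]}))
kf = KFold(n_splits=3)

mean_scores = []
for params in hyperparam_space:
    fold_scores = []
    for train_index, test_index in kf.split(X):
        clf = LogisticRegression(**params).fit(X[train_index], y[train_index])
        fold_scores.append(f1_score(y[test_index], clf.predict(X[test_index])))
    mean_scores.append(np.mean(fold_scores))

best_parameters = hyperparam_space[int(np.argmax(mean_scores))]
print(best_parameters)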
    def run_gridsearch(self, cv, cv_score: str) -> None:
        """
		Performs a gridsearch over the tuning hyperparameters. Determines the 
		best hyperparameters based on the average validation performance 
		calculated over cross-validation folds.
		:param cv: A cross-validarion generator that determines the 
		cross-validation strategy.
		:param cv_score: Measure to evaluate predictions on the validation set. 
		"""
        # Setting fixed parameters
        params = self.fixed_params.copy()

        # Fix seed
        np.random.seed(1)
        tf.set_random_seed(2)

        # Start Gridsearch
        best_AUC = 0.5
        for tune in ParameterGrid(self.tuning_params):
            params.update(tune)

            AUC_val = []
            for train, val in cv.split(self.X_tr, self.y_tr):
                X_train, y_train = self.X_tr.iloc[train], self.y_tr.iloc[train]
                X_val, y_val = self.X_tr.iloc[val], self.y_tr.iloc[val]

                e_stop = EarlyStopping(
                    monitor=params["monitor"],
                    min_delta=params["min_delta"],
                    patience=params["iter_patience"],
                    mode=params["mode"],
                )
                callbacks = [e_stop]
                optimizer = eval("keras.optimizers." + params["optimizer"])(
                    lr=params["learning_rate"]
                )

                model = Sequential()
                model.add(
                    Dense(
                        params["num_neurons"],
                        input_dim=len(list(self.X_tr)),
                        kernel_initializer=params["weight_init"],
                        activation=params["hidden_activation"],
                        kernel_regularizer=keras.regularizers.l1(params["l1_ratio"]),
                    )
                )
                model.add(Dropout(params["dropout_rate"]))
                model.add(
                    Dense(
                        1,
                        kernel_initializer=params["weight_init"],
                        activation=params["out_activation"],
                        kernel_regularizer=keras.regularizers.l1(params["l1_ratio"]),
                    )
                )

                model.compile(loss=params["loss_func"], optimizer=optimizer)

                history = model.fit(
                    X_train,
                    y_train,
                    callbacks=callbacks,
                    validation_data=(X_val, y_val),
                    epochs=params["epochs"],
                    batch_size=params["batch_size"],
                    verbose=0,
                )

                validation_AUC = calc_perf_score(
                    data=X_val,
                    labels=np.array(y_val.astype("float")),
                    model=model,
                    model_name=self.name,
                    score_name=cv_score,
                )
                AUC_val.append(validation_AUC)

            AUC_val = np.mean(AUC_val)

            if AUC_val > best_AUC:
                best_AUC = AUC_val
                self.best_tuning_params = tune

            keras.backend.clear_session()
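
The fixed-plus-tuned parameter handling above boils down to a small pattern; a hedged standalone sketch (parameter names and the placeholder score are illustrative):

from sklearn.model_selection import ParameterGrid

fixed_params = {"epochs": 100, "batch_size": 32, "monitor": "val_loss"}   # hypothetical fixed config
tuning_params = {"num_neurons": [32, 64], "learning_rate": [1e-3, 1e-4]}  # hypothetical tuning grid

best_AUC, best_tuning_params = 0.5, None
for tune in ParameterGrid(tuning_params):
    params = fixed_params.copy()
    params.update(tune)  # merge the current tuning choice into the fixed configuration
    AUC_val = 0.6        # placeholder for the cross-validated score of a model built from params
    if AUC_val > best_AUC:
        best_AUC, best_tuning_params = AUC_val, tune
print(best_tuning_params)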
Example #15
			"8,4,2", "8,2,2"
		],
		"rnns": [
			"2000,2000",
			"1000,1000",
			"1000"
		],
		"mlps": [		# Last layer will automatically be added to map toward a q_level space
			"1000,1000",
			"2000,2000"
		],
		"emb_size": [256],
		"q_levels": [256],
	}

	hparams = list(ParameterGrid(hparams))

	for index, hparam in enumerate(hparams):
		
		print("Training config {:d}".format(index))

		# Create folder
		param_folder = LOGDIR_ROOT + '/' + str(index)
		os.mkdir(param_folder)

		# Write config
		with open(param_folder + '/hparam.json', 'w') as fp:
			json.dump(hparam, fp)

		main_command = """python main.py \\
	--batch_size=64 \\
Example #16
            data[key] = load_all_data(
                basedir=BASEDIR,
                class_one=CLASS_ONE,
                class_two=CLASS_TWO,
                ending=ENDING,
            )
        else:
            logger.info("data was pre-built, continuing")

        data_train = data[key][job["data_prefix"] + "train"]
        data_test = data[key][job["data_prefix"] + "test"]

        labels_train = data[key]["labels_train"]
        labels_test = data[key]["labels_test"]

        t_hparam_grid = ParameterGrid(training_hparams)
        m_hparam_grid = ParameterGrid(model_hparams)

        logger.info("selecting {} combinations".format(nb_selections))

        candidates = list(product(list(t_hparam_grid), list(m_hparam_grid)))

        hparam_grid = random.sample(population=candidates, k=nb_selections)

        # if nb_proc > 1:
        for count, (t_hparams, m_hparams) in enumerate(hparam_grid):
            logger.info("performing grid element {} of {}".format(
                count + 1, nb_selections))

            train_caloclf_model(
                model_fn=model_fn,
Example #17
    # Read arguments.
    if len(sys.argv) > 1:
        run_from_args()
    else:
        from sklearn.model_selection import ParameterGrid

        # Get task grid.
        Edit_Cost_List = ['BIPARTITE', 'IPFP']
        Dataset_list = [
            'Alkane_unlabeled', 'Acyclic', 'Chiral', 'Vitamin_D', 'Steroid'
        ]
        Dis_List = ['euclidean', 'manhattan']
        task_grid = ParameterGrid({
            'edit_cost': Edit_Cost_List[0:1],
            'dataset': Dataset_list[1:2],
            'distance': Dis_List[:]
        })

        unlabeled = False  # @todo: Not actually used.
        mode = 'reg'
        # Run.
        for task in list(task_grid):
            print()
            print(task)

            output_result = 'outputs/results.' + '.'.join([
                task['dataset'], task['edit_cost'], task['distance']
            ]) + '.pkl'
            if not os.path.isfile(output_result):
                run_xp(task['dataset'], output_result, unlabeled, mode,
Example #18
    from sklearn.model_selection import train_test_split
    #{'colsample_bytree': 0.7, 'max_bin': 255, 'seed': 2261, 'n_estimators': 8488, 'min_child_samples': 10, 'learning_rate': 0.01, 'max_depth': 10, 'boosting_type': 'gbdt', 'reg_alpha': 1, 'reg_lambda': 1, 'min_child_weight': 3, 'num_leaves': 200, 'min_split_gain': 0, 'subsample': 0.9}
    # x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=4242)
    all_params = {
        'C': [0.1],
        'n_jobs': [-1],  # [0.06, 0.1, 0.2],
        'solver': ['sag'],
        'random_state': [2261]
    }
    min_score = (100, 100, 100)
    min_params = None
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=871)
    use_score = 0

    for params in ParameterGrid(all_params):
        list_score = []
        list_score2 = []
        list_best_iter = []
        all_pred = np.zeros(y_train.shape[0])
        for train, test in cv.split(x_train, y_train):
            trn_x = x_train[train]
            val_x = x_train[test]
            trn_y = y_train[train]
            val_y = y_train[test]
            trn_w = sample_weight[train]
            val_w = sample_weight[test]

            clf = LogisticRegression(**params)
            # pred_x = cross_val_predict(reg, trn_x, trn_y, cv=5, n_jobs=-1)
            # trn_x = np.c_[trn_x, pred_x]
Example #19
def build_a_complex_brain():
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import ParameterGrid
    from sklearn.ensemble import VotingClassifier # Voting will help us win the competition! We can combine the top models together!

    # Models we will use: (TODO: Read the documentation and come up with better Classifiers and Parameters)
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.neural_network import MLPClassifier
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import ExtraTreeClassifier
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import NearestCentroid
    from sklearn.neighbors import RadiusNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import RidgeClassifier
    from sklearn.svm import NuSVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.linear_model import SGDClassifier
    from sklearn.ensemble import AdaBoostClassifier
    # Models end
    # All the models we are going to use
    candidate_models = [
        AdaBoostClassifier,
        # SGDClassifier,
        MultinomialNB,
        BernoulliNB,
        DecisionTreeClassifier,
        ExtraTreeClassifier,
        ExtraTreesClassifier,
        KNeighborsClassifier,
        # LinearSVC,
        LogisticRegression,
        NearestCentroid,
        RandomForestClassifier,
        RidgeClassifier,
        # NuSVC,
        GradientBoostingClassifier,
    ]
    # The parameters that could pass to model
    candidate_parameters = {
        "SGDClassifier":            {"max_iter": [10, 100], "tol" : [1e-3], "loss": ["log", "modified_huber"]}, 
        "ExtraTreesClassifier":     {"n_estimators": [20, 200]},
        "LinearSVC":                {"max_iter": [200, 2000], "multi_class": ["crammer_singer"]},
        "LogisticRegression":       {"solver": ["lbfgs"], "multi_class": ["auto"]},
        "RandomForestClassifier":   {"n_estimators": [10,30]},
        "NuSVC":                    {"gamma": ["scale", "auto"]},
    }
    # Build the model
    brains = []
    for this_model in candidate_models:
        if this_model.__name__ in candidate_parameters:
            parameter_grid = candidate_parameters[this_model.__name__]
            parameters = list(ParameterGrid(parameter_grid)) # make grid for parameter-pairs
            for i, parameter in enumerate(parameters):
                brain = Pipeline([
                    ("vect", CountVectorizer()),
                    ("tfidf", TfidfTransformer()),
                    ("clf", this_model(**parameter)),
                ])
                brains.append( (f"{this_model.__name__}-{i}", brain) )
        else:
            brain = Pipeline([
                ("vect", CountVectorizer()),
                ("tfidf", TfidfTransformer()),
                ("clf", this_model()),
            ])
            brains.append( (f"{this_model.__name__}", brain) )
    brain = VotingClassifier( estimators=brains, voting="hard" )
    return brain
Example #20
    # x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=4242)
    all_params = {
        'eta': [0.05],
        'max_depth': [5],
        'subsample': [0.7],
        'colsample_bytree': [0.7],
        'objective': ['reg:linear'],
        #'eval_metric': [['rmse', rmsel_metric]],
        'silent': [1]
    }
    min_score = (100, 100, 100)
    min_params = None
    use_score = 0
    cv = np.arange(x_train.shape[0])

    for params in tqdm(list(ParameterGrid(all_params))):
        #cv = TimeSeriesSplit(n_splits=5).split(x_train)
        cnt = 0
        list_score = []
        list_score2 = []
        list_best_iter = []
        all_pred = np.zeros(y_train.shape[0])
        for train, test in [[cv[:-VALID_NUM], cv[-VALID_NUM:]]]:
            trn_x = x_train[train]
            val_x = x_train[test]

            trn_y = y_train[train]
            val_y = y_train[test]

            dtrain = xgb.DMatrix(trn_x, trn_y)
            dtest = xgb.DMatrix(val_x, val_y)
    args = parser.parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Generate a grid of hyperparameters
    param_grid_base = {
        "lr": [1e-3, 1e-4, 1e-5],
        "batch_size": [128, 256, 512],
        "drop_prob": [0.0, 0.25, 0.5, 0.75],
        "num_epochs": [150],
        "num_hidden": [1, 2, 3],
        "hidden_dim": [128, 256],
        "gamma": [1.0],
        "early_stopping": [True],
        "early_stopping_patience": [10],
    }
    the_grid = list(ParameterGrid(param_grid_base))
    np.random.shuffle(the_grid)
    the_grid = the_grid[:args.grid_size]
    print(args.grid_size)
    for task in args.tasks:
        # the_grid = [{**args.__dict__, **x} for x in the_grid]
        grid_df = pd.DataFrame(the_grid)
        config_path = os.path.join(args.data_path, "experiments",
                                   args.experiment_name, "config", task)
        os.makedirs(config_path, exist_ok=True)
        grid_df.to_csv(os.path.join(config_path, "config.csv"),
                       index_label="id")

        for i, config_dict in enumerate(the_grid):
            config_dict["label_col"] = task
            yaml_write(config_dict,
Example #22
        "draw": SP_DRAW,
        "print_train":
        SP_PRINT_TRAIN,  # 0: nothing, 1 : full detail, 2: short version
    }
    paras_name = "hs_{}-ep_{}-act_{}".format(item["hidden_size"],
                                             item["epoch"], item["activation"])
    root_hybrid_paras = {
        "hidden_size": item["hidden_size"],
        "activation": item["activation"],
        "epoch": item["epoch"],
        "domain_range": item["domain_range"],
        "paras_name": paras_name
    }
    two_paras = {"epoch": item["epoch"], "pop_size": item["pop_size"]}
    md = OTwoElm(root_base_paras=root_base_paras,
                 root_hybrid_paras=root_hybrid_paras,
                 two_paras=two_paras)
    md._running__()


for _ in range(SP_RUN_TIMES):
    for loop in range(len(SP_DATA_FILENAME)):
        filename = SP_LOAD_DATA_FROM + SP_DATA_FILENAME[loop]
        dataset = load_dataset(filename, cols=SP_DATA_COLS[loop])
        feature_size = len(SP_DATA_COLS[loop])
        multi_output = SP_DATA_MULTI_OUTPUT[loop]
        output_index = SP_OUTPUT_INDEX[loop]
        # Create combination of params.
        for item in list(ParameterGrid(param_grid)):
            train_model(item)
Example #23
listValidImage = []
for index, data in dataValid.iterrows():
    strResult = str(data["result"])
    strImage = str(data["id"]) + ".jpg"
    objectImage = pil.open("Holdout/Valid/Image/" + strResult + "/" + strImage).resize(tupleResize)
    arrayImage  = numpy.array(objectImage) / 255
    listValidImage.append(arrayImage)
arrayValidImage = numpy.array(listValidImage).astype("float32")
##
##  Parameter
dictParameter = {}
dictParameter["Batch"] = [64]
dictParameter["Epoch"] = [2, 2, 2]
dictParameter["Eta"] = [0.0001]
dictParameter["Optimizer"] = ["Adam"]
listParameter = list(ParameterGrid(dictParameter))
##
##  Result
dictTuneResult = TuneResult()
strResultPath  = strOutputPath + str.split(str(timeit.default_timer()), ".")[1] + "/"
os.makedirs(strResultPath)
##
##  Model
for i, p in enumerate(listParameter):
    ##
    ##  Initial
    floatStart = timeit.default_timer()
    random.seed(2)
    numpy.random.seed(2018)
    tensorflow.set_random_seed(2018)
    os.environ['PYTHONHASHSEED'] = "1"
def create_parameter_grid(param_dict):
    from sklearn.model_selection import ParameterGrid
    return ParameterGrid(param_dict)
Example #25
    def _get_params(self):
        """Parameters to pass to `fit`.

        By default, a GridSearch over ``self.parameters`` is used.
        """
        return ParameterGrid(self.parameters)
Example #26
from sklearn.model_selection import ParameterGrid
import tensorflow as tf

from models.gans.mnist import generator_forward
from models.tasks.mnist import classifier_forward
from tasks.semi_classify import run_task
from utils.data_utils import MNISTLoader

NAME_STYLE = "pub_%(public_num)d_final_%(gen_frac_final).1f_step_%(gen_frac_step)d"

num_images = 50000

GRID = ParameterGrid({
        "gen_frac_init": [0.00],
        "gen_frac_final": [0.0, 0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 0.9, 1.0],
        "gen_frac_step": [0, 100, 200, 400, 500, 600, 800],
        "public_num": [100, 250, 500, 750, 1000],
    }
)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("model_path", metavar="MODELPATH")
    parser.add_argument("--log-dir", dest="log_dir")
    parser.add_argument("--save-dir", dest="save_dir")
    parser.add_argument("--data-dir", dest="data_dir", default="/tmp/mnist")
    parser.add_argument("--batch-size", dest="batch_size", type=int, default=128)
    parser.add_argument("-e", "--num-epoch", dest="num_epoch", type=int, default=10)
    parser.add_argument("--dim", dest="dim", default=64, type=int)
    parser.add_argument("--public-num", dest="public_num", type=int, default=50000)
Example #27
def basic_gridsearch(ex,
                     grid,
                     param_drops=None,
                     verbose=2,
                     handle_completed_state=True,
                     logging='WARNING'):
    """Basic gridsearch for a sacred experiment.

    Given an experiment and a parameter grid, this will iterate over all possible combinations of parameters specified
    in the grid. In an iteration, the experiment configuration is updated and the experiment is run.

    Args:
        ex (sacred.Experiment): A sacred experiment.
        grid (dict, list): Parameter grid, analogous in setup to sklearn gridsearches. If a list is specified then
            it is assumed to come from a restart and is used as given.
        param_drops (list): A list of functions that take one argument that acts on params and drops certain options
            from being run.
        verbose (int): Output verbosity level.
        handle_completed_state (bool): Set True to examine whether the parameters have already been run and additionally
            if that run was marked as completed. NOTE: You need to be careful with this option. It will only work if the
            completion state is being set at the end of an experiment run. If it is not being set then it will always
            delete and rerun.
        logging (str): The logging level.
        seed (int): Random state to set. Set None for a random seed, this will be stored in the run config.

    Examples:
        Param drops
        ---------------
        The param drops argument is used to remove certain parameter combinations from the parameter grid that would
        otherwise have been run. For example:
        >>> param_drops = [
        >>>    lambda params: (params['model_type'] in ['rnn', 'gru']) and (params['depth'] > 1)
        >>> ]
        >>> basic_gridsearch(ex, grid, param_drops=param_drops)
        will run the gridsearch as normal, but in any instances where model_type was 'rnn' or 'gru' and 'depth' was > 1
        the gridsearch will skip these options.

    Returns:
        None
    """
    # Set logging
    set_logger(ex, level=logging)

    # Setup the grid
    assert any([isinstance(grid, dict), isinstance(grid, list)])
    if isinstance(grid, dict):
        param_grid = list(ParameterGrid(grid))
    else:
        param_grid = grid

    # Perform the param drops
    if param_drops is not None:
        param_grid = [
            p for p in param_grid
            if not any([drop_bool(p) for drop_bool in param_drops])
        ]

    # Get num iters
    grid_len = len(param_grid)

    for i, params in tqdm(enumerate(param_grid)):
        # Print info
        if verbose > 0:
            print('\n\n\nCONFIGURATION {} of {}\n'.format(i + 1, grid_len) +
                  '-' * 100)
            pprint(params)
            print('-' * 100)

        # Set the random seed
        handle_seed(params)

        # Skip if done
        if handle_completed_state:
            if check_run_existence(ex, params):
                continue

        # Update the ingredient configuration
        params = update_ingredient_params(ex, params)

        # Update configuration and run
        try:
            # Update with hyperopt if specified
            # This is in the try except in case hyperparams cannot be found, error is caught and printed
            if 'hyperopt_metric' in params:
                metric = params['hyperopt_metric']
                assert isinstance(metric,
                                  str), "Hyperopt metric must be a string."
                hyperparams = get_hyperparams(ex,
                                              params['model_type'],
                                              metric=metric)
                params.update(hyperparams)

            # Run and mark completed if successful
            ex.run(config_updates=params, info={})
            save_pickle(
                True, ex.current_run.save_dir + '/other/completion_state.pkl')
        except Exception as e:
            handle_error(ex.current_run, e, print_error=True)
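
The param_drops filtering described in the docstring can be exercised on its own with a plain ParameterGrid; the grid and drop rule below are hypothetical:

from sklearn.model_selection import ParameterGrid

grid = {"model_type": ["rnn", "gru", "lstm"], "depth": [1, 2, 3]}
param_drops = [
    lambda params: (params["model_type"] in ["rnn", "gru"]) and (params["depth"] > 1)
]

param_grid = [
    p for p in ParameterGrid(grid)
    if not any(drop_bool(p) for drop_bool in param_drops)
]
print(len(param_grid))  # 5 of the 9 combinations survive the drops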
    activation = ['logistic', 'tanh', 'relu']
    solver = ['lbfgs', 'sgd', 'adam']
    alpha = [x for x in np.linspace(0.0001, 1, num=1000)]
    max_iter = [5000]

    grid = {
        'hidden_layer_sizes': numLayers,
        'activation': activation,
        'solver': solver,
        'alpha': alpha,
        'max_iter': max_iter
    }

    print('Searching')
    randomCombinations = random.sample(list(ParameterGrid(grid)), numSamples)
    score_list = []
    combination_list = []
    train_acc_list = []
    test_acc_list = []
    for combination in randomCombinations:
        skf = StratifiedKFold(n_splits=cvCount,
                              random_state=seed,
                              shuffle=True)
        s = 0
        tr_acc = 0
        te_acc = 0
        for train_idx, test_idx in skf.split(X_train, Y_train):
            split_x_train, split_x_test = X_train[train_idx], X_train[test_idx]
            y_true_train, y_true_test = Y_train[train_idx], Y_train[test_idx]
            mlp = MLPClassifier(**combination)
            # 'custom_action_dist': 'gaussian_copula_action_distribution',
            'custom_options': {
                'num_layers': 3,
                'layer_size': 64,
                'activation': 'relu',
                'reward_to_go': True,
                'use_vf_adv': True
            }
        }
    }

    param_grid_spec = {
        'env': ['Reverse-v0', 'Copy-v0', 'ReversedAddition3-v0']
        # 'env_config.env_name': ['Reverse-v0', 'Copy-v0', 'RepeatCopy-v0', 'DuplicatedInput-v0', 'ReversedAddition3-v0']
    }
    param_grid = list(ParameterGrid(param_grid_spec))

    for params in param_grid:
        exp_config = merge_configs(base_config, params)
        env_name = exp_config['env']
        exp_name = 'ppo_baseline_{}_{}'.format(model_name, env_name)
        export_dir = '{}/{}'.format(base_export_dir, exp_name)

        run_experiment_repeatedly(exp_name, 'PPO', num_iter=num_iter, 
                base_export_dir=export_dir, config=exp_config, seeds=seeds)
    
    # run_experiment(exp_name, PGCopulaTrainer, num_iter=100, export_dir=export_dir, 
    #         config=config)
    
    # results = tune.run(
    #     PGCopulaTrainer,
Example #30
def main(runname, expstatslog, mlflowlog, earlystop):
    if not mlflowlog:
        global mlflow
        mlflow = dumbflow()
    if expstatslog:
        exp_status_write = open(EXP_STATUS, "a")
    else:
        exp_status_write = sys.stdout

    exp_status_write.write("\n\n\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write("  BEGINNING NEW EXECUTION (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write("  ------------------------" + "\n\n")
    # We are tracking drift adaptivity
    # namely labeled drift detection

    # Set up explicit drift detection params
    explicit_drift_param_grid = {
        "allow_explicit_drift": [(True, "ExpDr")],
        "explicit_drift_class": [("LabeledDriftDetector", "LDD")],
        "explicit_drift_mode": [("PageHinkley", "PageHinkley"),
                                ("ADWIN", "ADWIN"), ("EDDM", "EDDM"),
                                ("DDM", "DDM")],
        "explicit_update_mode": [("all", "A"), ("errors", "E")],
        "allow_unlabeled_drift": [(False, "")],
        "allow_update_schedule": [(False, "")],
        "weight_method": [("unweighted", "U"), ("performance", "P")],
        "select_method": [("recent", "RR"), ("recent-new", "RN"),
                          ("recent-updates", "RU")],
        "filter_method": [("no-filter", "F"), ("top-k", "T"),
                          ("nearest", "N")],
        "kval": [(5, "5"), (10, "10")]
    }
    explicit_drift_params = ParameterGrid(explicit_drift_param_grid)

    for param_set in explicit_drift_params:
        # This is an experiment
        if param_set["explicit_update_mode"][0] == "all":
            continue
        # Load up configuration file
        mlepConfig = io_utils.load_json('./MLEPServer.json')

        # Update config file and generate an experiment name
        experiment_name = ''
        for _param in param_set:
            if param_set[_param][1] != "":
                experiment_name += param_set[_param][1] + '-'
            mlepConfig["config"][_param] = param_set[_param][0]
        experiment_name = experiment_name[:-1]

        # Now we have the Experimental Config we can use for running an experiment
        # generate an experiment name
        exp_status_write.write("--STATUS-- " + experiment_name + "   ")
        exp_status_write.flush()
        try:
            runExperiment(runname, mlepConfig, experiment_name, expstatslog,
                          earlystop)
            exp_status_write.write("SUCCESS\n")
        except Exception as e:
            exp_status_write.write("FAILED\n")
            exp_status_write.write(traceback.format_exc())
            exp_status_write.write(str(e))
            exp_status_write.write("\n")
            exp_status_write.flush()
            mlflow.end_run()
        exp_status_write.flush()

    exp_status_write.write("\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write("  FINISHED EXECUTION OF (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write("  ------------------------" + "\n\n")
    exp_status_write.close()