def C_Hyp(trainDir2, valDir2, testDir2, modelEpochs=20):
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    from skopt import gp_minimize
    from skopt.space import Real, Categorical, Integer
    from skopt.plots import plot_convergence, plot_objective, plot_evaluations
    from skopt.utils import use_named_args
    from keras import backend as K
    from keras.models import Model, load_model, Sequential
    from keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed

    path_best_model = '19_best_model.keras'
    dim_learning_rate = Real(low=1e-6, high=1e-2, prior='log-uniform',
                             name='learning_rate')
    dim_activation = Categorical(categories=['relu', 'softmax'],
                                 name='activation_function')
    optimizer = Categorical(categories=['Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Nadam', 'SGD'], name='optimizer')
    loss = Categorical(
        categories=['binary_crossentropy', 'categorical_crossentropy', 'categorical_hinge', 'mean_absolute_error',
                    'mean_absolute_percentage_error', 'mean_squared_error', 'mean_squared_logarithmic_error'],
        name='loss')
    dim_num_dense_layers = Integer(low=0, high=1, name='num_dense_layers')
    batch_size = Integer(low=32, high=128, name='batch_size')

    dimensions = [dim_learning_rate,
                  dim_activation,
                  optimizer,
                  loss,
                  dim_num_dense_layers,
                  batch_size]

    trainDir2 = pd.DataFrame(trainDir2)
    valDir2 = pd.DataFrame(valDir2)

    @use_named_args(dimensions=dimensions)
    def C_hyper(learning_rate, activation_function, optimizer, loss, num_dense_layers, batch_size):
        # modelEpochs is taken from the enclosing C_Hyp argument
        valDir = valDir2
        sequenceLength2 = 100
        trainDir = trainDir2
        df = trainDir
        fault = 'Class'
        col = df.columns
        for elements in col:
            # pick up the label column regardless of its exact spelling
            if any(key in elements for key in ("FaultNumber", "Class", "class", "faultnumber")):
                fault = elements
        # df = df.loc[df[fault]==0]
        df = df.drop([fault], axis=1)
        trainingData = pd.DataFrame(df)
        print("loading_data")

        #global numOfColumns
        # global valDir
        testDir = testDir2

        # global sequenceLength
        def data_3d_reshape(Data):

            print(Data.shape, "DATA_3D_Reshaping")
            sequenceLength = sequenceLength2
            numOfSequences = int(Data.shape[0] / sequenceLength)
            numOfColumns = int(Data.shape[1])
            print("sequenceLength: ", sequenceLength)
            print("numOfSequences: ", numOfSequences)
            print("numOfColumns: ", numOfColumns)
            boot = Data.shape[0]
            bootr = sequenceLength * numOfSequences
            print(bootr)

            final = Data[0:bootr, :]
            print(final.shape, 'shape of final ')
            return final.reshape(numOfSequences, sequenceLength, numOfColumns)

        def myround(x, base):
            # round x down to the nearest multiple of base
            a = base * np.round(x / base)
            if a > x:
                return a - base
            else:
                return a

        valx = myround(trainingData.shape[0], sequenceLength2)
        valx = int(valx)
        print(valx, "heeee")
        trainingData = trainingData.iloc[0:valx, :]

        trainDataColumns = trainingData.columns
        print(trainingData.shape, "HEREE")
        # setting up number of sequences and number of columns
        numOfSequences = int(len(trainingData.index) / sequenceLength2)
        numOfColumns = len(trainingData.columns)

        # scaling the data & saving the scaler for later use
        scaler = MinMaxScaler()
        scaled_trainingData = scaler.fit_transform(trainingData)
        # scaler_filename = "scaler.save"
        # joblib.dump(scaler, scaler_filename)
        dfval = valDir
        col = dfval.columns

        valDir = dfval
        valdirfault = valDir[fault]
        valDir = valDir.drop([fault], axis=1)
        valval = myround(valDir.shape[0], sequenceLength2)
        valval = int(valval)
        valDir = valDir.iloc[:valval, :]

        # reuse the scaler fitted on the training data
        valDir = scaler.transform(valDir)
        valDir = data_3d_reshape(valDir)

        # for test
        testfault = testDir[fault]
        testDir = testDir.drop([fault], axis=1)
        valtest = myround(testDir.shape[0], sequenceLength2)
        valtest = int(valtest)
        testDir = testDir.iloc[:valtest, :]
        testfault = testfault.iloc[:valtest]
        testDir = scaler.transform(testDir)
        testDir = data_3d_reshape(testDir)

        # converting the scaled data to dataFrame

        trainingData = data_3d_reshape(scaled_trainingData)
        scaledTrainingData = trainingData

        sequenceLength = sequenceLength2

        def autoencoder_model(trainingData):
            """
            Desctiption:
            ------------
                This function creates the lstm autoencoder model with the given class parameters settings.
            """

            lstm_autoencoder = Sequential()
            # Encoder
            lstm_autoencoder.add(
                LSTM(sequenceLength, activation=activation_function, input_shape=(None, numOfColumns), return_sequences=True))
            lstm_autoencoder.add(LSTM(120, activation=activation_function, return_sequences=True))
            lstm_autoencoder.add(LSTM(120, activation=activation_function, return_sequences=True))
            lstm_autoencoder.add(LSTM(60, activation=activation_function))
            lstm_autoencoder.add(RepeatVector(sequenceLength))
            # Decoder
            lstm_autoencoder.add(LSTM(sequenceLength, activation=activation_function, return_sequences=True))
            lstm_autoencoder.add(LSTM(120, activation='relu', return_sequences=True))
            lstm_autoencoder.add(LSTM(120, activation='relu', return_sequences=True))
            lstm_autoencoder.add(TimeDistributed(Dense(numOfColumns)))


            lstm_autoencoder.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
            history = lstm_autoencoder.fit(trainingData, trainingData, epochs=modelEpochs, verbose=1,
                                           batch_size=batch_size)
            history_dict = history.history
            accuracy = history.history['accuracy'][-1]
            return accuracy
        accuracy = autoencoder_model(trainingData)


        global best_accuracyC
        global path_best_model
        # If the classification accuracy of the saved model is improved ...
        if accuracy > best_accuracyC:
            # Save the new model to harddisk.
            # autoencoder.save(path_best_model)learning_rate,  activation_function,  optimizer, loss ,num_dense_layers, batch_size
            bestresultC.append(learning_rate)
            bestresultC.append(activation_function)
            bestresultC.append(optimizer)
            bestresultC.append(loss)
            bestresultC.append(num_dense_layers)

            bestresultC.append(batch_size)

            # Update the classification accuracy.
            best_accuracyC = accuracy

        # Delete the Keras model with these hyper-parameters from memory.
        #del lstm_autoencoder

        # Clear the Keras session, otherwise it will keep adding new
        # models to the same TensorFlow graph each time we create
        # a model with a different set of hyper-parameters.
        K.clear_session()

        # NOTE: Scikit-optimize does minimization so it tries to
        # find a set of hyper-parameters with the LOWEST fitness-value.
        # Because we are interested in the HIGHEST classification
        # accuracy, we need to negate this number so it can be minimized.
        return -accuracy

    default_parameters = [1e-5, 'relu', 'Adam', 'mean_absolute_error', 1, 83]
    search_result = gp_minimize(func=C_hyper, dimensions=dimensions, acq_func='EI', n_calls=50, x0=default_parameters)

    print(bestresultC)
    print(best_accuracyC)
    return bestresultC[-6:]
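# Usage sketch (not in the original source): C_hyper updates the module-level
# globals best_accuracyC and bestresultC, so they must exist before C_Hyp runs.
# The CSV paths below are hypothetical placeholders for the real data splits.
#
# import pandas as pd
# best_accuracyC = 0.0
# bestresultC = []
# train_df = pd.read_csv('train.csv')
# val_df = pd.read_csv('val.csv')
# test_df = pd.read_csv('test.csv')
# best_params = C_Hyp(train_df, val_df, test_df, modelEpochs=20)
# # best_params -> [learning_rate, activation, optimizer, loss, num_dense_layers, batch_size]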
Example #2
def SVMhyp(X_train,
           X_val,
           X_test,
           y_train,
           y_val,
           y_test,
           threshold_fn_percentage=0.10):
    min_max_scaler = preprocessing.MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train.astype(float))
    X_val = min_max_scaler.transform(X_val.astype(float))
    X_test = min_max_scaler.transform(X_test.astype(float))

    # model params
    nu = Categorical(categories=[
        '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'
    ],
                     name='nu')
    kernel = Categorical(categories=['linear', 'poly', 'rbf'], name='kernel')
    # gamma = Integer(low=1, high=5, name='gamma')
    gamma = Real(low=1e-3, high=2, prior='log-uniform', name='gamma')
    dimensions = [kernel, gamma, nu]

    @use_named_args(dimensions=dimensions)
    def svmHyper(kernel, gamma, nu):
        nu = float(nu)
        # global best_resultssvm
        clf = svm.OneClassSVM(gamma=gamma, kernel=kernel, nu=nu)
        print('\ntraining the classifier  start time: ', str(datetime.now()))
        print('\n', clf)
        clf = clf.fit(X_train)
        val_score = clf.score_samples(X_val)
        threshold = 0
        best_threshold = 0
        acceptable_n_FP = threshold_fn_percentage * len(y_train)
        print("\nacceptable_n_FP: ", acceptable_n_FP)
        TN = 0
        FP = 0
        print("\ncalculating threshold......")
        while (threshold <= 1):
            # print ('**************************')
            # print (threshold)
            threshold += 0.005
            y_pred = [1 if e > threshold else 0 for e in val_score]
            y_Pred = np.array(y_pred)
            # Confusion Matrix
            from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
            conf_matrix = confusion_matrix(y_val, y_pred, labels=[0, 1])
            tn, fp, fn, tp = conf_matrix.ravel()
            # print(conf_matrix)
            # print("tn: " , tn)
            # print("fp: " ,fp)
            if fp < acceptable_n_FP:
                break
        best_threshold = threshold
        y_pred = [1 if e >= best_threshold else 0 for e in val_score]

        from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

        precision_W, recall_W, fscore_W, xyz = precision_recall_fscore_support(
            y_val, y_pred, average='weighted')
        global best_resultssvm
        # global bestresultssvmlist
        print(fscore_W)
        if fscore_W > best_resultssvm:
            # Save the new model to harddisk.
            # autoencoder.save(path_best_model)learning_rate,  activation_function,  optimizer, loss ,num_dense_layers, batch_size
            bestresultssvmlist.append(kernel)
            bestresultssvmlist.append(gamma)
            bestresultssvmlist.append(nu)
            # Update the classification accuracy.
            best_resultssvm = fscore_W

        # accuracy, we need to negate this number so it can be minimized.
        return -fscore_W

    default_parameters = ['rbf', 1e-3, '0.2']
    search_result = gp_minimize(func=svmHyper,
                                dimensions=dimensions,
                                acq_func='EI',
                                n_calls=50,
                                x0=default_parameters)

    return bestresultssvmlist[-3:]
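# Minimal setup sketch (assumed, not shown in the original snippet): SVMhyp relies
# on module-level imports and on the globals best_resultssvm / bestresultssvmlist,
# so something along these lines has to run before it is called.
import numpy as np
from datetime import datetime
from sklearn import preprocessing, svm
from skopt import gp_minimize
from skopt.space import Real, Categorical
from skopt.utils import use_named_args

best_resultssvm = 0.0      # best weighted F-score seen so far
bestresultssvmlist = []    # [kernel, gamma, nu] of the best run so far

# best_svm_params = SVMhyp(X_train, X_val, X_test, y_train, y_val, y_test)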
Example #3

# In[26]:


embedding_matrix_glove = get_GloVe_embedding_matrix(glove_model)
embedding_matrix_word2vec = get_word_embedding_matrix(word2vec_model,300)
embedding_matrix_fast_text = get_word_embedding_matrix(fast_text_model,100)
embedding_matrix_godin = get_word_embedding_matrix(godin_model,400)


# In[29]:


para_learning_rate = Real(low=1e-4, high=1e-2, prior='log-uniform',name='learning_rate')
para_dropout = Categorical(categories=[0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],name = 'dropout')
# para_em = Categorical(categories=['embedding_matrix_fast_text','embedding_matrix_godin','embedding_matrix_word2vec','embedding_matrix_glove'],name='em')
para_em = Categorical(categories=['embedding_matrix_word2vec'],name='em')
para_em_trainable_flag = Categorical(categories=[True,False],name='em_trainable_flag')
para_batch_size = Categorical(categories=[8,16,32,64],name='batch_size')
para_epoch = Categorical(categories=[5,10,20,50,100],name='epoch')

# para_units_out = Categorical(categories=[64,128,256,512], name='units_out')

# para_dropout_cnn_lstm = Categorical(categories=[0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],name = 'dropout')

para_n_dense = Categorical(categories=[100,200,300,400], name='n_dense')
para_n_filters = Categorical(categories=[32,100,200,300],name='n_filters')
para_filter_size = Integer(low=1,high=8,name = 'filter_size')
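# Sketch (assumed, not part of the original notebook) of how the para_* dimensions
# above would typically be wired into a search: collect them into a list, wrap an
# objective with use_named_args, and hand both to gp_minimize. build_and_train_model
# is a hypothetical stand-in for the notebook's actual training routine.
from skopt import gp_minimize
from skopt.utils import use_named_args

search_dimensions = [para_learning_rate, para_dropout, para_em, para_em_trainable_flag,
                     para_batch_size, para_epoch, para_n_dense, para_n_filters,
                     para_filter_size]

@use_named_args(dimensions=search_dimensions)
def objective(learning_rate, dropout, em, em_trainable_flag, batch_size, epoch,
              n_dense, n_filters, filter_size):
    # should return a validation score for this configuration
    score = build_and_train_model(learning_rate, dropout, em, em_trainable_flag,
                                  batch_size, epoch, n_dense, n_filters, filter_size)
    return -score  # gp_minimize minimizes, so negate the score

# search_result = gp_minimize(func=objective, dimensions=search_dimensions, n_calls=30)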

Example #4
    count = 0
    for y_p, y_t in zip(y_predict, y_target):
        if y_p == y_t:
            count += 1
    return float(count) / len(y_predict)


"""
search space for hyper-parameters
"""
search_space = [
    Real(low=0, high=1, name='dropout_rate'),
    Integer(low=1, high=10, name='n_hidden_layers'),
    Integer(low=32, high=150, name='dim_B'),
    Integer(low=32, high=150, name='dim_C'),
    Categorical(categories=['8', '16', '32', '64'], name='batch_size'),
    Real(low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate')
]


@use_named_args(dimensions=search_space)
def fitness(dropout_rate, n_hidden_layers, dim_B, dim_C, batch_size,
            learning_rate):
    """
    tuning process for a single group of hyper-parameters
    """
    weight_decay = 0
    batch_size = int(batch_size)
    model = skorch.classifier.NeuralNetBinaryClassifier(
        module=DiscriminatorMLPPremiumForTuning,
        module__hidden_size=HIDDEN_SIZE,
Example #5
def test_space_consistency():
    # Reals (uniform)

    s1 = Space([Real(0.0, 1.0)])
    s2 = Space([Real(0.0, 1.0)])
    s3 = Space([Real(0, 1)])
    s4 = Space([(0.0, 1.0)])
    s5 = Space([(0.0, 1.0, "uniform")])
    s6 = Space([(0, 1.0)])
    s7 = Space([(np.float64(0.0), 1.0)])
    s8 = Space([(0, np.float64(1.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    a5 = s5.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_equal(s1, s6)
    assert_equal(s1, s7)
    assert_equal(s1, s8)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)
    assert_array_equal(a1, a5)

    # Reals (log-uniform)
    s1 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s2 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s3 = Space([Real(10**-3, 10**3, prior="log-uniform")])
    s4 = Space([(10**-3.0, 10**3.0, "log-uniform")])
    s5 = Space([(np.float64(10**-3.0), 10**3.0, "log-uniform")])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)

    # Integers
    s1 = Space([Integer(1, 5)])
    s2 = Space([Integer(1.0, 5.0)])
    s3 = Space([(1, 5)])
    s4 = Space([(np.int64(1.0), 5)])
    s5 = Space([(1, np.int64(5.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)

    # Categoricals
    s1 = Space([Categorical(["a", "b", "c"])])
    s2 = Space([Categorical(["a", "b", "c"])])
    s3 = Space([["a", "b", "c"]])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_array_equal(a1, a2)
    assert_equal(s1, s3)
    assert_array_equal(a1, a3)

    s1 = Space([(True, False)])
    s2 = Space([Categorical([True, False])])
    s3 = Space([np.array([True, False])])
    assert s1 == s2 == s3
Example #6
# %% global variables
X_labelled = pd.read_csv(PATH_X_LABELLED, index_col=INDEX_COLS)
y_labelled = pd.read_csv(PATH_Y_LABELLED, index_col=INDEX_COLS)
X_pool = pd.read_csv(PATH_X_POOL, index_col=INDEX_COLS)
y_pool = pd.read_csv(PATH_Y_POOL, index_col=INDEX_COLS)

runs_skopt = 0
N_FEATURES_AUD = X_labelled.filter(regex='aud', axis=1).shape[-1]
N_FEATURES_VID = X_labelled.filter(regex='vid', axis=1).shape[-1]
pool_size = 10
SEQUENCE_LENGTH = 100

# %%
X_labelled.head()
# %% search space
space = [Categorical([8, 16, 32], name='batch_size'),
         Real(0.1, 0.4, name='dropout'),
         Real(0.0, 0.5, name='rec_dropout'),
         Real(1e-06, 1e-04, prior='log-uniform', name='rec_l2'),
         Real(1e-06, 1e-04, prior='log-uniform', name='kernel_l2'),
         Integer(42, 44, name='n_neurons_hid_aud'),
         Integer(90, 180, name='n_neurons_hid_vid')

         ]
# %%
early_stopper = EarlyStopping(
    monitor='val_pred_reg_arou_loss', mode='min',
    min_delta=0.001, patience=70, verbose=0,
)
# %% Start hyperparameters:
x_0 = [32, 0.12, 0.08, 2e-06, 2e-06, 44, 120]
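# Sketch (assumed, not part of the original script) of how space and x_0 above would
# typically be used: an objective wrapped with use_named_args trains the model and
# reports the monitored validation loss, and gp_minimize starts from x_0.
# build_audiovisual_model is a hypothetical stand-in for the script's model builder.
from skopt import gp_minimize
from skopt.utils import use_named_args

@use_named_args(dimensions=space)
def fitness(batch_size, dropout, rec_dropout, rec_l2, kernel_l2,
            n_neurons_hid_aud, n_neurons_hid_vid):
    model = build_audiovisual_model(dropout, rec_dropout, rec_l2, kernel_l2,
                                    n_neurons_hid_aud, n_neurons_hid_vid)
    history = model.fit(X_labelled, y_labelled, batch_size=int(batch_size),
                        validation_split=0.2, epochs=200, verbose=0,
                        callbacks=[early_stopper])
    return min(history.history['val_pred_reg_arou_loss'])

# search_result = gp_minimize(fitness, dimensions=space, x0=x_0, n_calls=50)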
Example #7
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=
                recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(3, 50)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([1, 0])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Categorical(
                [200])
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Integer(20, 500)
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 50)
            hyperparameters_range_dictionary["batch_size"] = Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None
                })

        ##########################################################################################################

        if recommender_class is IALSRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary[
                "confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(
                low=1e-3, high=50.0, prior='log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(
                low=1e-3, high=10.0, prior='log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low=1e-5,
                                                           high=1e-2,
                                                           prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 1000)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is NMFRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 100)
            hyperparameters_range_dictionary["solver"] = Categorical(
                ["coordinate_descent", "multiplicative_update"])
            hyperparameters_range_dictionary["init_type"] = Categorical(
                ["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(
                ["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 2000)
            hyperparameters_range_dictionary["epochs"] = Categorical(
                [1200, 1500, 1700])
            hyperparameters_range_dictionary["symmetric"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            hyperparameters_range_dictionary["lambda_i"] = Real(
                low=1e-7, high=1e1, prior='log-uniform')
            hyperparameters_range_dictionary["lambda_j"] = Real(
                low=1e-7, high=1e1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-6, high=1e-3, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None,
                    'train_with_sparse_weights': None
                })

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                             high=1.0,
                                                             prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
Example #8
    def get_sk_dimensions(api_config, transform="normalize"):
        """Help routine to setup skopt search space in constructor.

        Take api_config as argument so this can be static.
        """
        # The ordering of iteration probably makes no difference, but to be
        # safe and consistent with space.py, sort the parameter names.
        param_list = sorted(api_config.keys())

        sk_dims = []
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for case that whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values,
                    param_values,
                    kind="nearest",
                    fill_value="extrapolate")

            if param_type == "int":
                # Integer space in sklearn does not support any warping => Need
                # to leave the warping as linear in skopt.
                sk_dims.append(
                    Integer(param_range[0],
                            param_range[-1],
                            transform=transform,
                            name=param_name))
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                sk_dims.append(
                    Integer(0, 1, transform=transform, name=param_name))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                # Leave x-form to one-hot as per skopt default
                sk_dims.append(Categorical(param_values, name=param_name))
            elif param_type == "real":
                # Skopt doesn't support all our warpings, so need to pick
                # closest substitute it does support.
                prior = "log-uniform" if param_space in (
                    "log", "logit") else "uniform"
                sk_dims.append(
                    Real(param_range[0],
                         param_range[-1],
                         prior=prior,
                         transform=transform,
                         name=param_name))
            else:
                assert False, "type %s not handled in API" % param_type
        return sk_dims, round_to_values
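    # Usage sketch (assumed, not part of the original class): a tiny api_config and
    # roughly what get_sk_dimensions builds from it. Parameters are sorted by name,
    # so the dimensions come out in the order booster, learning_rate, max_depth.
    #
    # api_config = {
    #     "max_depth": {"type": "int", "space": "linear", "range": (1, 10)},
    #     "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e-1)},
    #     "booster": {"type": "cat", "values": ["gbtree", "dart"]},
    # }
    # sk_dims, round_to_values = get_sk_dimensions(api_config)
    # # sk_dims -> [Categorical(['gbtree', 'dart']),
    # #             Real(1e-4, 1e-1, prior='log-uniform', transform='normalize'),
    # #             Integer(1, 10, transform='normalize')]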
__version__ = "1.0.0"

from skopt.space import Categorical, Integer, Real

from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from utility import HyperParameters, Runner
from model import load_sample_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters({
    'dt__criterion':
    Categorical(['gini', 'entropy']),
    'dt__max_depth':
    Integer(4, 24),
    'dt__min_samples_leaf':
    Real(0.000001, 0.001),
    'dt__min_samples_split':
    Real(0.000002, 0.002)
})

decision_tree_basic = Pipeline([('mapper', ordinal_data_mapper),
                                ('dt', DecisionTreeClassifier())])


def test_decision_tree_basic():
    runner = Runner('model/experiment/output/decision_tree_basic',
                    load_sample_data_frame(), 'violation', decision_tree_basic,
Example #10
    )
    Y_train = np.load(
        '/Users/kefei/Documents/Dataset/NTU/single_poses/labels/y_train_fall_aug_trimmed.npy'
    )
    #X_train = X_train[:1000]
    #Y_train = Y_train[:1000]
    print(X_train.shape)
    Y_train = np.asarray([1 if x == 43 else 0 for x in Y_train])
    return X_train, Y_train


X_train, y_train = load_data()
#model parameter
input_dim = 51
#hidden_dims = Integer(low=512, high=1024, name='hidden_dim')
hidden_dims = Categorical([256, 512, 768, 1024, 1280], name='hidden_dim')
#latent_dims = Integer(low=128, high=256, name='latent_dim')
latent_dims = Categorical([64, 128, 192, 256], name='latent_dim')
W_regularizer_vals = Real(low=0.001, high=0.01, name='hidden_W_regularizer')
dropout_rates = Real(low=0.01, high=0.12, name='dropout_rate')
#classifier_dense_dims = Integer(low=32, high=128, name='classifier_dense_dim')
classifier_dense_dims = Categorical([32, 64, 96, 128],
                                    name='classifier_dense_dim')
learning_rates = Real(low=0.00001, high=0.0005, name='learning_rate')
param_space = [
    hidden_dims, latent_dims, W_regularizer_vals, dropout_rates,
    classifier_dense_dims, learning_rates
]
#for simplicity's sake, just do annealing first

Example #11
    def get_model(self, X, y):
        search_space = {'solver': Categorical(['svd', 'lsqr', 'eigen'])}
        model = BayesSearchCV(LinearDiscriminantAnalysis(), search_space,
                              random_state=0, n_iter=1, cv=3, n_jobs=-1)
        model.fit(X, y)
        return model
Example #12
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize

# This defines the hyperparameter search space, add more parameters here
# as you see fit :)
space = [
    Real(1e-6, 0.01, "log-uniform", name='learning_rate'),
    Real(0.1, 0.8, name='dropout'),
    Real(0.8, 1.0, name='momentum'),
    Real(0.9, 1.0, name='beta_1'),
    Real(0.99, 1.0, name='beta_2'),
    Integer(low=5, high=20, name='epochs'),
    Integer(low=50, high=225, name='num_dense_nodes'),
    Categorical(categories=['SGD', 'Adam'], name='optimizer_type')
]

# Define a Callback class that stops training once accuracy reaches 87%
#class myCallback(tf.keras.callbacks.Callback):
#    def on_epoch_end(self, epoch, logs={}):
#        if logs.get('acc') > 0.87:
#            print("\nReached 87% accuracy so cancelling training!")
#            self.model.stop_training = True
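# A runnable version of the callback idea sketched above -- a minimal sketch assuming
# the script has `import tensorflow as tf` and that Keras logs the metric under
# 'acc' or 'accuracy'.
class AccuracyStopCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        if logs.get('acc', logs.get('accuracy', 0.0)) > 0.87:
            print("\nReached the target accuracy, so cancelling training!")
            self.model.stop_training = True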

# make the model - isolating this as a function to be called for each HP search
# iteration to pass in the updated parameters


def make_model(learning_rate, dropout, momentum, beta_1, beta_2,
               num_dense_nodes, optimizer_type):
Example #13
#param grid models for BayesSearchCV
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

#skopt
lr_param_grid = {
  'lr__C': Real(1e-3, 100, 'log-uniform'),
  'lr__fit_intercept': Categorical([True, False]), 
  'lr__max_iter': Integer(100, 100000, 'log-uniform'),
  'lr__penalty':  Categorical(['l1', 'l2']),
  'lr__solver': Categorical(['liblinear', 'saga']),
  'lr__tol': Real(1e-5, 1e-3, 'log-uniform'),
  'lr__class_weight':  Categorical([None, 'balanced']),

}
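# Usage sketch (assumed, not in the original notebook): the 'lr__' prefixes suggest a
# Pipeline whose logistic-regression step is named 'lr', tuned with BayesSearchCV.
# The pipeline, data and CV settings below are placeholders.
#
# from sklearn.linear_model import LogisticRegression
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import StandardScaler
#
# lr_pipe = Pipeline([('scaler', StandardScaler()), ('lr', LogisticRegression())])
# lr_opt = BayesSearchCV(lr_pipe, lr_param_grid, n_iter=32, cv=5, scoring='roc_auc')
# lr_opt.fit(X_train, y_train)
# print(lr_opt.best_params_, lr_opt.best_score_)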




#skopt
xgb_param_grid = {
        'xgb__colsample_bylevel' : Real(1e-1, 1, 'uniform'),
        'xgb__colsample_bytree' : Real(6e-1, 1, 'uniform'),
        'xgb__gamma' :  Real(5e-1, 6, 'log-uniform'),
        'xgb__learning_rate' : Real(10**-5, 10**0, "log-uniform"),
        'xgb__max_depth' : Integer(1, 25, 'uniform'),
        'xgb__min_child_weight' : Integer(1, 10, 'uniform'),
        'xgb__n_estimators' : Integer(50, 400, 'log-uniform'),
        'xgb__reg_alpha' : Real(1e-2, 1, 'log-uniform'),
        'xgb__reg_lambda' : Real(1e-2, 1, 'log-uniform'),
        'xgb__subsample' : Real(6e-1, 1, 'uniform'),
Example #14
    def optimize(self, x, y):

        self.iterations = []

        space = [
            Integer(5, 20, name='hidden_layer_sizes'),
            Categorical(['constant', 'invscaling', 'adaptive'],
                        name='learning_rate'),
            Real(1e-5, 1e-3, name='learning_rate_init'),
            Integer(5, 20, name='max_iter'),
            Real(1e-5, 1e-3, name='tol')
        ]

        @use_named_args(space)
        def objective(hidden_layer_sizes, learning_rate, learning_rate_init,
                      max_iter, tol):
            try:
                scores = []

                params = {
                    'hidden_layer_sizes': int(hidden_layer_sizes),
                    'learning_rate': learning_rate,
                    'learning_rate_init': learning_rate_init,
                    'max_iter': int(max_iter),
                    'tol': tol,
                    'random_state': self.random_state
                }

                if isinstance(self.fixed_parameters, dict):
                    params.update(self.fixed_parameters)

                skf = StratifiedKFold(self.n_folds,
                                      shuffle=self.shuffle,
                                      random_state=self.random_state)

                for train_index, valid_index in skf.split(x, y):

                    x_train, y_train = x[train_index, :], y[train_index]
                    x_valid, y_valid = x[valid_index, :], y[valid_index]

                    mlp = MLPClassifier(**params)

                    mlp.fit(x_train, y_train)

                    y_valid_hat = mlp.predict(x_valid)

                    loss_valid = log_loss(y_valid, y_valid_hat)

                    scores.append(loss_valid)

                result = np.mean(scores)

                self.iterations.append((params, result))

                return result

            except ValueError:

                return np.inf

        return self.execute_optimization(objective, space)
Example #15
    def optimize(self, x, y):

        self.iterations = []

        space = [
            Integer(1, 30, name='n_neighbors'),
            Categorical(['uniform', 'distance'], name='weights'),
            Integer(1, 30, name='leaf_size'),
            Integer(1, 5, name='p')]

        @use_named_args(space)
        def objective(
            n_neighbors,
            weights,
            leaf_size,
            p
        ):
            try:
                scores = []

                params = {
                    'n_neighbors': int(n_neighbors),
                    'weights': weights,
                    'leaf_size': int(leaf_size),
                    'p': int(p),

                    'n_jobs': self.n_jobs}

                if isinstance(self.fixed_parameters, dict):
                    params.update(self.fixed_parameters)

                skf = StratifiedKFold(self.n_folds,
                                      shuffle=self.shuffle,
                                      random_state=self.random_state)

                for train_index, valid_index in skf.split(x, y):

                    x_train, y_train = x[train_index, :], y[train_index]
                    x_valid, y_valid = x[valid_index, :], y[valid_index]

                    knn = KNeighborsClassifier(**params)

                    knn.fit(x_train, y_train)

                    y_valid_hat = knn.predict(x_valid)

                    loss_valid = log_loss(y_valid, y_valid_hat)

                    scores.append(loss_valid)

                result = np.mean(scores)

                self.iterations.append((params, result))

                return result

            except ValueError:

                return np.inf

        return self.execute_optimization(objective, space)
Example #16
    'Own-child': 0,
    'Other-relative': 0,
    'Husband': 1,
    'Wife': 1
})
data.head()
X = data[[
    'workclass_num', 'education.num', 'marital_num', 'race_num', 'sex_num',
    'rel_num', 'capital.gain', 'capital.loss'
]]
y = data.over50K

start = timeit.default_timer()
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)

opt = BayesSearchCV(SVC(), {
    'C': Real(0.001, 10, prior='log-uniform'),
    'gamma': Real(0.001, 1, prior='log-uniform'),
    'kernel': Categorical(['linear', 'rbf']),
},
                    n_iter=40)
opt.fit(X_train, y_train)
stop = timeit.default_timer()

print(opt.score(X_test, y_test))
print(opt.best_params_)
print('Time: ', stop - start)
Example #17
def pow10map(x):
    return 10.0**x


def pow2intmap(x):
    return int(2.0**x)


def nop(x):
    return x


nnparams = {
    # up to 1024 neurons
    'hidden_layer_sizes': (Real(1.0, 10.0), pow2intmap),
    'activation': (Categorical(['identity', 'logistic', 'tanh', 'relu']), nop),
    'solver': (Categorical(['lbfgs', 'sgd', 'adam']), nop),
    'alpha': (Real(-5.0, -1), pow10map),
    'batch_size': (Real(5.0, 10.0), pow2intmap),
    'learning_rate': (Categorical(['constant', 'invscaling',
                                   'adaptive']), nop),
    'max_iter': (Real(5.0, 8.0), pow2intmap),
    'learning_rate_init': (Real(-5.0, -1), pow10map),
    'power_t': (Real(0.01, 0.99), nop),
    'momentum': (Real(0.1, 0.98), nop),
    'nesterovs_momentum': (Categorical([True, False]), nop),
    'beta_1': (Real(0.1, 0.98), nop),
    'beta_2': (Real(0.1, 0.9999999), nop),
}
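# Sketch (assumed; the original file is truncated just below) of how the
# (dimension, transform) pairs above are typically consumed: the dimensions feed the
# optimizer, and the transforms map a sampled point back into estimator keyword
# arguments. The sorted-name ordering is an assumption for illustration.
def point_to_params(point, param_defs=nnparams):
    """Map raw optimizer values back to MLPClassifier keyword arguments."""
    names = sorted(param_defs)
    return {name: param_defs[name][1](value) for name, value in zip(names, point)}

# dimensions = [nnparams[name][0] for name in sorted(nnparams)]
# params = point_to_params(result.x)   # result from e.g. gp_minimize(objective, dimensions)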

MODELS = {
Example #18
    def _Losgistic(self, df):
        print("Start Logistic...")
        param_grid = [
            Categorical(["newton-cg", "lbfgs", "liblinear"], name="solver"),
            Categorical(["l2"], name="penalty"),
            Real(1e-5, 100, name="C"),
        ]

        # set up the logistic regression classifier

        lgt = LogisticRegression(random_state=0)
        kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))

        gpr = GaussianProcessRegressor(kernel=kernel,
                                       normalize_y=True,
                                       noise="gaussian",
                                       n_restarts_optimizer=2)
        # the decorator allows our objective function to receive the parameters as keyword arguments

        @use_named_args(param_grid)
        def objective(**params):

            # model with new parameters
            lgt.set_params(**params)

            # optimization function (hyperparam response function)
            value = np.mean(
                cross_val_score(
                    lgt,
                    df.feature_train,
                    df.target_train,
                    cv=5,
                    n_jobs=-4,
                    scoring="roc_auc",
                )  # "accuracy"
            )

            # negate because we need to minimize
            return -value

        gp_ = gp_minimize(
            objective,
            dimensions=param_grid,
            base_estimator=gpr,
            n_initial_points=5,
            acq_optimizer="sampling",
            random_state=42,
        )
        params = {
            "solver": gp_.x[0],
            "penalty": gp_.x[1],
            "C": gp_.x[2],
        }
        final_model = LogisticRegression(
            solver=gp_.x[0],
            penalty=gp_.x[1],
            C=gp_.x[2],
        )
        final_model.fit(df.feature_train, df.target_train)
        y_preds = final_model.predict(df.feature_test)
        joblib.dump(final_model, "./log/Logistic.pkl")
        return {
            "Logistic": {
                "Best_score": metrics.roc_auc_score(y_preds, df.target_test),
                "Best_accuracy":
                metrics.accuracy_score(y_preds, df.target_test),
                "params": params,
                "model": final_model,
            }
        }
Example #19
from experiment.experiment import Experiment
from experiment.hyper_param_opt import GridSearch
from models.tensorflow.model import Model
from models.tensorflow.tf_train_eval import TfTrainEvalModelFactory

if __name__ == '__main__':
    exp = Experiment('density/synthetic/sin_normal')

    conf.num_workers = 4
    conf.visible_device_list = [0, 1]
    conf.eval_batch_size = {'0': 10000, '1': 10000}

    exp.data_loader = registry.sin_normal_noise()

    exp.model_factory = TfTrainEvalModelFactory(Model(name="MONDE_AR_MADE"))

    exp.hyper_param_search = GridSearch([
        Categorical(['sigm'], name='tr'),
        Categorical([32, 64, 128], name='sh'),
        Categorical([1, 2, 3], name='nh'),
        Categorical([16], name='xs'),
        Categorical([128], name='bs'),
        Categorical([1], name='rs'),
        Categorical(['AdamOptimizer'], name='opt'),
        Categorical([1e-4, 1e-3, 1e-2], name='opt_lr'),
    ])

    exp.early_stopping = EarlyStop(monitor_every_epoch=1, patience=[30])

    exp.run()
Example #20
    def _RandomForest(self, df):
        print("Start RandomForest...")
        param_grid = [
            Categorical(["sqrt", "log2", None], name="max_features"),
            Integer(120, 1200, name="n_estimators"),
            Integer(5, 30, name="max_depth"),
            Integer(2, 15, name="min_samples_split"),
            Integer(1, 10, name="min_samples_leaf"),
        ]

        # set up the random forest classifier

        rf = RandomForestClassifier(random_state=0)
        kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))

        gpr = GaussianProcessRegressor(kernel=kernel,
                                       normalize_y=True,
                                       noise="gaussian",
                                       n_restarts_optimizer=2)
        # the decorator allows our objective function to receive the parameters as keyword arguments

        @use_named_args(param_grid)
        def objective(**params):

            # model with new parameters
            rf.set_params(**params)

            # optimization function (hyperparam response function)
            value = np.mean(
                cross_val_score(
                    rf,
                    df.feature_train,
                    df.target_train,
                    cv=5,
                    n_jobs=-4,
                    scoring="roc_auc",
                )  # "accuracy"
            )

            # negate because we need to minimize
            return -value

        gp_ = gp_minimize(
            objective,
            dimensions=param_grid,
            base_estimator=gpr,
            n_initial_points=5,
            acq_optimizer="sampling",
            random_state=42,
        )
        params = {
            "max_features": gp_.x[0],
            "n_estimators": gp_.x[1],
            "max_depth": gp_.x[2],
            "min_samples_split": gp_.x[3],
            "min_samples_leaf": gp_.x[4],
        }
        final_model = RandomForestClassifier(
            max_features=gp_.x[0],
            n_estimators=gp_.x[1],
            max_depth=gp_.x[2],
            min_samples_split=gp_.x[3],
            min_samples_leaf=gp_.x[4],
        )
        final_model.fit(df.feature_train, df.target_train)
        y_preds = final_model.predict(df.feature_test)
        joblib.dump(final_model, "./log/RandomForest.pkl")
        return {
            "RandomForest": {
                "Best_score": metrics.roc_auc_score(y_preds, df.target_test),
                "Best_accuracy":
                metrics.accuracy_score(y_preds, df.target_test),
                "params": params,
                "model": final_model,
            }
        }
Example #21
def run_KNNRecommender_on_similarity_type(
        similarity_type,
        parameterSearch,
        parameter_search_space,
        recommender_input_args,
        n_cases,
        n_random_starts,
        resume_from_saved,
        save_model,
        output_folder_path,
        output_file_name_root,
        metric_to_optimize,
        allow_weighting=False,
        recommender_input_args_last_test=None):
    original_parameter_search_space = parameter_search_space

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
    hyperparameters_range_dictionary["shrink"] = Integer(0, 1000)
    hyperparameters_range_dictionary["similarity"] = Categorical(
        [similarity_type])
    hyperparameters_range_dictionary["normalize"] = Categorical([1, 0])

    is_set_similarity = similarity_type in [
        "tversky", "dice", "jaccard", "tanimoto"
    ]

    if similarity_type == "asymmetric":
        hyperparameters_range_dictionary["asymmetric_alpha"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([1])

    elif similarity_type == "tversky":
        hyperparameters_range_dictionary["tversky_alpha"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["tversky_beta"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([1])

    elif similarity_type == "euclidean":
        hyperparameters_range_dictionary["normalize"] = Categorical([1, 0])
        hyperparameters_range_dictionary["normalize_avg_row"] = Categorical(
            [1, 0])
        hyperparameters_range_dictionary[
            "similarity_from_distance_mode"] = Categorical(
                ["lin", "log", "exp"])

    if not is_set_similarity:

        if allow_weighting:
            hyperparameters_range_dictionary[
                "feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

    local_parameter_search_space = {
        **hyperparameters_range_dictionary,
        **original_parameter_search_space
    }

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=local_parameter_search_space,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root + "_" + similarity_type,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)
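Not part of the original snippet: this function is normally driven once per similarity type. A hedged sketch of such a driver; parameterSearch, recommender_input_args and the output paths are assumed to be created elsewhere, and the numeric budgets are purely illustrative:

# Hypothetical driver loop; every argument value below is an assumption.
similarity_types = ["cosine", "jaccard", "asymmetric", "dice", "tversky", "euclidean"]

for sim in similarity_types:
    run_KNNRecommender_on_similarity_type(
        similarity_type=sim,
        parameterSearch=parameterSearch,                # assumed search object
        parameter_search_space={},                      # extra dimensions, if any
        recommender_input_args=recommender_input_args,  # assumed to exist
        n_cases=50,                                     # hypothetical budget
        n_random_starts=15,
        resume_from_saved=False,
        save_model="best",
        output_folder_path="result_experiments/",
        output_file_name_root="ItemKNNRecommender",
        metric_to_optimize="MAP",
        allow_weighting=True)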
Example No. 22
    def _GradientBoosting(self, df):
        print("Start GradientBoosting...")
        param_grid = [
            Integer(10, 120, name="n_estimators"),
            Real(0, 0.999, name="min_samples_split"),
            Integer(1, 5, name="max_depth"),
            Categorical(["deviance", "exponential"], name="loss"),
        ]

        # set up the gradient boosting classifier

        gbm = GradientBoostingClassifier(random_state=0)
        kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))

        gpr = GaussianProcessRegressor(kernel=kernel,
                                       normalize_y=True,
                                       noise="gaussian",
                                       n_restarts_optimizer=2)
        # the decorator allows our objective function to receive the parameters as keyword arguments

        @use_named_args(param_grid)
        def objective(**params):

            # model with new parameters
            gbm.set_params(**params)

            # optimization function (hyperparam response function)
            value = np.mean(
                cross_val_score(
                    gbm,
                    df.feature_train,
                    df.target_train,
                    cv=5,
                    n_jobs=-4,
                    scoring="roc_auc",
                )  # "accuracy"
            )

            # negate because we need to minimize
            return -value

        gp_ = gp_minimize(
            objective,
            dimensions=param_grid,
            base_estimator=gpr,
            n_initial_points=5,
            acq_optimizer="sampling",
            random_state=42,
        )
        params = {
            "n_estimators": gp_.x[0],
            "min_samples_split": gp_.x[1],
            "max_depth": gp_.x[2],
            "loss": gp_.x[3],
        }
        final_model = GradientBoostingClassifier(
            n_estimators=gp_.x[0],
            min_samples_split=gp_.x[1],
            max_depth=gp_.x[2],
            loss=gp_.x[3],
        )
        final_model.fit(df.feature_train, df.target_train)
        y_preds = final_model.predict(df.feature_test)
        joblib.dump(final_model, "./log/GradientBoosting.pkl")
        return {
            "GradientBoosting": {
                "Best_score": metrics.roc_auc_score(y_preds, df.target_test),
                "Best_accuracy":
                metrics.accuracy_score(y_preds, df.target_test),
                "params": params,
                "model": final_model,
            }
        }
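Also worth noting (not in the original): the result object returned by gp_minimize keeps the best value and the full optimisation trace, which skopt can plot directly. A small optional sketch, assuming the gp_ and param_grid locals from the method above are in scope; the output path is hypothetical:

from skopt.plots import plot_convergence

# gp_.fun is the best (most negative) objective value, i.e. minus the best CV ROC AUC
print("best CV ROC AUC:", -gp_.fun)
print("best point:", dict(zip([d.name for d in param_grid], gp_.x)))

# convergence trace of the Bayesian optimisation run
ax = plot_convergence(gp_)
ax.figure.savefig("./log/GradientBoosting_convergence.png")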
Example No. 23
        assert_equal(reals.distance(4.1234, i), abs(4.1234 - i))


@pytest.mark.parametrize("dimension, bounds",
                         [(Real, (2, 1)), (Integer, (2, 1)),
                          (Real, (2, 2)), (Integer, (2, 2))])
def test_dimension_bounds(dimension, bounds):
    with pytest.raises(ValueError) as exc:
        dimension(*bounds)
    assert "has to be less than the upper bound " in exc.value.args[0]


@pytest.mark.parametrize("dimension, name",
                         [(Real(1, 2, name="learning rate"), "learning rate"),
                          (Integer(1, 100, name="no of trees"), "no of trees"),
                          (Categorical(["red, blue"], name="colors"), "colors")])
def test_dimension_name(dimension, name):
    assert dimension.name == name


@pytest.mark.parametrize("dimension",
                         [Real(1, 2), Integer(1, 100), Categorical(["red", "blue"])])
def test_dimension_name_none(dimension):
    assert dimension.name is None


def test_dimension_name_invalid():
    # names must be strings (or None); numeric and boolean names should raise
    notnames = [1, 1., True]
    for n in notnames:
        with pytest.raises(ValueError):
            Real(1, 2, name=n)
Example No. 24
    K.set_learning_phase(True)
    setCPUCores(4)

    if args.test_only:
        runParamTests(args)
        exit()

    logfile = "skopt_current.txt"
    holder = Holder(args.dirs, log_to=logfile)

    max_batch = 128

    space = [
        Real(10**-10, 10**-3, "log-uniform", name='l2_reg'),
        Categorical(["low", "mid", "high", "up", "down"], name='dropouts'),
        Real(10**-9, 10**-1, "log-uniform", name='learning_rate'),
        Integer(5, max_batch, name='batch_size'),
        Categorical(["RMSProp", "Adagrad", "Adadelta", "Adam"],
                    name='optimizer'),
    ]

    with open(logfile, 'a') as f:
        f.write("#{} {} {}\n".format("FC", "DK", datetime.datetime.now()))

    x0 = None
    y0 = None
    if args.prev is not None:
        x0, y0 = previousRuns(args.prev)

    res_gp = gp_minimize(holder,
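The example above is cut off in the middle of the gp_minimize call. A minimal sketch of how such a call is typically completed; treating the Holder instance as the objective callable and the evaluation budget are my assumptions, not the original author's:

    # Hedged completion sketch; n_calls and random_state are hypothetical.
    res_gp = gp_minimize(holder,          # Holder instance assumed to be the objective
                         space,           # search space defined above
                         x0=x0, y0=y0,    # warm start from previous runs, if any
                         n_calls=60,
                         random_state=0)

    print("best objective value:", res_gp.fun)
    print("best parameters:", res_gp.x)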
Example No. 25
def check_categorical(vals, random_val):
    x = Categorical(vals)
    assert_equal(x, Categorical(vals))
    assert_not_equal(x, Categorical(vals[:-1] + ("zzz",)))
    assert_equal(x.rvs(random_state=1), random_val)
Example No. 26
from experiment.experiment import Experiment
from experiment.hyper_param_opt import GridSearch
from models.tensorflow.model import Model
from models.tensorflow.tf_train_eval import TfTrainEvalModelFactory

if __name__ == '__main__':
    exp = Experiment('density/synthetic/sin_normal')

    conf.num_workers = 4
    conf.visible_device_list = [0,1]
    conf.eval_batch_size = {'0': 10000, '1': 10000}

    exp.data_loader = registry.sin_normal_noise()

    exp.model_factory = TfTrainEvalModelFactory(Model(name="RNADE_laplace"))

    exp.hyper_param_search = GridSearch([
        Categorical([1,20,50,100,150,200], name='km'),
        Categorical([20,60,100,140,200], name='sh'),

        Categorical([128], name='bs'),
        Categorical([1], name='rs'),

        Categorical(['AdamOptimizer'], name='opt'),
        Categorical([1e-4,1e-3,1e-2], name='opt_lr'),
    ])

    exp.early_stopping = EarlyStop(monitor_every_epoch=1, patience=[30])

    exp.run()
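A side note not in the original: GridSearch enumerates the full Cartesian product of the Categorical choices, so the budget is fixed by the grid itself. A quick sanity check of the grid size implied by the dimensions above:

from functools import reduce

choices = [[1, 20, 50, 100, 150, 200], [20, 60, 100, 140, 200],
           [128], [1], ['AdamOptimizer'], [1e-4, 1e-3, 1e-2]]
n_configs = reduce(lambda acc, cats: acc * len(cats), choices, 1)
print(n_configs)  # 6 * 5 * 1 * 1 * 1 * 3 = 90 configurations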
Example No. 27
            ("preprocessing", preprocessing()),
            ("estimator", GradientBoostingRegressor()),
        ]
    )
    estimator_elastic_net = Pipeline(
        steps=[
            ("preprocessing", preprocessing()),
            ("estimator", ElasticNet(max_iter=10000)),
        ]
    )

    svm_search_space = {
        "estimator__C": Real(1e-6, 1e6, prior="log-uniform"),
        "estimator__gamma": Real(1e-6, 1e1, prior="log-uniform"),
        "estimator__degree": Integer(1, 8),
        "estimator__kernel": Categorical(["linear", "poly", "rbf"]),
    }
    rf_search_space = {
        "estimator__max_depth": Integer(low=10, high=50),
        "estimator__max_features": Categorical(["log2", "sqrt", None]),
        "estimator__min_samples_leaf": Integer(low=2, high=10),
        "estimator__min_samples_split": Integer(low=2, high=4),
        "estimator__n_estimators": Integer(low=40, high=200),
        "estimator__bootstrap": Categorical([True, False]),
    }

    gb_search_space = {
        "estimator__n_estimators": Integer(low=100, high=1000),
        "estimator__learning_rate": Real(low=0.025, high=0.5, prior="log-uniform"),
        "estimator__max_depth": Integer(low=2, high=15),
        "estimator__subsample": Real(low=0.5, high=1),
Example No. 28
        "y_train : \n\n%s"%y_train
    )
    print(
        "X_test : \n\n%s"%X_test
    )
    print(
        "y_test : \n\n%s"%y_test
    )

    fbestmodel = "./GPMinModelTank"
    dim_first_neuron = Integer(low=3, high=300, name="first_neuron")
    dim_n_hidden = Integer(low=1, high=10, name="n_hidden")
    dim_magnifier = Real(low=1e-4, high=1.0, name="magnifier", prior="uniform")
    dim_dropout = Real(low=1e-3, high=0.5, name="dropout", prior="uniform")
    dim_lr = Real(low=3e-5, high=3e-2, name="lr")
    dim_first_activation = Categorical(categories=["relu", "sigmoid", "elu", "tanh"], name="first_activation")
    dim_hidden_activation = Categorical(categories=["relu", "sigmoid", "elu", "tanh"], name="hidden_activation")
    dim_last_activation = Categorical(categories=["relu", "sigmoid", "elu", "tanh"], name="last_activation")
    dim_regulariser = Categorical(categories=[reg.l1, reg.l2, reg.l1_l2, None], name="regulariser")
    dim_l1 = Real(low=0, high=1e-1, name="l1")
    dim_l2 = Real(low=0, high=1e-1, name="l2")

    dimensions = [
        dim_first_neuron, dim_n_hidden, dim_magnifier, dim_dropout, dim_lr,
        dim_first_activation, dim_hidden_activation, dim_last_activation,
        dim_regulariser, dim_l1, dim_l2,
    ]
    
    best_error = 10000

    f = open("./gp_min_best_error.txt","w")
    f.write("%s"%best_error)
    f.close()

    @use_named_args(dimensions=dimensions)
Example No. 29
    def optimize(self, x, y):
        """"
        n_estimators int, default=100, The number of trees in the forest. Changed in version 0.22: The default value of n_estimators changed from 10 to 100 in 0.22.

        criterion{“gini”, “entropy”}, default=”gini”, The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. Note: this parameter is tree-specific.

        max_depth int, default=None, The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.

        min_samples_split int or float, default=2, The minimum number of samples required to split an internal node: If int, then consider min_samples_split as the minimum number. If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) are the minimum number of samples for each split. Changed in version 0.18: Added float values for fractions.

        min_samples_leaf int or float, default=1, The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least min_samples_leaf training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. If int, then consider min_samples_leaf as the minimum number. If float, then min_samples_leaf is a fraction and ceil(min_samples_leaf * n_samples) are the minimum number of samples for each node. Changed in version 0.18: Added float values for fractions.

        min_weight_fraction_leaf float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.

        max_leaf_nodes int, default=None, Grow trees with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.

        min_impurity_decrease float, default=0.0, A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:

        min_impurity_split float, default=None, Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf.

        ccp_alpha non-negative float, default=0.0, Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. See Minimal Cost-Complexity Pruning for details.

        New in version 0.22.

        max_samples int or float, default=None, If bootstrap is True, the number of samples to draw from X to train each base estimator.

        n_jobs int, default=None
        The number of jobs to run in parallel. fit, predict, decision_path and apply are all parallelized over the trees. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See Glossary for more details.

        random_state int or RandomState, default=None
        Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features). See Glossary for details.


        """
        self.iterations = []

        space = [
            Integer(10, 5000, name='n_estimators'),
            Categorical(['gini', 'entropy'], name='criterion'),
            Integer(1, 100, name='max_depth'),
            Integer(1, 100, name='min_samples_split'),
            Integer(1, 100, name='min_samples_leaf'),
            Real(1e-8, 1, name='min_weight_fraction_leaf'),
            Integer(1, 100, name='max_leaf_nodes'),
            Real(1e-8, 1, name='min_impurity_decrease'),
            Real(1e-8, 1, name='min_impurity_split')
        ]

        @use_named_args(space)
        def objective(
            n_estimators,
            criterion,
            max_depth,
            min_samples_split,
            min_samples_leaf,
            min_weight_fraction_leaf,
            max_leaf_nodes,
            min_impurity_decrease,
            min_impurity_split,
        ):
            try:
                scores = []

                params = {
                    'n_estimators': int(round(n_estimators, 0)),
                    'criterion': criterion,
                    'max_depth': max_depth,
                    'min_samples_split': min_samples_split,
                    'min_samples_leaf': min_samples_leaf,
                    'min_weight_fraction_leaf': min_weight_fraction_leaf,
                    'max_leaf_nodes': int(round(max_leaf_nodes, 0)),
                    'min_impurity_decrease': min_impurity_decrease,
                    'min_impurity_split': min_impurity_split,
                    'random_state': self.random_state
                }

                if isinstance(self.fixed_parameters, dict):
                    params.update(self.fixed_parameters)

                skf = StratifiedKFold(self.n_folds,
                                      shuffle=self.shuffle,
                                      random_state=self.random_state)

                for train_index, valid_index in skf.split(x, y):

                    x_train, y_train = x[train_index, :], y[train_index]
                    x_valid, y_valid = x[valid_index, :], y[valid_index]

                    rf = RandomForestClassifier(**params, n_jobs=-1)

                    rf.fit(x_train, y_train)

                    y_valid_hat = rf.predict(x_valid)

                    loss_valid = log_loss(y_valid, y_valid_hat)

                    scores.append(loss_valid)

                result = np.mean(scores)

                self.iterations.append((params, result))

                return result

            except ValueError:

                return np.inf

        return self.execute_optimization(objective, space)
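The class that owns this optimize method is not shown, so execute_optimization is only implied. A minimal sketch, under the assumption that it simply hands the objective and space to skopt's gp_minimize:

    def execute_optimization(self, objective, space):
        # assumption: delegate to gp_minimize; the call budget is hypothetical
        result = gp_minimize(objective,
                             dimensions=space,
                             n_calls=50,
                             random_state=self.random_state)
        return result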
from models.tensorflow.tf_train_eval import TfTrainEvalModelFactory

if __name__ == '__main__':
    exp = Experiment('density/synthetic/inv_sin_normal')

    conf.num_workers = 4
    conf.visible_device_list = [0, 1]
    conf.eval_batch_size = {'0': 10000, '1': 10000}

    exp.data_loader = registry.inv_sin_normal()

    exp.model_factory = TfTrainEvalModelFactory(
        Model(name="MONDE_copula_const_cov"))

    exp.hyper_param_search = GridSearch([
        Categorical([32, 64, 128], name='hxy_sh'),
        Categorical([1, 2, 3], name='hxy_nh'),
        Categorical([32, 64, 128], name='x_sh'),
        Categorical([1, 2, 3], name='x_nh'),
        Categorical([16, 32], name='hxy_x'),
        Categorical([0.05, 0.01], name='clr'),
        Categorical([128], name='bs'),
        Categorical([1], name='rs'),
        Categorical(['AdamOptimizer'], name='opt'),
        Categorical([1e-4, 1e-3, 1e-2], name='opt_lr'),
    ])

    exp.early_stopping = EarlyStop(monitor_every_epoch=1, patience=[30])

    exp.run()