Example #1
def fit_hier_embedding(X, y, result_dir, project):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    y_train = to_categorical(y_train, output_dim)
    y_test = to_categorical(y_test, output_dim)

    X_train1 = X_train[['Rating', 'CocoaPercent']].values
    X_train2 = X_train.drop(['Rating', 'CocoaPercent'], axis=1).values
    X_test1 = X_test[['Rating', 'CocoaPercent']].values
    X_test2 = X_test.drop(['Rating', 'CocoaPercent'], axis=1).values

    dim1 = X_train1.shape[1]
    dim2 = X_train2.shape[1]

    hp = HyperParameters()

    bm = lambda x: tune_optimizer_model(hp, dim1, dim2)
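    # note: RandomSearch calls this with its own HyperParameters object for each trial; that argument is ignored here in favour of the hp created above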

    print(dim1, dim2)
    tuner = RandomSearch(bm,
                         objective='val_accuracy',
                         max_trials=MAX_TRIALS,
                         executions_per_trial=EXECUTIONS_PER_TRIAL,
                         directory=result_dir,
                         project_name=project,
                         seed=32)

    TRAIN_EPOCHS = 1000

    tuner.search(x=[X_train1, X_train2],
                 y=y_train,
                 epochs=TRAIN_EPOCHS,
                 validation_data=([X_test1, X_test2], y_test))
    tuner.results_summary()
Example #2
def CNN_Hyper():
    training_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/training",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )
    validation_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/validation",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

    test_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/testing",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

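    # prefetching keeps the input pipeline one batch ahead of the model during the search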
    training_set = training_set.prefetch(buffer_size=32)
    validation_set = validation_set.prefetch(buffer_size=32)

    hyperModel = CNNHyperModel(IMAGE_SIZE + (3, ), CLASS_COUNT, "softmax")

    MAX_TRIALS = 20
    EXECUTION_PER_TRIAL = 1
    N_EPOCH_SEARCH = 25

    tuner = RandomSearch(hyperModel,
                         objective='val_accuracy',
                         seed=957,
                         max_trials=MAX_TRIALS,
                         executions_per_trial=EXECUTION_PER_TRIAL,
                         directory='random_search',
                         project_name='Stanford-Dogs-40_1')

    tuner.search_space_summary()

    tuner.search(training_set,
                 epochs=N_EPOCH_SEARCH,
                 validation_data=validation_set)

    # Show a summary of the search
    tuner.results_summary()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(test_set)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)
    best_model.summary()
    # Save model
    best_model.save('CNN_Tuned_Best_Model')


# https://www.sicara.ai/blog/hyperparameter-tuning-keras-tuner
Example #3
def build_model(X_train, Y_train, X_test, Y_test):
    hyperModel = RegressionHyperModel((X_train.shape[1], ))

    tuner_rs = RandomSearch(hyperModel,
                            objective='mse',
                            max_trials=135,
                            executions_per_trial=1,
                            directory='param_opt_checkouts',
                            project_name='GDW')
    tuner_rs.search(X_train,
                    Y_train,
                    validation_data=(X_test, Y_test),
                    epochs=160)
    best_model = tuner_rs.get_best_models(num_models=1)[0]

    #metrics = ['loss', 'mse', 'mae', 'mape', 'cosine_proximity']
    #_eval = best_model.evaluate(X_test, Y_test)
    #print(_eval)
    #for i in range(len(metrics)):
    #    print(f'{metrics[i]} : {_eval[i]}')

    # history = best_model.fit(X_train, Y_train, validation_data = (X_test, Y_test), epochs=50)

    # best_model.save('./models_ANN/best_model')

    # save_model(best_model)
    tuner_rs.results_summary()
    print(load_model().summary())
    predict(best_model)
Example #4
def main():

    dataset = makeHistoricalData(fixed_data, temporal_data, h, r, 'death',
                                 'mrmr', 'country', 'regular')

    numberOfSelectedCounties = len(dataset['county_fips'].unique())
    new_dataset = clean_data(dataset, numberOfSelectedCounties)
    X_train, y_train, X_val, y_val, X_test, y_test, y_train_date, y_test_date, y_val_date, val_naive_pred, test_naive_pred = preprocess(
        new_dataset)
    X_train, y_train, X_val, y_val, X_test, y_test, scalar = data_normalize(
        X_train, y_train, X_val, y_val, X_test, y_test)

    hypermodel = LSTMHyperModel(n=X_train.shape[2])

    tuner = RandomSearch(hypermodel,
                         objective='mse',
                         seed=1,
                         max_trials=60,
                         executions_per_trial=4,
                         directory='parameter_tuning',
                         project_name='lstm_model_tuning')

    tuner.search_space_summary()

    print()
    input("Press Enter to continue...")
    print()

    N_EPOCH_SEARCH = 50
    tuner.search(X_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.2)

    print()
    input("Press Enter to show the summary of search...")
    print()

    # Show a summary of the search
    tuner.results_summary()

    print()
    input("Press Enter to retrive the best model...")
    print()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    print()
    input("Press Enter to show best model summary...")
    print()

    best_model.summary()

    print()
    input("Press Enter to run the best model on test dataset...")
    print()

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(X_test, y_test)
    print("loss = " + str(loss) + ", acc = " + str(accuracy))
Example #5
def random_keras_tuner(compiled_model, objective='val_accuracy', max_trials=5,
                       executions_per_trial=3):
    tuner = RandomSearch(
        compiled_model,
        objective=objective,
        max_trials=max_trials,
        executions_per_trial=executions_per_trial,
        directory='cryptolytic-ds',
        project_name='cryptolytic'
    )
    tuner.results_summary()
    return tuner
Example #6
def find_best_NN(x_train, y_train):
  tuner = RandomSearch(build_model, objective="loss", max_trials=10, executions_per_trial=1)
  print("\n\n\n")
  print('[INFO] start searching')
  tuner.search(x_train, y_train, batch_size=100, epochs=10, validation_split=0.2)
  print("\n\n\nRESULTS SUMMARY")
  tuner.results_summary()
  print("\n\n\n")
  print("\n\n\nHERE IS THE BEST MODEL\n\n\n")
  best_params = tuner.get_best_hyperparameters()[0]
  best_model = tuner.hypermodel.build(best_params)
  best_model.summary()
  return best_model
Example #7
def run_fn(fn_args):

  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=100)
  eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=100)

  log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='batch')

  if True:
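    # hard-coded switch: the Keras Tuner branch in the else block below is currently disabled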
    print("Use normal Keras model")
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
      model = build_keras_model(None)
    model.fit(
        train_dataset,
        epochs=1,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])
  else:
    print("Use normal Keras Tuner")
    tuner = RandomSearch(
        build_keras_model,
        objective='val_binary_accuracy',
        max_trials=5,
        executions_per_trial=3,
        directory=fn_args.serving_model_dir,
        project_name='tuner')
    tuner.search(
        train_dataset,
        epochs=1,
        steps_per_epoch=fn_args.train_steps,  # or just a few steps to find good HPs, then fit the chosen model properly
        validation_steps=fn_args.eval_steps,
        validation_data=eval_dataset,
        callbacks=[tensorboard_callback, tf.keras.callbacks.EarlyStopping()])
    tuner.search_space_summary()
    tuner.results_summary()
    best_hparams = tuner.oracle.get_best_trials(1)[0].hyperparameters.get_config()
    model = tuner.get_best_models(1)[0]

  signatures = {
      'serving_default': get_serve_tf_examples_fn(model, tf_transform_output).get_concrete_function(
          tf.TensorSpec(shape=[None],
                        dtype=tf.string,
                        name='input_example_tensor')),
  }

  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
Example #8
def search(
    epochs: int,
    n_trials: int,
    execution_per_trial: int,
    project: Text = "test",
    cleanup: bool = False,
):
    start_time = datetime.now()

    results_path = os.path.join(SEARCH_DIR, project)
    if cleanup and os.path.exists(results_path):
        shutil.rmtree(results_path)

    ds_tr = input_fn(
        "data/train_covertype.csv", shuffle=True, batch_size=DEFAULTS["batch_size"]
    )
    ds_val = input_fn(
        "data/val_covertype.csv", shuffle=False, batch_size=DEFAULTS["batch_size"]
    )

    num_train_steps = np.floor(N_TR_SAMPLES / DEFAULTS["batch_size"])
    num_valid_steps = np.floor(N_VAL_SAMPLES / DEFAULTS["batch_size"])

    # RandomSearch, BayesianOptimization
    tuner = RandomSearch(
        build_model,
        objective="val_loss",
        max_trials=n_trials,
        executions_per_trial=execution_per_trial,
        directory=SEARCH_DIR,
        project_name=project,
    )

    # tuner.search_space_summary()

    tuner.search(
        ds_tr,
        epochs=epochs,
        validation_data=ds_val,
        steps_per_epoch=num_train_steps,
        validation_steps=num_valid_steps,
    )

    # models = tuner.get_best_models(num_models=1)

    tuner.results_summary(num_trials=2)

    print(f"Total runtime: {(datetime.now() - start_time).seconds / 60:.2f} mins")
Example #9
    def _fit(self, X_train, y_train, X_test, y_test, X_val, y_val):
        tuner = RandomSearch(self._build_model,
                             objective='val_accuracy',
                             max_trials=self.max_trials,
                             executions_per_trial=1,
                             directory='logs/keras-tuner/',
                             project_name='cnn')

        tuner.search_space_summary()

        tuner.search(x=X_train,
                     y=y_train,
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     verbose=0,
                     validation_data=(X_val, y_val),
                     callbacks=[EarlyStopping('val_accuracy', patience=4)])
        print('kakkanat\n\n\n\n\n\n')
        print(tuner.results_summary())
        model = tuner.get_best_models(num_models=1)[0]
        print(model.summary())

        # Evaluate Best Model #
        _, train_acc = model.evaluate(X_train, y_train, verbose=0)
        _, test_acc = model.evaluate(X_test, y_test, verbose=0)
        print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
Example #10
 def search_bestCNN(self,
                    X,
                    Y,
                    testX,
                    testY,
                    epochs=50,
                    max_trials=20,
                    batch_size=64,
                    project_name='A1'):
     tuner = RandomSearch(self._build_CNN,
                          objective='val_accuracy',
                          max_trials=max_trials,
                          executions_per_trial=1,
                          directory='tunerlog',
                          project_name=project_name)
     tuner.search(x=X,
                  y=Y,
                  epochs=epochs,
                  batch_size=batch_size,
                  validation_data=(testX, testY),
                  callbacks=[
                      tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       patience=5)
                  ],
                  verbose=2)
     tuner.search_space_summary()
     print(tuner.results_summary())
     print('best_hyperparameters')
     print(tuner.get_best_hyperparameters()[0].values)
     return tuner.get_best_models()
Example #11
def find_best_NN(x_train, y_train):
  # create a tuner that can find an optimal model architecture
  tuner = RandomSearch(build_model, objective="val_mae", max_trials=40, executions_per_trial=1,)
  print("\n\n\n")
  # start the automatic hyperparameter search
  print('[INFO] start searching')
  tuner.search(x_train, y_train, batch_size=500, epochs=150, validation_split=0.3)
  # pick the best model
  print("\n\n\nRESULTS SUMMARY")
  tuner.results_summary()
  print("\n\n\n")
  # get the best model
  print("\n\n\nHERE IS THE BEST MODEL\n\n\n")
  best_params = tuner.get_best_hyperparameters()[0]
  best_model = tuner.hypermodel.build(best_params)
  best_model.summary()
  return best_model
Example #12
def KerasTuner(XTrain, YTrain, XValidation, YValidation):
    tuner = RandomSearch(buildModel,
                         objective='mse',
                         max_trials=30,
                         executions_per_trial=10,
                         directory='KerasTuner',
                         project_name=f'KerasTuner-{constants.NAME}')

    tuner.search_space_summary()

    tuner.search(XTrain,
                 YTrain,
                 epochs=5,
                 validation_data=(XValidation, YValidation))

    models = tuner.get_best_models(num_models=1)

    tuner.results_summary()

    return models
Example #13
def keras_tuner(x_train, y_train, x_test, y_test):
    from kerastuner.tuners import RandomSearch
    tuner = RandomSearch(build_model,
                         objective='val_accuracy',
                         max_trials=5,
                         executions_per_trial=3,
                         directory='./test',
                         project_name='helloworld')

    tuner.search_space_summary()

    tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

    print(tuner.results_summary())
Example #14
def tune():

    tuner = RandomSearch(tuner_model,
                         objective="val_accuracy",
                         max_trials=100,
                         executions_per_trial=1,
                         directory=LOG_DIR,
                         project_name='final_year_project')

    tuner.search(x=x_train,
                 y=y_train,
                 epochs=3,
                 batch_size=64,
                 validation_data=(x_test, y_test))

    with open("tuner.pkl", "wb") as f:
        pickle.dump(tuner, f)

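    # reload the tuner that was just pickled, showing that the search state can be restored from disk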
    tuner = pickle.load(open("tuner.pkl", "rb"))

    print(tuner.get_best_hyperparameters()[0].values)
    print(tuner.results_summary())
    print(tuner.get_best_models()[0].summary())
Example #15
      # model.summary()

      return model


# Declare tuner:
tuner = RandomSearch(build_model,
                     objective = 'val_accuracy',
                     max_trials = 1000,
                     executions_per_trial = 3,
                     directory = logdir,
                     project_name = project_name)


tuner.search_space_summary()

# Define callbacks:
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor = 'loss', factor = 0.5, patience = 50, min_lr = 0.0001)
callbacks = [reduce_lr]

# Train model:
tuner.search(x_train, y_train, batch_size = mini_batch_size, epochs = num_epochs,
             verbose = 1, validation_data = (x_test, y_test), callbacks = callbacks)


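# redirect stdout so the results summary below is written to a text file instead of the console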
sys.stdout = open(logdir + project_name + '/results.txt', 'w')
tuner.results_summary(num_trials = 1000)

model = tuner.get_best_models(num_models = 1)[0]
model.save(logdir + project_name + '/best_model.hdf5')
Example #16
def main():
    parser = argparse.ArgumentParser(
        description=
        "Train a bidirectional LSTM model for text sentiment classification")
    parser.add_argument("--train_mode",
                        type=str,
                        default='preset_param',
                        choices=['preset_param', 'kerastuner'],
                        help="Set the training mode (preset_param/kerastuner)")
    parser.add_argument("--batch_size",
                        type=int,
                        default=64,
                        help="Batch size")
    parser.add_argument("--sen_len",
                        type=int,
                        default=20,
                        help="Maximum length of a sentence")
    parser.add_argument("--lstm1",
                        type=int,
                        default=32,
                        help="Hidden dimension of first LSTM")
    parser.add_argument("--lstm2",
                        type=int,
                        default=32,
                        help="Hidden dimension of second LSTM")
    parser.add_argument("--dp_rate",
                        type=float,
                        default=0.5,
                        help="Dropout rate (percentage of droping)")
    parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
    parser.add_argument("--epochs", type=int, default=1, help="epochs")

    parser.add_argument("--epochs_before_search",
                        type=int,
                        default=1,
                        help="epochs_before_search")
    parser.add_argument("--epochs_after_search",
                        type=int,
                        default=1,
                        help="epochs_after_search")
    parser.add_argument("--max_trials",
                        type=int,
                        default=1,
                        help="max_trials for kerastuner")
    parser.add_argument("--executions_per_trial",
                        type=int,
                        default=1,
                        help="executions_per_trial for kerastuner")

    args = parser.parse_args()

    # Setup paths
    path_prefix = Path.cwd()
    train_with_label = os.path.join(path_prefix, 'data/training_label.txt')
    train_no_label = os.path.join(path_prefix, 'data/training_nolabel.txt')
    testing_data = os.path.join(path_prefix, 'data/testing_data.txt')
    w2v_path = path_prefix.joinpath('model/w2v_all.model')

    # Configuration
    batch_size = args.batch_size
    sen_len = args.sen_len

    # Preprocess dataset
    ## Read 'training_label.txt' and 'training_nolabel.txt'
    print("loading training data ...")
    X_train_label, y_train_label = load_training_data(train_with_label)
    X_train, X_val, y_train, y_val = train_test_split(X_train_label,
                                                      y_train_label,
                                                      test_size=0.1)

    train_x_no_label = load_training_data(train_no_label)

    print(
        f"Positive rate in training dataset: {np.sum(y_train) / len(y_train)}")
    print(f"Positive rate in validation dataset: {np.sum(y_val) / len(y_val)}")

    ## Build the preprocessor
    preprocessor = Preprocess(sen_len, w2v_path=str(w2v_path))
    embedding = preprocessor.make_embedding(load=True)
    X_train_idx = preprocessor.sentences_word2idx(X_train)
    X_val_idx = preprocessor.sentences_word2idx(X_val)

    print(f"Pretrained embedding matrix shape: {embedding.shape}")

    ## Preprocess training and validation datasets
    X_train_idx_dataset = tf.data.Dataset.from_tensor_slices(X_train_idx)
    y_train_dataset = tf.data.Dataset.from_tensor_slices(y_train)
    train_dataset = tf.data.Dataset.zip((X_train_idx_dataset, y_train_dataset))

    X_val_idx_dataset = tf.data.Dataset.from_tensor_slices(X_val_idx)
    y_val_dataset = tf.data.Dataset.from_tensor_slices(y_val)
    val_dataset = tf.data.Dataset.zip((X_val_idx_dataset, y_val_dataset))

    train_dataset = train_dataset.batch(batch_size)
    val_dataset = val_dataset.batch(batch_size)

    train_dataset = train_dataset.cache().prefetch(AUTOTUNE)
    val_dataset = val_dataset.cache().prefetch(AUTOTUNE)

    # Train a bidirectional LSTM model
    train_embedding = False  # fix embedding during training

    ## Method1 - preset parameters
    if args.train_mode == 'preset_param':
        ### Build the model
        hidden_dim1 = args.lstm1
        hidden_dim2 = args.lstm2
        dp_rate = args.dp_rate
        lr = args.lr
        epochs = args.epochs

        model = buildModel(embedding, train_embedding, sen_len, hidden_dim1,
                           hidden_dim2, dp_rate, lr)

        model.summary()

        ### Train the model
        checkpoint_filepath = os.path.join(path_prefix, 'ckpt/')
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath, save_best_only=True)

        history = model.fit(train_dataset,
                            validation_data=val_dataset,
                            epochs=epochs,
                            callbacks=[model_checkpoint_callback])

    elif args.train_mode == 'kerastuner':
        import IPython
        from kerastuner.tuners import RandomSearch

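        # callback that clears the notebook cell output when a trial finishes training, keeping the log readable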
        class ClearTrainingOutput(tf.keras.callbacks.Callback):
            def on_train_end(*args, **kwargs):
                IPython.display.clear_output(wait=True)

        ### Build the model
        tuner = RandomSearch(BiLstmTuner(embedding, train_embedding, sen_len),
                             objective='val_accuracy',
                             max_trials=args.max_trials,
                             executions_per_trial=args.executions_per_trial,
                             directory=os.path.join(path_prefix, 'tuner_dir'),
                             project_name='tsc')

        ### Train the model
        tuner.search(
            train_dataset,
            epochs=args.epochs_before_search,
            validation_data=val_dataset,
            verbose=1,
            callbacks=[ClearTrainingOutput()],
        )

    # Load the best model
    print('\nload model ...')

    ## Method1
    if args.train_mode == 'preset_param':
        best_model = tf.keras.models.load_model(checkpoint_filepath)
    ## Method2
    elif args.train_mode == 'kerastuner':
        tuner.results_summary(num_trials=min(3, args.max_trials))
        best_model = tuner.get_best_models()[0]
        best_model.summary()

        # Train again with training set and validation set
        combined_dataset = train_dataset.concatenate(val_dataset)
        best_model.fit(combined_dataset, epochs=args.epochs_after_search)

    # Testing
    ## Preprocess test dataset
    print("loading testing data ...")
    X_test = load_testing_data(testing_data)
    X_test_idx = preprocessor.sentences_word2idx(X_test)

    test_dataset = tf.data.Dataset.from_tensor_slices(X_test_idx)
    test_dataset = test_dataset.batch(batch_size)
    test_dataset = test_dataset.cache().prefetch(AUTOTUNE)

    ## Predict
    outputs = testing(best_model, test_dataset)

    # Write the result to a CSV file
    tmp = pd.DataFrame({
        "id": [str(i) for i in range(len(X_test))],
        "label": outputs
    })
    print("save csv ...")
    tmp.to_csv(os.path.join(path_prefix, 'predict.csv'), index=False)
    print("Finish Predicting")
Example #17
    def buttonClicked1(self):

        train_dir = 'train'
        val_dir = 'val'
        test_dir = 'test'
        img_width, img_height = 150, 150
        input_shape = (img_width, img_height, 3)

        epochs = self.InputEpochs.value()
        Nclasses = self.InputClass.value()
        batch_size = self.InputBatch.value()
        nb_train_samples = self.InputTrain.value()
        nb_validation_samples = self.InputValidation.value()
        nb_test_samples = self.InputTest.value()

        l=0

        def build_model(hp):  
            model = Sequential()

            num_hidden_layers = hp.Int('num_hidden_layers', 1, 3, default=1)
            num_conv_layers = hp.Int('num_conv_layers', 2, 6, default=2)
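            # the two counts above control how many extra conv layers and dense hidden layers the loops below add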

            model.add(Conv2D(32, (3, 3), input_shape=input_shape))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            for i in range(num_conv_layers):
                filters = hp.Int('filters'+str(i), 32, 64, step=16)
                model.add(Conv2D(filters,(3, 3)))
                model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Conv2D(128, (3, 3)))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())
    
            for j in range(num_hidden_layers):
                model.add(Dense(units=hp.Int('units_hiddenNeurons_'+str(j),
                                             min_value=128,
                                             max_value=1024,
                                             step=64),
                                activation=hp.Choice('activation'+str(j),values=['relu','tanh','elu','selu'])))

            model.add(Dropout(0.5))
            model.add(Dense(Nclasses))
            model.add(Activation('softmax'))
            model.compile(
                loss='categorical_crossentropy',
                optimizer=hp.Choice('optimizer', values=['adam','rmsprop','SGD'],default='adam'),
                metrics=['accuracy'])
            return model

        tuner = RandomSearch(
            build_model,
            objective='val_accuracy',
            max_trials=15,
            directory='test_directory')

        tuner.search_space_summary()

        datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = datagen.flow_from_directory(
            train_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        val_generator = datagen.flow_from_directory(
            val_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        test_generator = datagen.flow_from_directory(
            test_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        tuner.search(
            train_generator,
            steps_per_epoch=nb_train_samples // batch_size,
            epochs=epochs,
            validation_data=val_generator,
            validation_steps=nb_validation_samples // batch_size)

        tuner.results_summary()

        models = tuner.get_best_models(num_models=3)

        for model in models:
            model.summary()
            l=l+1
            scores = model.evaluate_generator(test_generator, nb_test_samples // batch_size)
            model.save('bestmodel_'+str(l)+'.h5')
            print("Аккуратность на тестовых данных: %.2f%%" % (scores[1]*100))
Example #18
def search(
    epochs: int,
    batch_size: int,
    n_trials: int,
    execution_per_trial: int,
    project: Text,
    do_cleanup: bool,
):
    set_seed(SEED)

    dir_to_clean = os.path.join(SEARCH_DIR, project)
    if do_cleanup and os.path.exists(dir_to_clean):
        shutil.rmtree(dir_to_clean)

    # first 80% of the train split for training, the remaining 20% for validation; the test split is held out for final evaluation.
    ds_tr, ds_val, ds_test = tfds.load(
        name="mnist",
        split=["train[:80%]", "train[-20%:]", "test"],
        data_dir="mnist",
        shuffle_files=False,
    )

    ds_tr = prepare_dataset(ds_tr,
                            batch_size,
                            shuffle=True,
                            drop_remainder=True)
    ds_val = prepare_dataset(ds_val,
                             batch_size,
                             shuffle=False,
                             drop_remainder=False)
    ds_test = prepare_dataset(ds_test,
                              batch_size,
                              shuffle=False,
                              drop_remainder=False)

    tuner = RandomSearch(
        build_model,
        objective="val_accuracy",
        max_trials=n_trials,
        executions_per_trial=execution_per_trial,
        directory=SEARCH_DIR,
        project_name=project,
    )

    # ? add callbacks
    tuner.search(
        ds_tr,
        epochs=epochs,
        validation_data=ds_val,
    )

    best_model: tf.keras.Model = tuner.get_best_models(num_models=1)[0]
    best_model.build((None, DEFAULTS["num_features"]))
    results = best_model.evaluate(ds_test, return_dict=True)

    tuner.results_summary(num_trials=1)
    best_hyperparams = tuner.get_best_hyperparameters(num_trials=1)
    print(f"Test results: {results}")

    output = {"results": results, "best_hyperparams": best_hyperparams}
    with open("search_results.pickle", "wb") as f:
        pickle.dump(output, f)
Example #19
def tune(cfg):
    # =========
    # Configure
    # =========

    cfg = yaml.full_load(open(cfg))
    # Go deep
    algName = [nm for nm in cfg][0]
    cfg = cfg[algName]

    # ======
    # Logger
    # ======

    logger = get_logger('Tune', 'INFO')

    # =======
    # Dataset
    # =======

    lmdb_dir = cfg['lmdb_dir']
    length = 4000
    train = 2000
    split = length - train

    s = np.arange(0, length)
    np.random.shuffle(s)

    # *** hardcoded shapes *** #
    y = list(
        islice(decaymode_generator(lmdb_dir, "Label", (), np.long), length))
    X_1 = list(
        islice(decaymode_generator(lmdb_dir, "ChargedPFO", (3, 6), np.float32),
               length))
    X_2 = list(
        islice(
            decaymode_generator(lmdb_dir, "NeutralPFO", (8, 21), np.float32),
            length))
    X_3 = list(
        islice(decaymode_generator(lmdb_dir, "ShotPFO", (6, 6), np.float32),
               length))
    X_4 = list(
        islice(decaymode_generator(lmdb_dir, "ConvTrack", (4, 6), np.float32),
               length))

    y = np.asarray(y)[s]
    X_1, X_2, X_3, X_4 = (np.asarray(X_1)[s], np.asarray(X_2)[s],
                          np.asarray(X_3)[s], np.asarray(X_4)[s])

    y_train = y[:-split]
    X_train_1, X_train_2, X_train_3, X_train_4 = (
        X_1[:-split], X_2[:-split], X_3[:-split], X_4[:-split])

    y_valid = y[-split:]
    X_valid_1, X_valid_2, X_valid_3, X_valid_4 = (
        X_1[-split:], X_2[-split:], X_3[-split:], X_4[-split:])

    # =====
    # Model
    # =====

    # build algs architecture, then print to console
    model_ftn = partial(getattr(ModelModule, cfg['model']), cfg['arch'])
    model = model_ftn()
    logger.info(model.summary())

    hp = HyperParameters()

    hp.Fixed("n_layers_tdd_default", 3)
    hp.Fixed("n_layers_fc_default", 3)

    tuner = RandomSearch(
        getattr(ModelModule, cfg['tune_model']),
        hyperparameters=hp,
        tune_new_entries=True,
        objective='val_loss',
        max_trials=20,
        executions_per_trial=2,
        directory=os.path.join(cfg['save_dir'], cfg['tune']),
        project_name=cfg['tune'],
        distribution_strategy=tf.distribute.MirroredStrategy(),
    )

    logger.info('Search space summary: ')
    tuner.search_space_summary()

    logger.info('Now searching ... ')
    tuner.search([X_train_1, X_train_2, X_train_3, X_train_4],
                 y_train,
                 steps_per_epoch=int(train / 200),
                 epochs=20,
                 validation_steps=int(split / 200),
                 validation_data=([X_valid_1, X_valid_2, X_valid_3,
                                   X_valid_4], y_valid),
                 workers=10,
                 verbose=0)

    logger.info('Done! ')
    models = tuner.get_best_models(num_models=8)
    tuner.results_summary()

    logger.info('Saving best models ... ')
    for i, model in enumerate(models):
        arch = model.to_json()
        with open(
                os.path.join(cfg['save_dir'], cfg['tune'],
                             f'architecture-{i}.json'), 'w') as arch_file:
            arch_file.write(arch)
        model.save_weights(
            os.path.join(cfg['save_dir'], cfg['tune'], f'weights-{i}.h5'))
    logger.info('Done! ')
Example #20
def main(hp_file, data_file, terms_file, gos_file, model_file, out_file, fold,
         batch_size, epochs, load, logger_file, threshold, device):
    gos_df = pd.read_pickle(gos_file)
    gos = gos_df['gos'].values.flatten()
    gos_dict = {v: i for i, v in enumerate(gos)}

    # cross validation settings
    model_file = f'fold{fold}_exp-' + model_file
    out_file = f'fold{fold}_exp-' + out_file
    params = {
        'input_shape': (len(gos), ),
        'exp_shape': 53,
        'nb_layers': 1,
        'loss': 'binary_crossentropy',
        'rate': 0.3,
        'learning_rate': 0.0001,
        'units': 1500,  # 750
        'model_file': model_file
    }

    print('Params:', params)
    global hpo
    hpo = Ontology(hp_file, with_rels=True)
    terms_df = pd.read_pickle(terms_file)
    global terms
    terms = terms_df['terms'].values.flatten()
    print('Phenotypes', len(terms))
    global term_set
    term_set = set(terms)
    train_df, valid_df, test_df = load_data(data_file, terms, fold)
    terms_dict = {v: i for i, v in enumerate(terms)}
    hpo_matrix = get_hpo_matrix(hpo, terms_dict)
    nb_classes = len(terms)
    params['nb_classes'] = nb_classes
    print(len(terms_dict))
    test_steps = int(math.ceil(len(test_df) / batch_size))
    test_generator = DFGenerator(test_df, gos_dict, terms_dict, len(test_df))
    valid_steps = int(math.ceil(len(valid_df) / batch_size))
    train_steps = int(math.ceil(len(train_df) / batch_size))

    xy_generator = DFGenerator(train_df, gos_dict, terms_dict, len(train_df))
    x, y = xy_generator[0]
    val_generator = DFGenerator(valid_df, gos_dict, terms_dict, len(valid_df))
    val_x, val_y = val_generator[0]
    test_x, test_y = test_generator[0]

    # train_generator = DFGenerator(train_df, gos_dict, terms_dict,
    #                               batch_size)
    # valid_generator = DFGenerator(valid_df, gos_dict, terms_dict,
    #                               batch_size)

    with tf.device(device):
        if load:
            print('Loading pretrained model')
            model = load_model(model_file,
                               custom_objects={'HPOLayer': HPOLayer})
            flat_model = load_model(model_file + '_flat.h5')
        else:
            print('Creating a new model')
            flat_model = MyHyperModel(params)
            # flat_model = create_flat_model(params)

            print("Training data size: %d" % len(train_df))
            print("Validation data size: %d" % len(valid_df))
            checkpointer = ModelCheckpoint(filepath=model_file + '_flat.h5',
                                           verbose=1,
                                           save_best_only=True)
            earlystopper = EarlyStopping(monitor='val_loss',
                                         patience=6,
                                         verbose=1)
            logger = CSVLogger(logger_file)

            # print('Starting training the flat model')
            # flat_model.summary()
            # flat_model.fit(
            #     train_generator,
            #     steps_per_epoch=train_steps,
            #     epochs=epochs,
            #     validation_data=valid_generator,
            #     validation_steps=valid_steps,
            #     max_queue_size=batch_size,
            #     workers=12,
            #     callbacks=[checkpointer, earlystopper])

            tuner = RandomSearch(flat_model,
                                 objective='val_loss',
                                 max_trials=50,
                                 directory='data-cafa',
                                 project_name='pheno')
            tuner.search(x,
                         y,
                         epochs=100,
                         validation_data=(val_x, val_y),
                         callbacks=[earlystopper])
            tuner.results_summary()
            logging.info('Loading best model')
            flat_model = tuner.get_best_models(num_models=1)[0]
            flat_model.summary()
            loss = flat_model.evaluate(val_x, val_y)
            print('Valid loss %f' % loss)
            flat_model.save(model_file + '_flat.h5')

            model = create_model(params, hpo_matrix)

            checkpointer = ModelCheckpoint(filepath=model_file,
                                           verbose=1,
                                           save_best_only=True)
            model.summary()
            print('Starting training the model')
            model.fit(x,
                      y,
                      epochs=epochs,
                      batch_size=batch_size,
                      validation_data=(val_x, val_y),
                      callbacks=[logger, checkpointer, earlystopper])

            logging.info('Loading best model')
            model = load_model(model_file,
                               custom_objects={'HPOLayer': HPOLayer})
            flat_model = load_model(model_file + '_flat.h5')

        logging.info('Evaluating model')
        loss = flat_model.evaluate(test_x, test_y, batch_size=batch_size)
        print('Flat Test loss %f' % loss)
        loss = model.evaluate(test_x, test_y, batch_size=batch_size)
        print('Test loss %f' % loss)

        logging.info('Predicting')
        start_time = time.time()
        preds = model.predict(test_x, batch_size=batch_size, verbose=1)
        end_time = time.time()
        run_time = (end_time - start_time)
        n_genes = len(test_df)
        print(f'Running time for {n_genes} is {run_time} sec')
        flat_preds = flat_model.predict(test_x,
                                        batch_size=batch_size,
                                        verbose=1)

        all_terms_df = pd.read_pickle('data/all_terms.pkl')
        all_terms = all_terms_df['terms'].values
        all_terms_dict = {v: k for k, v in enumerate(all_terms)}
        all_labels = np.zeros((len(test_df), len(all_terms)), dtype=np.int32)
        for i, row in enumerate(test_df.itertuples()):
            for hp_id in row.hp_annotations:
                if hp_id in all_terms_dict:
                    all_labels[i, all_terms_dict[hp_id]] = 1

        all_preds = np.zeros((len(test_df), len(all_terms)), dtype=np.float32)
        all_flat_preds = np.zeros((len(test_df), len(all_terms)),
                                  dtype=np.float32)
        for i in range(len(test_df)):
            for j in range(nb_classes):
                all_preds[i, all_terms_dict[terms[j]]] = preds[i, j]
                all_flat_preds[i, all_terms_dict[terms[j]]] = flat_preds[i, j]
        logging.info('Computing performance:')
        roc_auc = compute_roc(all_labels, all_preds)
        print('ROC AUC: %.2f' % (roc_auc, ))
        flat_roc_auc = compute_roc(all_labels, all_flat_preds)
        print('FLAT ROC AUC: %.2f' % (flat_roc_auc, ))
        test_df['preds'] = list(preds)
        print(test_df)
        logging.info('Saving predictions')
        test_df.to_pickle(out_file)

        test_df['preds'] = list(flat_preds)
        test_df.to_pickle(out_file + '_flat.pkl')
Example #21
class AutoEncoder():
    def __init__(self, df_source_info, df_fluxes, df_wavelengths):
        X = self._prepare_data(df_source_info, df_fluxes, df_wavelengths)
        objids = self.df_quasars['objid'].values
        print(f'objids = {objids}')

        X_train, X_test = train_test_split(X, test_size=0.2)
        self.objids_train, self.objids_test = train_test_split(objids, test_size=0.2)
        
        self.scaler = StandardScaler()
        X_train = self.scaler.fit_transform(X_train)
        X_test = self.scaler.transform(X_test)

        self.X_train = np.expand_dims(X_train, axis=2)
        self.X_test = np.expand_dims(X_test, axis=2)
        
        print(f'self.X_train = {self.X_train}')
        
        self.optimizer = Nadam(lr=0.001)

    
    def _prepare_data(self, df_source_info, df_fluxes, df_wavelengths):
        if "b'" in str(df_source_info['class'][0]):
            df_source_info = remove_bytes_from_class(df_source_info)
    
        self.df_quasars = df_source_info.loc[df_source_info['class'] == 'QSO']
        quasar_objids = self.df_quasars['objid'].to_numpy()
        quasar_fluxes = df_fluxes.loc[df_fluxes['objid'].isin(quasar_objids)]
        
        X = np.delete(quasar_fluxes.values, 0, axis=1)
        X = X[:, 0::8]
        print(f'X.shape = {X.shape}')

        X = X[:, np.mod(np.arange(X[0].size),25)!=0]

        print(f'X.shape {X.shape}')
        wavelengths = df_wavelengths.to_numpy()

        wavelengths = wavelengths[::8]
        self.wavelengths = wavelengths[0:448]
        # plot_spectrum(X[0], wavelengths)
        return X
    
    def build_model(self, hp):

        hyperparameters = {
            'layer_1_filters': hp.Choice('layer_1_filters', values=[16, 32, 64, 128, 256], default=64),
            'layer_1_kernel_size': hp.Choice('layer_1_kernel_size', values=[3, 5, 7, 9, 11]),
            'layer_2_filters': hp.Choice('layer_2_filters', values=[8, 16, 32, 64, 128], default=32),
            'layer_2_kernel_size': hp.Choice('layer_2_kernel_size', values=[3, 5, 7, 9]),
            'layer_3_filters': hp.Choice('layer_3_filters', values=[4, 8, 16, 32], default=32),
            'layer_3_kernel_size': hp.Choice('layer_3_kernel_size', values=[3, 5, 7]),
            'layer_4_filters': hp.Choice('layer_4_filters', values=[4, 8, 12, 16], default=16),
            'layer_4_kernel_size': hp.Choice('layer_4_kernel_size', values=[3, 5]),
            'layer_5_filters': hp.Choice('layer_5_filters', values=[2, 3, 4, 8], default=8),
            'layer_5_kernel_size': hp.Choice('layer_5_kernel_size', values=[3]),
            'optimizer': hp.Choice('optimizer', values=['adam', 'nadam', 'rmsprop']),
            'last_activation': hp.Choice('last_activation', ['tanh'])
        }
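        # the same filter and kernel-size choices are reused below so the decoder mirrors the encoder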
        
        # ================================================================================== #
        # ==================================== ENCODER ===================================== #
        # ================================================================================== #
        
        input_layer = Input(shape=(self.X_train.shape[1], 1))

        # encoder
        x = Conv1D(filters=hyperparameters['layer_1_filters'],
                   kernel_size=hyperparameters['layer_1_kernel_size'],
                   activation='relu', 
                   padding='same')(input_layer)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_2_filters'],
                    kernel_size=hyperparameters['layer_2_kernel_size'],
                    activation='relu',
                    padding='same')(x)
        
        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_3_filters'],
                    kernel_size=hyperparameters['layer_3_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_4_filters'],
                    kernel_size=hyperparameters['layer_4_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_5_filters'],
                    kernel_size=hyperparameters['layer_5_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        encoded = MaxPooling1D(2, padding="same")(x)

        # ================================================================================== #
        # ==================================== DECODER ===================================== #
        # ================================================================================== #

        x = Conv1D(filters=hyperparameters['layer_5_filters'],
                   kernel_size=hyperparameters['layer_5_kernel_size'],
                   activation='relu',
                   padding='same')(encoded)
        
        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_4_filters'],
                   kernel_size=hyperparameters['layer_4_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_3_filters'],
                   kernel_size=hyperparameters['layer_3_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_2_filters'],
                   kernel_size=hyperparameters['layer_2_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_1_filters'],
                   kernel_size=hyperparameters['layer_1_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)
        decoded = Conv1D(1, 1, activation=hyperparameters['last_activation'], padding='same')(x)
        
        self.autoencoder = Model(input_layer, decoded)
        self.autoencoder.summary()
        self.autoencoder.compile(loss='mse', optimizer=hyperparameters['optimizer'])

        return self.autoencoder
    
    def train_model(self, epochs, batch_size=32):
        self.tuner = RandomSearch(self.build_model,
                                  objective='val_loss',
                                  max_trials=50,
                                  executions_per_trial=1,
                                  directory='logs/keras-tuner/',
                                  project_name='autoencoder')

        self.tuner.search_space_summary()

        self.tuner.search(x=self.X_train,
                          y=self.X_train,
                          epochs=24,
                          batch_size=32,
                          validation_data=(self.X_test, self.X_test),
                          callbacks=[EarlyStopping('val_loss', patience=3)])

        self.tuner.results_summary()

    def evaluate_model(self):
        best_model = self.tuner.get_best_models(1)[0]
        best_model.save('best_autoencoder_model')
        best_hyperparameters = self.tuner.get_best_hyperparameters(1)[0]

        print(f'best_model = {best_model}')
        print(f'best_hyperparameters = {best_hyperparameters.values}')
        nth_qso = 24

        X_test = np.squeeze(self.X_test, axis=2)

        preds = best_model.predict(self.X_test)
        preds = self.scaler.inverse_transform(np.squeeze(preds, axis=2))
        original = self.scaler.inverse_transform(X_test)

        qso_ra = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['ra'].values[0]
        qso_dec = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['dec'].values[0]
        qso_plate = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['plate'].values[0]
        qso_z = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['z'].values[0]

        plotify = Plotify(theme='ugly') 

        _, axs = plotify.get_figax(nrows=2, figsize=(8, 8))
        axs[0].plot(self.wavelengths, original[nth_qso], color=plotify.c_orange)
        axs[1].plot(self.wavelengths, preds[nth_qso], color=plotify.c_orange)
        axs[0].set_title(f'ra = {qso_ra}, dec = {qso_dec}, z = {qso_z}, plate = {qso_plate}', fontsize=14)
        axs[1].set_title(f'Autoencoder recreation')
        axs[0].set_ylabel(r'$F_{\lambda[10^{-17} erg \: cm^{-2}s^{-1} Å^{-1}]}$', fontsize=14)
        axs[1].set_ylabel(r'$F_{\lambda[10^{-17} erg \: cm^{-2}s^{-1} Å^{-1}]}$', fontsize=14)
        axs[1].set_xlabel('Wavelength (Å)')

        plt.subplots_adjust(hspace=0.4)
        # plt.savefig('plots/autoencoder_gaussian', facecolor=plotify.c_background, dpi=180)
        plt.show()

        return preds
Example #22
def train_keras(train_dir, validation_dir, hidden_units):

    TRAINING_DIR=train_dir[1:-1]
    training_datagen = ImageDataGenerator(
          rescale = 1./255,
          rotation_range=100,
          width_shift_range=0.4,
          height_shift_range=0.4,
          shear_range=0.4,
          zoom_range=0.2,
          horizontal_flip=True,
          fill_mode='nearest',
          featurewise_center=False,  # set input mean to 0 over the dataset
          samplewise_center=False,  # set each sample mean to 0
          featurewise_std_normalization=False,  # divide inputs by std of the dataset
          samplewise_std_normalization=False,  # divide each input by its std
          zca_whitening=False,  # apply ZCA whitening
          vertical_flip=False)
    

    VALIDATION_DIR =validation_dir[1:-1]
    validation_datagen = ImageDataGenerator(rescale = 1./255)
    
    train_generator = training_datagen.flow_from_directory(
        TRAINING_DIR,
        target_size=(150,150),
        class_mode='categorical',
        batch_size=20
    )
    
    validation_generator = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(150,150),
        class_mode='categorical',
      batch_size=20
    )
    invert_class_indices=invert_dict(validation_generator.class_indices)
    print(invert_class_indices)
    local_dir = tempfile.mkdtemp()
    local_filename = os.path.join(local_dir, "class_indice.json")
    with open(local_filename, 'w') as output_file:
        print(invert_class_indices, file=output_file)
         
    mlflow.log_artifact(local_filename, "class_indice.json")
    print ("class_indice.json loaded",local_filename )
    
    
    train_img, train_labels = train_generator.next()
    train_labels = train_labels.nonzero()[1]
    test_img, test_labels = validation_generator.next()
    test_labels = test_labels.nonzero()[1]
    
 
    INPUT_SHAPE = (150, 150, 3)  
    NUM_CLASSES = 6  #  number of classes
    class CNNHyperModel(HyperModel):
        def __init__(self, input_shape, num_classes):
            self.input_shape = input_shape
            self.num_classes = num_classes
    
        def build(self, hp):
            model = keras.Sequential()
            model.add(
                Conv2D(
                    filters=16,
                    kernel_size=3,
                    activation='relu',
                    input_shape=self.input_shape
                )
            )
            model.add(
                Conv2D(
                    filters=16,
                    activation='relu',
                    kernel_size=3
                )
            )
            model.add(MaxPooling2D(pool_size=2))
            model.add(
                Dropout(rate=hp.Float(
                    'dropout_1',
                    min_value=0.0,
                    max_value=0.5,
                    default=0.25,
                    step=0.05,
                ))
            )
            model.add(
                Conv2D(
                    filters=32,
                    kernel_size=3,
                    activation='relu'
                )
            )
            model.add(
                Conv2D(
                    filters=hp.Choice(
                        'num_filters',
                        values=[32, 64],
                        default=64,
                    ),
                    activation='relu',
                    kernel_size=3
                )
            )
            model.add(MaxPooling2D(pool_size=2))
            model.add(
                Dropout(rate=hp.Float(
                    'dropout_2',
                    min_value=0.0,
                    max_value=0.5,
                    default=0.25,
                    step=0.05,
                ))
            )
            model.add(Flatten())
            model.add(
                Dense(
                    units=hp.Int(
                        'units',
                        min_value=32,
                        max_value=512,
                        step=32,
                        default=128
                    ),
                    activation=hp.Choice(
                        'dense_activation',
                        values=['relu', 'tanh', 'sigmoid'],
                        default='relu'
                    )
                )
            )
            model.add(
                Dropout(
                    rate=hp.Float(
                        'dropout_3',
                        min_value=0.0,
                        max_value=0.5,
                        default=0.25,
                        step=0.05
                    )
                )
            )
            model.add(Dense(self.num_classes, activation='softmax'))
    
            model.compile(
                optimizer=keras.optimizers.Adam(
                    hp.Float(
                        'learning_rate',
                        min_value=1e-4,
                        max_value=1e-2,
                        sampling='LOG',
                        default=1e-3
                    )
                ),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )
            mlflow.keras.autolog()
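            # enable MLflow autologging so each trial's training metrics are captured automatically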

            return model
           
    hypermodel = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)

    MAX_TRIALS = 5
    EXECUTION_PER_TRIAL = 5
    
    
    SEED=17
    hypermodel = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
    tuner_dir = tempfile.mkdtemp()
    print("tunerdir=%s" % tuner_dir)
    tuner = RandomSearch(
        hypermodel,
        objective='val_accuracy',
        seed=SEED,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=tuner_dir,
        project_name='versatile'
    )
    tuner.search_space_summary()
    N_EPOCH_SEARCH = 10

    tuner.search(train_img, train_labels, epochs=N_EPOCH_SEARCH, validation_split=0.2, callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6)])
    tuner.results_summary()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(test_img, test_labels)
    print("accuracy=%s" % accuracy)
    
    mlflow.log_metric("loss", loss)
    mlflow.log_metric("accuracy", accuracy)


    mlflow.keras.log_model(best_model, "keras-model")
Example #23
def train_data(iq, symbol, symbols, timeframe):

    df = iq_get_data(iq, symbol, symbols, timeframe)

    # df =  pd.read_csv("EURUSD.csv")
    df = Indicators(df)

    df.isnull().sum().sum()  # there are no nans
    df.fillna(method="ffill", inplace=True)
    df = df.loc[~df.index.duplicated(keep='first')]
    df['future'] = df["GOAL"].shift(-predict_period)

    df = df.dropna()
    dataset = df.fillna(method="ffill")
    dataset = dataset.dropna()

    dataset.sort_index(inplace=True)

    main_df = dataset

    main_df.fillna(method="ffill", inplace=True)
    main_df.dropna(inplace=True)

    main_df['target'] = list(map(classify, main_df['GOAL'], main_df['future']))

    main_df.dropna(inplace=True)

    main_df['target'].value_counts()

    main_df.dropna(inplace=True)

    main_df = main_df.astype('float32')
    if VALIDATION_TRAIN:
        times = sorted(main_df.index.values)
        last_20pct = sorted(main_df.index.values)[-int(0.2 * len(times))]

        validation_main_df = main_df[(main_df.index >= last_20pct)]
        main_df = main_df[(main_df.index < last_20pct)]

        train_x, train_y = preprocess_df(main_df)
        validation_x, validation_y = preprocess_df(validation_main_df)

        print(f"train data: {len(train_x)} validation: {len(validation_x)}")
        print(f"sells: {train_y.count(0)}, buys: {train_y.count(1)}")
        print(
            f"VALIDATION sells: {validation_y.count(0)}, buys : {validation_y.count(1)}"
        )

        train_y = np.asarray(train_y)
        validation_y = np.asarray(validation_y)
    else:
        train_x, train_y = preprocess_df(main_df)
        print(f"train data: {len(train_x)}")
        print(f"sells: {train_y.count(0)}, buys: {train_y.count(1)}")
        train_y = np.asarray(train_y)

    def build_model(hp):
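        # note: every layer requests hp.Int('units', ...) under the same name, so all of these layers share a single tuned value per trial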
        model = Sequential()
        model.add(
            LSTM(hp.Int('units', min_value=10, max_value=70, step=1),
                 input_shape=(train_x.shape[1:]),
                 return_sequences=True))
        model.add(Dropout(0.1))
        model.add(BatchNormalization())

        model.add(
            LSTM(units=hp.Int('units', min_value=10, max_value=70, step=1),
                 return_sequences=True))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

        model.add(
            LSTM(units=hp.Int('units', min_value=10, max_value=70, step=1)))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

        model.add(
            Dense(hp.Int('units', min_value=10, max_value=70, step=1),
                  activation='relu'))
        model.add(Dropout(0.2))

        model.add(Dense(2, activation='softmax'))

        # Compile model
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3])),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    tuner = RandomSearch(build_model,
                         objective='val_accuracy',
                         max_trials=200,
                         executions_per_trial=1,
                         directory='TUN',
                         project_name='IQOTC')

    tuner.search_space_summary()

    tuner.search(train_x,
                 train_y,
                 epochs=EPOCHS,
                 validation_data=(validation_x, validation_y))

    # model = tuner.get_best_models(num_models=2)

    tuner.results_summary()
Example #24
def find_hyperparameters(X_train, X_test, y_train, y_test):
    tuner = RandomSearch(create_model_from_hyperparameters, objective="val_accuracy", max_trials=20, directory="models", seed=42)
    tuner.search(x=X_train, y=y_train, epochs=25, validation_data=(X_test, y_test))
    tuner.results_summary()
    for item in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(item),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss="mean_absolute_error",
        metrics=['mean_absolute_error'])
    return model


tuner = RandomSearch(
    Model,
    objective='val_mean_absolute_error',
    max_trials=5,
    executions_per_trial=3,
    directory='tuned parameters',
    project_name='Air quality Index')

X_train, X_test, y_train, y_test = train_test_split(Independent_Features, dependent_Feature, test_size=0.2)

tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

print(tuner.results_summary())

print(tuner.get_best_models(num_models=2))
Example #26
    model.add(Activation("softmax"))

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model


tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=2,  # how many variations on model?
    executions_per_trial=1,  # how many trials per variation? (same model could perform differently)
    directory=LOG_DIR)

tuner.search_space_summary()

tuner.search(x=x_train,
             y=y_train,
             epochs=2,
             batch_size=64,
             callbacks=[tensorboard],
             validation_data=(x_test, y_test))

tuner.results_summary()


with open(PICKLE, "wb") as f:
    pickle.dump(tuner, f)

#%%[markdown]
'''
Start the search for the best hyperparameter configuration. The call to search has the same
signature as model.fit(). During search, models are built iteratively by calling the
model-building function, which populates the hyperparameter search space tracked by the hp
object. The tuner progressively explores that space, recording metrics for each configuration.
'''
random_search_tuner.search(X_train,
                           y_train,
                           epochs=5,
                           validation_data=(X_test, y_test)
                           #validation_split=0.2,verbose=1)
                           )
#%%
random_search_tuner.results_summary()

#%%
bayesian_opt_tuner.search(X_train,
                          y_train,
                          epochs=5,
                          validation_data=(X_test, y_test)
                          #validation_split=0.2,verbose=1)
                          )
#%%
bayesian_opt_tuner.results_summary()

#%%[markdown]
### Best models achieved with the random search and Bayesian hyperparameter optimization
rand_searched_models = random_search_tuner.get_best_models(num_models=-1)
bayes_optimized_models = bayesian_opt_tuner.get_best_models(num_models=-1)