def main(args):
    tv = FLAGS.tv
    vv = FLAGS.vv
    bs = FLAGS.bs

    project_name = f'tv{tv}-vv{vv}-bs{bs}'
    print(f'Project Name: {project_name}')
    print()
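    # executions_per_trial re-runs each sampled configuration several times and
    # averages the objective (val_loss) before trials are compared.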
    tuner = RandomSearch(
        build_hyper_conv_estimator,
        objective='val_loss',
        max_trials=20,
        executions_per_trial=3,
        directory='hyper_search',
        project_name=project_name,
    )

    batch_size = 64
    batches = 4000
    workers = 2
    verbose = 2

    tuner.search_space_summary()
    dataset = TFSeqRandomDataGenerator(batch_size, batches)
    valid_dataset = TFSeqRandomDataGenerator(batch_size, 4000, version=1)
    tuner.search(dataset,
                 validation_data=valid_dataset,
                 epochs=10,
                 workers=workers,
                 use_multiprocessing=True,
                 verbose=verbose)
Example #2
    def _fit(self, X_train, y_train, X_test, y_test, X_val, y_val):
        tuner = RandomSearch(self._build_model,
                             objective='val_accuracy',
                             max_trials=self.max_trials,
                             executions_per_trial=1,
                             directory='logs/keras-tuner/',
                             project_name='cnn')

        tuner.search_space_summary()

        tuner.search(x=X_train,
                     y=y_train,
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     verbose=0,
                     validation_data=(X_val, y_val),
                     callbacks=[EarlyStopping('val_accuracy', patience=4)])
        tuner.results_summary()
        model = tuner.get_best_models(num_models=1)[0]
        model.summary()

        # Evaluate Best Model #
        _, train_acc = model.evaluate(X_train, y_train, verbose=0)
        _, test_acc = model.evaluate(X_test, y_test, verbose=0)
        print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
 def search_bestCNN(self,
                    X,
                    Y,
                    testX,
                    testY,
                    epochs=50,
                    max_trials=20,
                    batch_size=64,
                    project_name='A1'):
     tuner = RandomSearch(self._build_CNN,
                          objective='val_accuracy',
                          max_trials=max_trials,
                          executions_per_trial=1,
                          directory='tunerlog',
                          project_name=project_name)
     tuner.search(x=X,
                  y=Y,
                  epochs=epochs,
                  batch_size=batch_size,
                  validation_data=(testX, testY),
                  callbacks=[
                      tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       patience=5)
                  ],
                  verbose=2)
     tuner.search_space_summary()
     tuner.results_summary()
     print('best_hyperparameters')
     print(tuner.get_best_hyperparameters()[0].values)
     return tuner.get_best_models()
Example #4
def CNN_Hyper():
    training_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/training",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )
    validation_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/validation",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

    test_set = tf.keras.preprocessing.image_dataset_from_directory(
        DATA_PATH + "processed/testing",
        seed=957,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

    training_set = training_set.prefetch(buffer_size=32)
    validation_set = validation_set.prefetch(buffer_size=32)

    hyperModel = CNNHyperModel(IMAGE_SIZE + (3, ), CLASS_COUNT, "softmax")

    MAX_TRIALS = 20
    EXECUTION_PER_TRIAL = 1
    N_EPOCH_SEARCH = 25

    tuner = RandomSearch(hyperModel,
                         objective='val_accuracy',
                         seed=957,
                         max_trials=MAX_TRIALS,
                         executions_per_trial=EXECUTION_PER_TRIAL,
                         directory='random_search',
                         project_name='Stanford-Dogs-40_1')

    tuner.search_space_summary()

    tuner.search(training_set,
                 epochs=N_EPOCH_SEARCH,
                 validation_data=validation_set)

    # Show a summary of the search
    tuner.results_summary()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(test_set)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)
    best_model.summary()
    # Save model
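    # (no .h5 extension, so this writes a TensorFlow SavedModel directory)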
    best_model.save('CNN_Tuned_Best_Model')


# https://www.sicara.ai/blog/hyperparameter-tuning-keras-tuner
def main():

    dataset = makeHistoricalData(fixed_data, temporal_data, h, r, 'death',
                                 'mrmr', 'country', 'regular')

    numberOfSelectedCounties = len(dataset['county_fips'].unique())
    new_dataset = clean_data(dataset, numberOfSelectedCounties)
    X_train, y_train, X_val, y_val, X_test, y_test, y_train_date, y_test_date, y_val_date, val_naive_pred, test_naive_pred = preprocess(
        new_dataset)
    X_train, y_train, X_val, y_val, X_test, y_test, scalar = data_normalize(
        X_train, y_train, X_val, y_val, X_test, y_test)

    hypermodel = LSTMHyperModel(n=X_train.shape[2])

    tuner = RandomSearch(hypermodel,
                         objective='mse',
                         seed=1,
                         max_trials=60,
                         executions_per_trial=4,
                         directory='parameter_tuning',
                         project_name='lstm_model_tuning')

    tuner.search_space_summary()

    print()
    input("Press Enter to continue...")
    print()

    N_EPOCH_SEARCH = 50
    tuner.search(X_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.2)

    print()
    input("Press Enter to show the summary of search...")
    print()

    # Show a summary of the search
    tuner.results_summary()

    print()
    input("Press Enter to retrive the best model...")
    print()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    print()
    input("Press Enter to show best model summary...")
    print()

    best_model.summary()

    print()
    input("Press Enter to run the best model on test dataset...")
    print()

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(X_test, y_test)
    print("loss = " + str(loss) + ", acc = " + str(accuracy))
Example #6
def run_fn(fn_args):

  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=100)
  eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=100)

  log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='batch')

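  # Hardcoded toggle: the True branch trains a single Keras model under MirroredStrategy;
  # flip it to False to run the Keras Tuner search in the else branch instead.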
  if True:
    print("Use normal Keras model")
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
      model = build_keras_model(None)
    model.fit(
        train_dataset,
        epochs=1,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])
  else:
    print("Use normal Keras Tuner")
    tuner = RandomSearch(
        build_keras_model,
        objective='val_binary_accuracy',
        max_trials=5,
        executions_per_trial=3,
        directory=fn_args.serving_model_dir,
        project_name='tuner')
    tuner.search(
        train_dataset,
        epochs=1,
        steps_per_epoch=fn_args.train_steps,  # or just a few steps to find good hyperparameters, then fully fit the best model afterwards
        validation_steps=fn_args.eval_steps,
        validation_data=eval_dataset,
        callbacks=[tensorboard_callback, tf.keras.callbacks.EarlyStopping()])
    tuner.search_space_summary()
    tuner.results_summary()
    best_hparams = tuner.oracle.get_best_trials(1)[0].hyperparameters.get_config()
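    # best_hparams is not used further here; it could be logged or persisted alongside the model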
    model = tuner.get_best_models(1)[0]

  signatures = {
      'serving_default': get_serve_tf_examples_fn(model, tf_transform_output).get_concrete_function(
          tf.TensorSpec(shape=[None],
                        dtype=tf.string,
                        name='input_example_tensor')),
  }

  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
Example #7
def keras_tuner(x_train, y_train, x_test, y_test):
    from kerastuner.tuners import RandomSearch
    tuner = RandomSearch(build_model,
                         objective='val_accuracy',
                         max_trials=5,
                         executions_per_trial=3,
                         directory='./test',
                         project_name='helloworld')

    tuner.search_space_summary()

    tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

    tuner.results_summary()
def keras_hp_search(
    model_dir,
    epochs = 3,
    dataset_source: DATASET_SOURCE_TYPE = DATASET_SOURCE_TYPE.gcs,
    dataset_size: DATASET_SIZE_TYPE = DATASET_SIZE_TYPE.tiny,
    embeddings_mode: EMBEDDINGS_MODE_TYPE = EMBEDDINGS_MODE_TYPE.hashbucket,
    distribution_strategy: DistributionStrategyType = None):

    def build_model(hp):
        feature_columns = create_feature_columns(embeddings_mode)
        feature_layer = tf.keras.layers.DenseFeatures(feature_columns, name="feature_layer")
        Dense = tf.keras.layers.Dense
        kernel_regularizer = tf.keras.regularizers.l2(0.001)
        model = tf.keras.Sequential()
        model.add(feature_layer)
        model.add(Dense(hp.Choice('layer1', values=[50, 100, 200]), activation=tf.nn.relu, kernel_regularizer=kernel_regularizer))
        model.add(Dense(hp.Choice('layer2', values=[50, 100, 200]), activation=tf.nn.relu, kernel_regularizer=kernel_regularizer))
        model.add(Dense(1, activation=tf.nn.sigmoid, kernel_regularizer=kernel_regularizer))

        logging.info('compiling sequential keras model')
        # Compile Keras model
        model.compile(
          optimizer=tf.optimizers.SGD(learning_rate=0.05),
          loss=tf.keras.losses.BinaryCrossentropy(),
          metrics=['accuracy'])
        return model

    training_ds = criteo_nbdev.data_reader.get_dataset(dataset_source, dataset_size, DATASET_TYPE.training, embeddings_mode).repeat(epochs)
    eval_ds = criteo_nbdev.data_reader.get_dataset(dataset_source, dataset_size, DATASET_TYPE.validation, embeddings_mode).repeat(epochs)

    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=30,
        executions_per_trial=1,
        directory=model_dir)

    tuner.search_space_summary()
    tuner.search(training_ds,
                 validation_data=eval_ds,
                 epochs=3,
                 verbose=2)
Example #9
def KerasTuner(XTrain, YTrain, XValidation, YValidation):
    tuner = RandomSearch(buildModel,
                         objective='mse',
                         max_trials=30,
                         executions_per_trial=10,
                         directory='KerasTuner',
                         project_name=f'KerasTuner-{constants.NAME}')

    tuner.search_space_summary()

    tuner.search(XTrain,
                 YTrain,
                 epochs=5,
                 validation_data=(XValidation, YValidation))

    models = tuner.get_best_models(num_models=1)

    tuner.results_summary()

    return models
Example #10
def train_models(x_train, x_test, y_train, y_test, model_name, epochs,
                 batch_size, params):
    # Get the class object from the models file and create instance
    model = getattr(models, model_name)(**params)
    tuner = RandomSearch(
        model,
        objective=kerastuner.Objective("val_f1_m", direction="max"),
        max_trials=5,
        executions_per_trial=1,
        directory='random_search',
        project_name='sentiment_analysis_' + str(model_name),
        distribution_strategy=tf.distribute.MirroredStrategy())
    tuner.search_space_summary()
    tuner.search(x_train,
                 to_categorical(y_train),
                 epochs=epochs,
                 validation_data=(x_test, to_categorical(y_test)))
    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters
    return best_model, best_hps
Example #11
    model.add(Activation("softmax"))

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model


tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=2,  # how many variations on model?
    executions_per_trial=1,  # how many trials per variation? (same model could perform differently)
    directory=LOG_DIR)

tuner.search_space_summary()

tuner.search(x=x_train,
             y=y_train,
             epochs=2,
             batch_size=64,
             callbacks=[tensorboard],
             validation_data=(x_test, y_test))

tuner.results_summary()


with open(PICKLE, "wb") as f:
    pickle.dump(tuner, f)
def train_keras(train_dir, validation_dir, hidden_units):

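    # drop the first and last characters of the path string (presumably surrounding quotes)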
    TRAINING_DIR = train_dir[1:-1]
    training_datagen = ImageDataGenerator(
          rescale = 1./255,
          rotation_range=100,
          width_shift_range=0.4,
          height_shift_range=0.4,
          shear_range=0.4,
          zoom_range=0.2,
          horizontal_flip=True,
          fill_mode='nearest',
          featurewise_center=False,  # set input mean to 0 over the dataset
          samplewise_center=False,  # set each sample mean to 0
          featurewise_std_normalization=False,  # divide inputs by std of the dataset
          samplewise_std_normalization=False,  # divide each input by its std
          zca_whitening=False,  # apply ZCA whitening
          vertical_flip=False)
    

    VALIDATION_DIR = validation_dir[1:-1]
    validation_datagen = ImageDataGenerator(rescale = 1./255)
    
    train_generator = training_datagen.flow_from_directory(
        TRAINING_DIR,
        target_size=(150,150),
        class_mode='categorical',
        batch_size=20
    )
    
    validation_generator = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(150,150),
        class_mode='categorical',
        batch_size=20
    )
    invert_class_indices = invert_dict(validation_generator.class_indices)
    print(invert_class_indices)
    local_dir = tempfile.mkdtemp()
    local_filename = os.path.join(local_dir, "class_indice.json")
    with open(local_filename, 'w') as output_file:
        json.dump(invert_class_indices, output_file)  # write valid JSON (requires `import json`)
         
    mlflow.log_artifact(local_filename, "class_indice.json")
    print ("class_indice.json loaded",local_filename )
    
    
    # pull one batch from each generator and convert one-hot labels to integer class indices
    train_img, train_labels = train_generator.next()
    train_labels = train_labels.nonzero()[1]
    test_img, test_labels = validation_generator.next()
    test_labels = test_labels.nonzero()[1]
    
 
    INPUT_SHAPE = (150, 150, 3)  
    NUM_CLASSES = 6  #  number of classes
    class CNNHyperModel(HyperModel):
        def __init__(self, input_shape, num_classes):
            self.input_shape = input_shape
            self.num_classes = num_classes
    
        def build(self, hp):
            model = keras.Sequential()
            model.add(
                Conv2D(
                    filters=16,
                    kernel_size=3,
                    activation='relu',
                    input_shape=self.input_shape
                )
            )
            model.add(
                Conv2D(
                    filters=16,
                    activation='relu',
                    kernel_size=3
                )
            )
            model.add(MaxPooling2D(pool_size=2))
            model.add(
                Dropout(rate=hp.Float(
                    'dropout_1',
                    min_value=0.0,
                    max_value=0.5,
                    default=0.25,
                    step=0.05,
                ))
            )
            model.add(
                Conv2D(
                    filters=32,
                    kernel_size=3,
                    activation='relu'
                )
            )
            model.add(
                Conv2D(
                    filters=hp.Choice(
                        'num_filters',
                        values=[32, 64],
                        default=64,
                    ),
                    activation='relu',
                    kernel_size=3
                )
            )
            model.add(MaxPooling2D(pool_size=2))
            model.add(
                Dropout(rate=hp.Float(
                    'dropout_2',
                    min_value=0.0,
                    max_value=0.5,
                    default=0.25,
                    step=0.05,
                ))
            )
            model.add(Flatten())
            model.add(
                Dense(
                    units=hp.Int(
                        'units',
                        min_value=32,
                        max_value=512,
                        step=32,
                        default=128
                    ),
                    activation=hp.Choice(
                        'dense_activation',
                        values=['relu', 'tanh', 'sigmoid'],
                        default='relu'
                    )
                )
            )
            model.add(
                Dropout(
                    rate=hp.Float(
                        'dropout_3',
                        min_value=0.0,
                        max_value=0.5,
                        default=0.25,
                        step=0.05
                    )
                )
            )
            model.add(Dense(self.num_classes, activation='softmax'))
    
            model.compile(
                optimizer=keras.optimizers.Adam(
                    hp.Float(
                        'learning_rate',
                        min_value=1e-4,
                        max_value=1e-2,
                        sampling='LOG',
                        default=1e-3
                    )
                ),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )
            mlflow.keras.autolog()

            return model
           
    hypermodel = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)

    MAX_TRIALS = 5
    EXECUTION_PER_TRIAL = 5
    
    
    SEED=17
    tuner_dir = tempfile.mkdtemp()
    print("tunerdir=%s" % tuner_dir)
    tuner = RandomSearch(
        hypermodel,
        objective='val_accuracy',
        seed=SEED,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=tuner_dir,
        project_name='versatile'
    )
    tuner.search_space_summary()
    N_EPOCH_SEARCH = 10

    tuner.search(train_img, train_labels,
                 epochs=N_EPOCH_SEARCH,
                 validation_split=0.2,
                 callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6)])
    tuner.results_summary()

    # Retrieve the best model.
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model.
    loss, accuracy = best_model.evaluate(test_img, test_labels)
    print("accuracy=%s" % accuracy)
    
    mlflow.log_metric("loss", loss)
    mlflow.log_metric("accuracy", accuracy)


    mlflow.keras.log_model(best_model, "keras-model")
    def buttonClicked1(self):

        train_dir = 'train'
        val_dir = 'val'
        test_dir = 'test'
        img_width, img_height = 150, 150
        input_shape = (img_width, img_height, 3)

        epochs = self.InputEpochs.value()
        Nclasses = self.InputClass.value()
        batch_size = self.InputBatch.value()
        nb_train_samples = self.InputTrain.value()
        nb_validation_samples = self.InputValidation.value()
        nb_test_samples = self.InputTest.value()

        l=0

        def build_model(hp):  
            model = Sequential()

            num_hidden_layers = hp.Int('num_hidden_layers', 1, 3, default=1)
            num_conv_layers = hp.Int('num_conv_layers', 2, 6, default=2)

            model.add(Conv2D(32, (3, 3), input_shape=input_shape))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            for i in range(num_conv_layers):
                filters = hp.Int('filters'+str(i), 32, 64, step=16)
                model.add(Conv2D(filters,(3, 3)))
                model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Conv2D(128, (3, 3)))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())
    
            for j in range(num_hidden_layers):
                model.add(Dense(units=hp.Int('units_hiddenNeurons_'+str(j),
                                             min_value=128,
                                             max_value=1024,
                                             step=64),
                                activation=hp.Choice('activation'+str(j),values=['relu','tanh','elu','selu'])))

            model.add(Dropout(0.5))
            model.add(Dense(Nclasses))
            model.add(Activation('softmax'))
            model.compile(
                loss='categorical_crossentropy',
                optimizer=hp.Choice('optimizer', values=['adam','rmsprop','SGD'],default='adam'),
                metrics=['accuracy'])
            return model

        tuner = RandomSearch(
            build_model,
            objective='val_accuracy',
            max_trials=15,
            directory='test_directory')

        tuner.search_space_summary()

        datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = datagen.flow_from_directory(
            train_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        val_generator = datagen.flow_from_directory(
            val_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        test_generator = datagen.flow_from_directory(
            test_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='categorical')

        tuner.search(
            train_generator,
            steps_per_epoch=nb_train_samples // batch_size,
            epochs=epochs,
            validation_data=val_generator,
            validation_steps=nb_validation_samples // batch_size)

        tuner.results_summary()

        models = tuner.get_best_models(num_models=3)

        for model in models:
            model.summary()
            l=l+1
            scores = model.evaluate_generator(test_generator, nb_test_samples // batch_size)
            model.save('bestmodel_'+str(l)+'.h5')
            print("Аккуратность на тестовых данных: %.2f%%" % (scores[1]*100))
Example #14
def main():
    logging.getLogger().setLevel(logging.INFO)
    parser = argparse.ArgumentParser(description='Keras Tuner HP search')
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument(
        '--steps-per-epoch', type=int,
        default=-1)  # if set to -1, don't override the normal calcs for this
    parser.add_argument('--tuner-proj', required=True)
    parser.add_argument('--bucket-name', required=True)
    parser.add_argument('--tuner-dir', required=True)
    parser.add_argument('--tuner-num', required=True)
    parser.add_argument('--respath', required=True)
    parser.add_argument('--executions-per-trial', type=int, default=2)
    parser.add_argument('--max-trials', type=int, default=20)
    parser.add_argument('--num-best-hps', type=int, default=2)
    parser.add_argument('--data-dir',
                        default='gs://aju-dev-demos-codelabs/bikes_weather/')

    args = parser.parse_args()
    logging.info('Tensorflow version %s', tf.__version__)

    TRAIN_DATA_PATTERN = args.data_dir + "train*"
    EVAL_DATA_PATTERN = args.data_dir + "test*"

    train_batch_size = TRAIN_BATCH_SIZE
    eval_batch_size = 1000
    if args.steps_per_epoch == -1:  # calc based on dataset size
        steps_per_epoch = NUM_EXAMPLES // train_batch_size
    else:
        steps_per_epoch = args.steps_per_epoch
    logging.info('using %s steps per epoch', steps_per_epoch)

    logging.info('using train batch size %s', train_batch_size)
    train_dataset = bwmodel.read_dataset(TRAIN_DATA_PATTERN, train_batch_size)
    eval_dataset = bwmodel.read_dataset(
        EVAL_DATA_PATTERN, eval_batch_size, tf.estimator.ModeKeys.EVAL,
        eval_batch_size * 100 * STRATEGY.num_replicas_in_sync)

    logging.info('executions per trial: %s', args.executions_per_trial)

    # TODO: parameterize
    retries = 0
    num_retries = 5
    sleep_time = 5
    while retries < num_retries:
        try:
            tuner = RandomSearch(
                # tuner = Hyperband(
                create_model,
                objective='val_mae',
                # max_epochs=10,
                # hyperband_iterations=2,
                max_trials=args.max_trials,
                distribution_strategy=STRATEGY,
                executions_per_trial=args.executions_per_trial,
                directory=args.tuner_dir,
                project_name=args.tuner_proj)
            break
        except Exception as e:
            logging.warning(e)
            logging.info('sleeping %s seconds...', sleep_time)
            time.sleep(sleep_time)
            retries += 1
            sleep_time *= 2

    logging.info("search space summary:")
    tuner.search_space_summary()

    logging.info("hp tuning model....")
    tuner.search(
        train_dataset,
        validation_data=eval_dataset,
        validation_steps=eval_batch_size,
        epochs=args.epochs,
        steps_per_epoch=steps_per_epoch,
    )
    best_hps = tuner.get_best_hyperparameters(args.num_best_hps)
    best_hps_list = [best_hps[i].values for i in range(args.num_best_hps)]
    logging.info('best_hps_list: %s', best_hps_list)
    best_hp_values = json.dumps(best_hps_list)
    logging.info('best hyperparameters: %s', best_hp_values)

    storage_client = storage.Client()
    logging.info('writing best results to %s', args.respath)
    bucket = storage_client.get_bucket(args.bucket_name)
    logging.info('using bucket %s: %s, path %s', args.bucket_name, bucket,
                 args.respath)
    blob = bucket.blob(args.respath)
    blob.upload_from_string(best_hp_values)
Example #15
def tune(cfg):
    # =========
    # Configure
    # =========

    cfg = yaml.full_load(open(cfg))
    # descend into the config's single top-level algorithm section
    algName = next(iter(cfg))
    cfg = cfg[algName]

    # ======
    # Logger
    # ======

    logger = get_logger('Tune', 'INFO')

    # =======
    # Dataset
    # =======

    lmdb_dir = cfg['lmdb_dir']
    length = 4000
    train = 2000
    split = length - train

    s = np.arange(0, length)
    np.random.shuffle(s)

    # *** hardcoded shapes *** #
    y = list(
        islice(decaymode_generator(lmdb_dir, "Label", (), np.long), length))
    X_1 = list(
        islice(decaymode_generator(lmdb_dir, "ChargedPFO", (3, 6), np.float32),
               length))
    X_2 = list(
        islice(
            decaymode_generator(lmdb_dir, "NeutralPFO", (8, 21), np.float32),
            length))
    X_3 = list(
        islice(decaymode_generator(lmdb_dir, "ShotPFO", (6, 6), np.float32),
               length))
    X_4 = list(
        islice(decaymode_generator(lmdb_dir, "ConvTrack", (4, 6), np.float32),
               length))

    y = np.asarray(y)[s]
    X_1, X_2, X_3, X_4 = np.asarray(X_1)[s], np.asarray(X_2)[s], np.asarray(
        X_3)[s], np.asarray(X_4)[s]

    y_train = y[:-split]
    X_train_1, X_train_2, X_train_3, X_train_4 = (
        X_1[:-split], X_2[:-split], X_3[:-split], X_4[:-split])

    y_valid = y[-split:]
    X_valid_1, X_valid_2, X_valid_3, X_valid_4 = (
        X_1[-split:], X_2[-split:], X_3[-split:], X_4[-split:])

    # =====
    # Model
    # =====

    # build algs architecture, then print to console
    model_ftn = partial(getattr(ModelModule, cfg['model']), cfg['arch'])
    model = model_ftn()
    model.summary(print_fn=logger.info)

    hp = HyperParameters()

    hp.Fixed("n_layers_tdd_default", 3)
    hp.Fixed("n_layers_fc_default", 3)

    tuner = RandomSearch(
        getattr(ModelModule, cfg['tune_model']),
        hyperparameters=hp,
        tune_new_entries=True,
        objective='val_loss',
        max_trials=20,
        executions_per_trial=2,
        directory=os.path.join(cfg['save_dir'], cfg['tune']),
        project_name=cfg['tune'],
        distribution_strategy=tf.distribute.MirroredStrategy(),
    )

    logger.info('Search space summary: ')
    tuner.search_space_summary()

    logger.info('Now searching ... ')
    tuner.search([X_train_1, X_train_2, X_train_3, X_train_4],
                 y_train,
                 steps_per_epoch=int(train / 200),
                 epochs=20,
                 validation_steps=int(split / 200),
                 validation_data=([X_valid_1, X_valid_2, X_valid_3,
                                   X_valid_4], y_valid),
                 workers=10,
                 verbose=0)

    logger.info('Done! ')
    models = tuner.get_best_models(num_models=8)
    tuner.results_summary()

    logger.info('Saving best models ... ')
    for i, model in enumerate(models):
        arch = model.to_json()
        with open(
                os.path.join(cfg['save_dir'], cfg['tune'],
                             f'architecture-{i}.json'), 'w') as arch_file:
            arch_file.write(arch)
        model.save_weights(
            os.path.join(cfg['save_dir'], cfg['tune'], f'weights-{i}.h5'))
    logger.info('Done! ')
                                                    random_state=20)
# fine-tuning hyperparameters
filename = "Fine_tunning_model"
batch_size = 60
epochs = 150
patience = 25
min_delta = 500
input_shape = (X_train.shape[1],)
# parameterized hypermodel
hypermodel = HyperModel(input_shape)
# random-search tuner for the model
tuner_rs = RandomSearch(hypermodel, objective='mse', seed=20,
                        max_trials=150,
                        executions_per_trial=3,
                        directory='fine_tuning/')
tuner_rs.search_space_summary()
# stop conditions
stop_condition = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               mode='min',
                                               patience=patience,
                                               verbose=1,
                                               min_delta=min_delta,
                                               restore_best_weights=True)

learning_rate_schedule = ReduceLROnPlateau(monitor="val_loss",
                                           factor=0.5,
                                           patience=25,
                                           verbose=1,
                                           mode="auto",
                                           cooldown=0,
Example #17
def train_data(iq, symbol, symbols, timeframe):

    df = iq_get_data(iq, symbol, symbols, timeframe)

    # df =  pd.read_csv("EURUSD.csv")
    df = Indicators(df)

    df.isnull().sum().sum()  # there are no nans
    df.fillna(method="ffill", inplace=True)
    df = df.loc[~df.index.duplicated(keep='first')]
    df['future'] = df["GOAL"].shift(-predict_period)

    df = df.dropna()
    dataset = df.fillna(method="ffill")
    dataset = dataset.dropna()

    dataset.sort_index(inplace=True)

    main_df = dataset

    main_df.fillna(method="ffill", inplace=True)
    main_df.dropna(inplace=True)

    main_df['target'] = list(map(classify, main_df['GOAL'], main_df['future']))

    main_df.dropna(inplace=True)

    main_df['target'].value_counts()

    main_df.dropna(inplace=True)

    main_df = main_df.astype('float32')
    if VALIDATION_TRAIN:
        times = sorted(main_df.index.values)
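        # despite its name, last_5pct marks the start of the final 20% of the time-ordered index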
        last_5pct = sorted(main_df.index.values)[-int(0.2 * len(times))]

        validation_main_df = main_df[(main_df.index >= last_5pct)]
        main_df = main_df[(main_df.index < last_5pct)]

        train_x, train_y = preprocess_df(main_df)
        validation_x, validation_y = preprocess_df(validation_main_df)

        print(f"train data: {len(train_x)} validation: {len(validation_x)}")
        print(f"sells: {train_y.count(0)}, buys: {train_y.count(1)}")
        print(
            f"VALIDATION sells: {validation_y.count(0)}, buys : {validation_y.count(1)}"
        )

        train_y = np.asarray(train_y)
        validation_y = np.asarray(validation_y)
    else:
        train_x, train_y = preprocess_df(main_df)
        print(f"train data: {len(train_x)}")
        print(f"sells: {train_y.count(0)}, buys: {train_y.count(1)}")
        train_y = np.asarray(train_y)

    def build_model(hp):
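        # Note: every layer below reuses the hyperparameter name 'units', so Keras Tuner
        # samples one shared value per trial rather than a separate value per layer.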
        model = Sequential()
        model.add(
            LSTM(hp.Int('units', min_value=10, max_value=70, step=1),
                 input_shape=(train_x.shape[1:]),
                 return_sequences=True))
        model.add(Dropout(0.1))
        model.add(BatchNormalization())

        model.add(
            LSTM(units=hp.Int('units', min_value=10, max_value=70, step=1),
                 return_sequences=True))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

        model.add(
            LSTM(units=hp.Int('units', min_value=10, max_value=70, step=1)))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

        model.add(
            Dense(hp.Int('units', min_value=10, max_value=70, step=1),
                  activation='relu'))
        model.add(Dropout(0.2))

        model.add(Dense(2, activation='softmax'))

        # Compile model
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3])),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    tuner = RandomSearch(build_model,
                         objective='val_accuracy',
                         max_trials=200,
                         executions_per_trial=1,
                         directory='TUN',
                         project_name='IQOTC')

    tuner.search_space_summary()

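    # Note: validation_x / validation_y are only defined when VALIDATION_TRAIN is True.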
    tuner.search(train_x,
                 train_y,
                 epochs=EPOCHS,
                 validation_data=(validation_x, validation_y))

    # model = tuner.get_best_models(num_models=2)

    tuner.results_summary()
def run_fn(fn_args):
    """Build the estimator using the high level API.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.

  Returns:
    A dict of the following:
      - estimator: The estimator that will be used for training and eval.
      - train_spec: Spec for training.
      - eval_spec: Spec for eval.
      - eval_input_receiver_fn: Input function for eval.
  """

    train_batch_size = 100
    eval_batch_size = 100

    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
    # numeric_columns =  [tf.feature_column.numeric_column('packed_numeric')]
    numeric_columns = [
        tf.feature_column.numeric_column(key) for key in NUMERIC_FEATURE_KEYS
    ]
    categorical_columns = [
        tf.feature_column.indicator_column(  # pylint: disable=g-complex-comprehension
            tf.feature_column.categorical_column_with_hash_bucket(
                key, hash_bucket_size=CATEGORICAL_FEATURE_BUCKETS[key]))
        for key in CATEGORICAL_FEATURE_KEYS
    ]

    train_data = input_fn(  # pylint: disable=g-long-lambda
        fn_args.train_files,
        tf_transform_output,
        batch_size=train_batch_size)

    eval_data = input_fn(  # pylint: disable=g-long-lambda
        fn_args.eval_files,
        tf_transform_output,
        batch_size=eval_batch_size)

    feature_columns = numeric_columns + categorical_columns

    model = KerasModel(feature_columns)

    tuner = RandomSearch(
        model,
        objective='val_binary_accuracy',
        max_trials=10,
        # Keep tuner files separate from the model files so that the pusher can work properly when version <= 0.21.4.
        directory=os.path.dirname(fn_args.serving_model_dir),
        project_name='keras_tuner')

    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          update_freq='batch')
    # When passing an infinitely repeating dataset, you must specify the `steps_per_epoch` argument.
    tuner.search(train_data,
                 epochs=10,
                 steps_per_epoch=20,
                 validation_steps=fn_args.eval_steps,
                 validation_data=eval_data,
                 callbacks=[tensorboard_callback])
    tuner.search_space_summary()
    best_model = tuner.get_best_models(1)[0]
    signatures = {
        'serving_default':
        get_serving_receiver_fn(best_model,
                                tf_transform_output).get_concrete_function(
                                    tf.TensorSpec(shape=[None],
                                                  dtype=tf.string,
                                                  name='examples'))
    }
    # More about signatures:
    # https://www.tensorflow.org/api_docs/python/tf/saved_model/save?hl=en
    best_model.save(fn_args.serving_model_dir,
                    save_format='tf',
                    signatures=signatures)
Example #19
class AutoEncoder():
    def __init__(self, df_source_info, df_fluxes, df_wavelengths):
        X = self._prepare_data(df_source_info, df_fluxes, df_wavelengths)
        objids = self.df_quasars['objid'].values
        print(f'objids = {objids}')

        X_train, X_test = train_test_split(X, 0.2)
        self.objids_train, self.objids_test = train_test_split(objids, 0.2)
        
        self.scaler = StandardScaler()
        X_train = self.scaler.fit_transform(X_train)
        X_test = self.scaler.transform(X_test)

        self.X_train = np.expand_dims(X_train, axis=2)
        self.X_test = np.expand_dims(X_test, axis=2)
        
        print(f'self.X_train = {self.X_train}')
        
        self.optimizer = Nadam(lr=0.001)

    
    def _prepare_data(self, df_source_info, df_fluxes, df_wavelengths):
        if "b'" in str(df_source_info['class'][0]):
            df_source_info = remove_bytes_from_class(df_source_info)
    
        self.df_quasars = df_source_info.loc[df_source_info['class'] == 'QSO']
        quasar_objids = self.df_quasars['objid'].to_numpy()
        quasar_fluxes = df_fluxes.loc[df_fluxes['objid'].isin(quasar_objids)]
        
        X = np.delete(quasar_fluxes.values, 0, axis=1)
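        # downsample the spectra by keeping every 8th flux value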
        X = X[:, 0::8]
        print(f'X.shape = {X.shape}')

        X = X[:, np.mod(np.arange(X[0].size),25)!=0]

        print(f'X.shape {X.shape}')
        wavelengths = df_wavelengths.to_numpy()

        wavelengths = wavelengths[::8]
        self.wavelengths = wavelengths[0:448]
        # plot_spectrum(X[0], wavelengths)
        return X
    
    def build_model(self, hp):

        hyperparameters = {
            'layer_1_filters': hp.Choice('layer_1_filters', values=[16, 32, 64, 128, 256], default=64),
            'layer_1_kernel_size': hp.Choice('layer_1_kernel_size', values=[3, 5, 7, 9, 11]),
            'layer_2_filters': hp.Choice('layer_2_filters', values=[8, 16, 32, 64, 128], default=32),
            'layer_2_kernel_size': hp.Choice('layer_2_kernel_size', values=[3, 5, 7, 9]),
            'layer_3_filters': hp.Choice('layer_3_filters', values=[4, 8, 16, 32], default=32),
            'layer_3_kernel_size': hp.Choice('layer_3_kernel_size', values=[3, 5, 7]),
            'layer_4_filters': hp.Choice('layer_4_filters', values=[4, 8, 12, 16], default=16),
            'layer_4_kernel_size': hp.Choice('layer_4_kernel_size', values=[3, 5]),
            'layer_5_filters': hp.Choice('layer_5_filters', values=[2, 3, 4, 8], default=8),
            'layer_5_kernel_size': hp.Choice('layer_5_kernel_size', values=[3]),
            'optimizer': hp.Choice('optimizer', values=['adam', 'nadam', 'rmsprop']),
            'last_activation': hp.Choice('last_activation', ['tanh'])
        }
        
        # ================================================================================== #
        # ==================================== ENCODER ===================================== #
        # ================================================================================== #
        
        input_layer = Input(shape=(self.X_train.shape[1], 1))

        # encoder
        x = Conv1D(filters=hyperparameters['layer_1_filters'],
                   kernel_size=hyperparameters['layer_1_kernel_size'],
                   activation='relu', 
                   padding='same')(input_layer)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_2_filters'],
                    kernel_size=hyperparameters['layer_2_kernel_size'],
                    activation='relu',
                    padding='same')(x)
        
        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_3_filters'],
                    kernel_size=hyperparameters['layer_3_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_4_filters'],
                    kernel_size=hyperparameters['layer_4_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        x = MaxPooling1D(2)(x)
        x = Conv1D(filters=hyperparameters['layer_5_filters'],
                    kernel_size=hyperparameters['layer_5_kernel_size'],
                    activation='relu',
                    padding='same')(x)

        encoded = MaxPooling1D(2, padding="same")(x)

        # ================================================================================== #
        # ==================================== DECODER ===================================== #
        # ================================================================================== #

        x = Conv1D(filters=hyperparameters['layer_5_filters'],
                   kernel_size=hyperparameters['layer_5_kernel_size'],
                   activation='relu',
                   padding='same')(encoded)
        
        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_4_filters'],
                   kernel_size=hyperparameters['layer_4_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_3_filters'],
                   kernel_size=hyperparameters['layer_3_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_2_filters'],
                   kernel_size=hyperparameters['layer_2_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)

        x = Conv1D(filters=hyperparameters['layer_1_filters'],
                   kernel_size=hyperparameters['layer_1_kernel_size'],
                   activation='relu',
                   padding='same')(x)

        x = UpSampling1D(2)(x)
        decoded = Conv1D(1, 1, activation=hyperparameters['last_activation'], padding='same')(x)
        
        self.autoencoder = Model(input_layer, decoded)
        self.autoencoder.summary()
        self.autoencoder.compile(loss='mse', optimizer=hyperparameters['optimizer'])

        return self.autoencoder
    
    def train_model(self, epochs, batch_size=32):
        self.tuner = RandomSearch(self.build_model,
                                  objective='val_loss',
                                  max_trials=50,
                                  executions_per_trial=1,
                                  directory='logs/keras-tuner/',
                                  project_name='autoencoder')

        self.tuner.search_space_summary()

        self.tuner.search(x=self.X_train,
                          y=self.X_train,
                          epochs=24,
                          batch_size=32,
                          validation_data=(self.X_test, self.X_test),
                          callbacks=[EarlyStopping('val_loss', patience=3)])

        self.tuner.results_summary()

    def evaluate_model(self):
        best_model = self.tuner.get_best_models(1)[0]
        best_model.save('best_autoencoder_model')
        best_hyperparameters = self.tuner.get_best_hyperparameters(1)[0]

        print(f'best_model = {best_model}')
        print(f'best_hyperparameters = {best_hyperparameters.values}')
        nth_qso = 24

        X_test = np.squeeze(self.X_test, axis=2)

        preds = best_model.predict(self.X_test)
        preds = self.scaler.inverse_transform(np.squeeze(preds, axis=2))
        original = self.scaler.inverse_transform(X_test)

        qso_ra = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['ra'].values[0]
        qso_dec = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['dec'].values[0]
        qso_plate = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['plate'].values[0]
        qso_z = self.df_quasars.loc[self.df_quasars['objid'] == self.objids_test[nth_qso]]['z'].values[0]

        plotify = Plotify(theme='ugly') 

        _, axs = plotify.get_figax(nrows=2, figsize=(8, 8))
        axs[0].plot(self.wavelengths, original[nth_qso], color=plotify.c_orange)
        axs[1].plot(self.wavelengths, preds[nth_qso], color=plotify.c_orange)
        axs[0].set_title(f'ra = {qso_ra}, dec = {qso_dec}, z = {qso_z}, plate = {qso_plate}', fontsize=14)
        axs[1].set_title(f'Autoencoder recreation')
        axs[0].set_ylabel(r'$F_{\lambda[10^{-17} erg \: cm^{-2}s^{-1} Å^{-1}]}$', fontsize=14)
        axs[1].set_ylabel(r'$F_{\lambda[10^{-17} erg \: cm^{-2}s^{-1} Å^{-1}]}$', fontsize=14)
        axs[1].set_xlabel('Wavelength (Å)')

        plt.subplots_adjust(hspace=0.4)
        # plt.savefig('plots/autoencoder_gaussian', facecolor=plotify.c_background, dpi=180)
        plt.show()

        return preds
Example #20
    2, cls_target, num_classes, batch_size)

# list of "class" names used for confusion matrices and validity testing. Not always classes, also subgroups or minerals
class_names = [i for i in range(num_classes)]
class_weights = class_weight.compute_class_weight('balanced', class_names,
                                                  train_labels)

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=150,
    executions_per_trial=3,
    directory='/home/ben/Dropbox/uni/3_semester/ml/libs-pewpew/results',
    project_name='tuned_mlp')

tuner.search_space_summary(extended=True)
print('')

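# reload() restores the completed trials saved under `directory`/`project_name`
# instead of re-running the search (which is left commented out below).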
tuner.reload()
# tuner.search(
#     train_data,
#     steps_per_epoch=epoch_steps,
#     epochs=10,
#     class_weight=class_weights,
#     verbose=0,
#     validation_data=test_data
# )

tuner.results_summary()

print(tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters.values)
    return model


SEED = 10
direc = 'logs'

from kerastuner.tuners import Hyperband

tuner = RandomSearch(build_model,
                     objective="val_accuracy",
                     max_trials=3,
                     seed=SEED,
                     directory=direc,
                     executions_per_trial=3)

tuner.search_space_summary()

tuner.search(x_train,
             y_train,
             epochs=50,
             batch_size=24,
             validation_data=(x_test, y_test))

tuner.results_summary()

model = tuner.get_best_models()[0]
model.summary()
model.save("HP_model.h5")

print(tuner.get_best_hyperparameters()[0].values)
hps = tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters