Example #1
    def test_random_search(self):
        spark = SparkSession \
            .builder \
            .master("local[3]") \
            .appName("Python Spark SQL basic example") \
            .getOrCreate()

        # Load training data
        df = spark.read.format("libsvm").load("./tests/sample_libsvm_data.txt").repartition(8)
        df.printSchema()

        backend = SparkBackend(spark_context=spark.sparkContext, num_workers=3)
        store = LocalStore('/tmp')


        ######## Random Search ###########
        search_space = {'lr': hp_choice([0.01, 0.001, 0.0001])}

        # `estimator_gen_fn` is defined outside this snippet; Example #2 shows a
        # full definition. Keyword names mirror the TPESearch call in Example #2.
        random_search = RandomSearch(backend, store, estimator_gen_fn, search_space,
                                     num_models=3, num_epochs=1,
                                     validation=0.25,
                                     evaluation_metric='loss',
                                     feature_columns=['features'], label_columns=['label'])
        model = random_search.fit(df)

        output_df = model.transform(df)
        output_df.select('label', 'label__output').show(n=10)

        assert True
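
For reference, Example #1 relies on imports defined outside the snippet. A minimal sketch of what it assumes, following Cerebro's documented module layout (the module paths are an assumption; verify them against your installed version):

# Assumed imports for Example #1 (module paths are an assumption).
from pyspark.sql import SparkSession

from cerebro.backend import SparkBackend
from cerebro.storage import LocalStore
from cerebro.tune import RandomSearch, hp_choice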
Example #2
    def test_tpe(self):
        spark = SparkSession \
            .builder \
            .master("local[3]") \
            .appName("Python Spark SQL basic example") \
            .getOrCreate()

        # Load training data
        df = spark.read.format("libsvm").load(
            "./tests/sample_libsvm_data.txt").repartition(8)
        df.printSchema()

        backend = SparkBackend(spark_context=spark.sparkContext, num_workers=3)
        store = LocalStore('/tmp')

        def estimator_gen_fn(params):
            # Simple MLP over the 692-dimensional libsvm feature vector.
            model = tf.keras.models.Sequential()
            model.add(tf.keras.layers.Input(shape=(692,), name='features'))
            model.add(tf.keras.layers.Dense(100))
            model.add(tf.keras.layers.Dense(1))
            model.add(tf.keras.layers.Activation('sigmoid'))

            # `lr` was renamed to `learning_rate` in tf.keras.
            optimizer = tf.keras.optimizers.Adam(learning_rate=params['lr'])
            loss = 'binary_crossentropy'

            keras_estimator = SparkEstimator(model=model,
                                             optimizer=optimizer,
                                             loss=loss,
                                             metrics=['acc'],
                                             batch_size=10)

            return keras_estimator

        search_space = {
            'lr': hp_choice([0.01, 0.001, 0.0001]),
            # The 'dummy*' entries are not consumed by estimator_gen_fn; they
            # exercise the other sampling primitives.
            'dummy1': hp_uniform(0, 100),
            'dummy2': hp_quniform(0, 100, 1),
            'dummy3': hp_qloguniform(0, 100, 1),
        }

        hyperopt = TPESearch(backend=backend,
                             store=store,
                             estimator_gen_fn=estimator_gen_fn,
                             search_space=search_space,
                             num_models=3,
                             num_epochs=1,
                             validation=0.25,
                             evaluation_metric='loss',
                             feature_columns=['features'],
                             label_columns=['label'],
                             verbose=2)

        model = hyperopt.fit(df)
        output_df = model.transform(df)
        output_df.select('label', 'label__output').show(n=10)

        assert True
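
Example #2 likewise assumes imports outside the snippet; a hedged reconstruction (the Cerebro module paths are an assumption):

# Assumed imports for Example #2 (module paths are an assumption).
import tensorflow as tf
from pyspark.sql import SparkSession

from cerebro.backend import SparkBackend
from cerebro.keras import SparkEstimator
from cerebro.storage import LocalStore
from cerebro.tune import TPESearch, hp_choice, hp_uniform, hp_quniform, hp_qloguniform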
Example #3
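    # Fragment: the tail of an estimator_gen_fn(params) definition (cf. Example #2).
    # `model`, `optimizer`, `exp_rmspe`, and CUSTOM_OBJECTS come from earlier,
    # elided parts of the script.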
    estimator = SparkEstimator(model=model,
                               optimizer=optimizer,
                               loss='mae',
                               metrics=[exp_rmspe],
                               custom_objects=CUSTOM_OBJECTS,
                               batch_size=params['batch_size'])

    return estimator


# Define dictionary containing the parameter search space
search_space = {
    'lr': hp_loguniform(-5, -3),
    'l2': hp_loguniform(-6, -4),
    'num_layers': hp_choice([3, 4, 5, 6]),
    'batch_size': hp_quniform(16, 128, 16)
}

# Instantiate model selection object
model_selection = TPESearch(backend=backend,
                            store=store,
                            estimator_gen_fn=estimator_gen_fn,
                            search_space=search_space,
                            num_models=args.num_models,
                            num_epochs=args.epochs,
                            validation='Validation',
                            evaluation_metric='loss',
                            feature_columns=all_cols,
                            label_columns=['Sales'],
                            parallelism=args.num_workers)
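
The fragment stops after instantiation; following the fit/transform pattern of Examples #1 and #2, the remaining steps would look roughly like this (`train_df` is a hypothetical DataFrame name, and `Sales__output` follows the `<label>__output` column convention seen above):

# Sketch of the follow-up steps, mirroring Examples #1 and #2.
model = model_selection.fit(train_df)       # run the TPE search, keep the best model
output_df = model.transform(train_df)       # append prediction columns
output_df.select('Sales', 'Sales__output').show(n=10)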
Example #4
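    # Fragment: the end of an estimator_gen_fn definition; main() follows.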
    return keras_estimator


def main():
    SPARK_MASTER_URL = 'spark://...' # Change the Spark master URL.
    DATA_STORE_PATH = 'file:///...' # Change data store path. Should be accessible from all Spark workers.
    
    spark = SparkSession \
            .builder \
            .master(SPARK_MASTER_URL) \
            .appName("Deep Postures Example") \
            .getOrCreate()

    backend = SparkBackend(spark_context=spark.sparkContext, num_workers=1)
    store = LocalStore(DATA_STORE_PATH,
                       train_path=os.path.join(DATA_STORE_PATH, 'train'),
                       val_path=os.path.join(DATA_STORE_PATH, 'valid'))

    search_space = {
        'lr': hp_choice([0.001, 0.0001]),
        'l2_reg': hp_choice([0.001, 0.0001]),
        'win_size': hp_choice([7, 9]),
        'amp_factor': hp_choice([2, 4])
    }

    # GridSearch enumerates every combination in search_space, so no num_models
    # is needed; each grid point trains for 10 epochs.
    model_selection = GridSearch(backend, store, estimator_gen_fn, search_space,
                                 num_epochs=10, evaluation_metric='loss',
                                 feature_columns=['id', 'time', 'non_wear', 'sleeping', 'label', 'data'],
                                 label_columns=['label'])
    model = model_selection.fit_on_prepared_data()

if __name__ == "__main__":
    main()
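
Unlike `fit(df)`, `fit_on_prepared_data()` reads training and validation shards already materialized at the store's `train_path`/`val_path`. If the data is not yet in that layout, Cerebro's backend can materialize it from a DataFrame; a hedged sketch under that assumption (the `prepare_data` call follows Cerebro's documented pattern but should be verified against your version, `DATA_PATH` is a hypothetical placeholder, and the snippet also needs `import os` plus the Cerebro imports shown earlier):

# Hypothetical one-time preparation step assumed by fit_on_prepared_data().
df = spark.read.parquet(DATA_PATH)                # DATA_PATH: hypothetical dataset location
backend.prepare_data(store, df, validation=0.25)  # writes train/val shards into the store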