Example #1
import featuretools as ft
import pandas as pd

from sklearn.metrics import f1_score

# MLPipeline and MLHyperparam are provided by MLBlocks; the exact import
# path depends on the installed MLBlocks version.
from mlblocks import MLPipeline, MLHyperparam


def run():

    print("============================================")
    print("Testing Multi Table Pipeline")
    print("============================================")

    orders = pd.read_csv("data/Retail/orders.csv")
    order_products = pd.read_csv("data/Retail/order_products.csv")
    label_times = pd.read_csv("data/Retail/label_times.csv")

    X_train = label_times.sample(frac=0.8)
    X_test = label_times.drop(X_train.index)
    y_train = X_train["label"]
    y_test = X_test["label"]

    # make_entity_set is a project helper assumed to be defined elsewhere
    # (a sketch is given after this example).
    entity_set = make_entity_set(orders, order_products)

    multitable = MLPipeline(['dfs', 'random_forest_classifier'])

    # Build a replacement hyperparameter for the 'dfs' block. The update
    # call is left disabled, as in the original example.
    updated_hyperparam = MLHyperparam('max_depth', 'int', [1, 10])
    updated_hyperparam.block_name = 'dfs'
    # multitable.update_tunable_hyperparams([updated_hyperparam])

    # Check that the hyperparameters are correct.
    for hyperparam in multitable.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {'dfs', 'rf_classifier'}
    blocks = set(multitable.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly. Extra parameters are routed to a
    # specific block with (block_name, parameter_name) keys.
    produce_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'cutoff_time_in_index'): True
    }
    print("\nFitting pipeline...")
    fit_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'target_entity'): "users",
        ('dfs', 'training_window'): ft.Timedelta("60 days")
    }
    multitable.fit(X_train,
                   y_train,
                   fit_params=fit_params,
                   produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    predicted_y_val = multitable.predict(X_test, predict_params=produce_params)
    score = f1_score(y_test, predicted_y_val, average='micro')
    print("\nf1 micro score: %f" % score)

    return score
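
The helper make_entity_set is not shown in this example. Below is a minimal sketch of what it might look like, assuming the featuretools 0.x API and hypothetical column names (order_id, user_id, order_product_id, order_time) for the two CSV files; the real helper is defined elsewhere in the project.

import featuretools as ft


def make_entity_set(orders, order_products):
    # Hypothetical sketch: the column names below are assumptions, not
    # taken from the original project.
    es = ft.EntitySet(id="retail")
    es = es.entity_from_dataframe(
        entity_id="order_products",
        dataframe=order_products,
        index="order_product_id",
        time_index="order_time")
    es = es.entity_from_dataframe(
        entity_id="orders",
        dataframe=orders,
        index="order_id")
    # Link each order product to its parent order.
    es = es.add_relationship(ft.Relationship(
        es["orders"]["order_id"],
        es["order_products"]["order_id"]))
    # Derive the "users" entity that the pipeline uses as target_entity.
    es = es.normalize_entity(
        base_entity_id="orders",
        new_entity_id="users",
        index="user_id")
    return es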
Example #2
import glob
import os

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# MLPipeline is provided by MLBlocks; the exact import path depends on the
# installed MLBlocks version.
from mlblocks import MLPipeline


def run(train_size=160, test_size=40):

    print("============================================")
    print("Testing Audio Pipeline")
    print("============================================")

    # Data loading.
    classes = [
        'street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling',
        'drilling', 'dog_bark', 'children_playing', 'car_horn',
        'air_conditioner'
    ]

    labels = []
    all_filepaths = []
    for label_class in classes:
        for filepath in glob.glob(
                os.path.join('data/UrbanSound/data', label_class, '*.wav')):
            all_filepaths.append(filepath)
            labels.append(label_class)

    filepaths, filepaths_test, y, y_test = train_test_split(
        all_filepaths, labels, train_size=train_size, test_size=test_size)

    audio_pipeline = MLPipeline([
        'audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'
    ])

    # Check that the hyperparameters are correct.
    for hyperparam in audio_pipeline.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {
        'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'
    }
    blocks = set(audio_pipeline.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly.
    print("\nFitting pipeline...")
    # load_and_segment is a project helper assumed to be defined elsewhere
    # (a sketch is given after this example).
    X, sample_freqs = load_and_segment(filepaths)
    produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
    audio_pipeline.fit(X, y, produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    X_test, sample_freqs_test = load_and_segment(filepaths_test)
    predict_params = {('audio_featurizer', 'sample_freqs'): sample_freqs_test}
    predicted_y_val = audio_pipeline.predict(X_test, predict_params=predict_params)
    score = f1_score(y_test, predicted_y_val, average='micro')
    print("\nf1 micro score: %f" % score)

    return score
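
load_and_segment is likewise a project helper not shown here. A minimal sketch follows, assuming plain WAV loading with scipy and no extra segmentation logic; the real helper may differ.

from scipy.io import wavfile


def load_and_segment(filepaths):
    # Hypothetical sketch: read each WAV file and collect the raw audio
    # arrays together with their sampling frequencies. The real helper may
    # also split long recordings into fixed-length segments.
    audios = []
    sample_freqs = []
    for filepath in filepaths:
        freq, audio = wavfile.read(filepath)
        audios.append(audio)
        sample_freqs.append(freq)
    return audios, sample_freqs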