def run():
    """Smoke-test the multi-table (DFS + random forest) pipeline.

    Loads the retail CSVs, builds an entityset, fits the pipeline on an
    80/20 split of the label times, and returns the f1-micro score.
    """
    print("============================================")
    print("Testing Multi Table Pipeline")
    print("============================================")

    # Load the raw retail tables and the label/cutoff times.
    orders_df = pd.read_csv("data/Retail/orders.csv")
    order_products_df = pd.read_csv("data/Retail/order_products.csv")
    label_times = pd.read_csv("data/Retail/label_times.csv")

    # Random 80/20 train/test split of the label times.
    X_train = label_times.sample(frac=0.8)
    X_test = label_times.drop(X_train.index)
    y_train = X_train["label"]
    y_test = X_test["label"]

    es = make_entity_set(orders_df, order_products_df)

    pipeline = MLPipeline(['dfs', 'random_forest_classifier'])

    # Prepared hyperparameter override for the DFS block; the actual update
    # call is deliberately left disabled below.
    tuned_depth = MLHyperparam('max_depth', 'int', [1, 10])
    tuned_depth.block_name = 'dfs'
    # pipeline.update_tunable_hyperparams([tuned_depth])

    # Check that the hyperparameters are correct.
    for hp in pipeline.get_tunable_hyperparams():
        print(hp)

    # Check that the blocks are correct.
    assert set(pipeline.blocks.keys()) == {'dfs', 'rf_classifier'}

    # Check that we can score properly.
    produce_params = {
        ('dfs', 'entityset'): es,
        ('dfs', 'cutoff_time_in_index'): True,
    }

    print("\nFitting pipeline...")
    fit_params = {
        ('dfs', 'entityset'): es,
        ('dfs', 'target_entity'): "users",
        ('dfs', 'training_window'): ft.Timedelta("60 days"),
    }
    pipeline.fit(X_train, y_train,
                 fit_params=fit_params,
                 produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    predictions = pipeline.predict(X_test, predict_params=produce_params)
    score = f1_score(predictions, y_test, average='micro')
    print("\nf1 micro score: %f" % score)
    return score
def run(train_size=160, test_size=40):
    """Smoke-test the audio classification pipeline on UrbanSound clips.

    Gathers .wav files for ten sound classes, splits them into train/test
    sets of the requested sizes, fits the pipeline, and returns the
    f1-micro score.

    Args:
        train_size: number of clips used for fitting.
        test_size: number of clips held out for scoring.
    """
    print("============================================")
    print("Testing Audio Pipeline")
    print("============================================")

    # Data loading: one directory of .wav files per class label.
    class_names = [
        'street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling',
        'drilling', 'dog_bark', 'children_playing', 'car_horn',
        'air_conditioner'
    ]
    paths = []
    targets = []
    for cls in class_names:
        matches = glob.glob(
            os.path.join('data/UrbanSound/data', cls, '*.wav'))
        paths.extend(matches)
        targets.extend([cls] * len(matches))

    train_paths, test_paths, y, y_test = train_test_split(
        paths, targets, train_size=train_size, test_size=test_size)

    pipeline = MLPipeline([
        'audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'
    ])

    # Check that the hyperparameters are correct.
    for hp in pipeline.get_tunable_hyperparams():
        print(hp)

    # Check that the blocks are correct.
    assert set(pipeline.blocks.keys()) == {
        'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'
    }

    # Check that we can score properly.
    print("\nFitting pipeline...")
    X, sample_freqs = load_and_segment(train_paths)
    produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
    pipeline.fit(X, y, produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    X_test, test_freqs = load_and_segment(test_paths)
    predict_params = {('audio_featurizer', 'sample_freqs'): test_freqs}
    predictions = pipeline.predict(X_test, predict_params)
    score = f1_score(predictions, y_test, average='micro')
    print("\nf1 micro score: %f" % score)
    return score