def test_predict_start(self):
    # Setup variables
    primitives = [
        'sklearn.preprocessing.StandardScaler',
        'sklearn.linear_model.LogisticRegression'
    ]
    pipeline = MLPipeline(primitives)
    pipeline.fit(self.X_train, self.y_train)

    # Mock the first block
    block_mock = Mock()
    pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = block_mock

    # Run the pipeline starting from the second block,
    # specified both by index and by name
    context = {
        'X': self.X_train,
    }
    int_start = 1
    str_start = 'sklearn.linear_model.LogisticRegression#1'

    pipeline.predict(start_=int_start, **context)
    pipeline.predict(start_=str_start, **context)

    # Assert that the skipped block was never executed; blocks expose
    # ``produce``, not ``predict``, so that is the method to check
    block_mock.produce.assert_not_called()
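
# A minimal sketch of how ``start_`` pairs with ``output_`` for partial
# execution, the feature exercised by the test above. It assumes the
# MLBlocks convention that passing a block name as ``output_`` returns the
# full context as it is after that block runs; the helper name and its
# arguments are illustrative, not part of the test suite.
def example_partial_predict(pipeline, X):
    """Hedged sketch: split ``predict`` into two halves via output_/start_."""
    first_block = list(pipeline.blocks.keys())[0]

    # Run only the first block and capture the intermediate context.
    context = pipeline.predict(X=X, output_=first_block)

    # Resume from the second block, feeding the captured context back in.
    return pipeline.predict(start_=1, **context)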
def test_fit_predict_args_in_init():
    def add(a, b):
        return a + b

    primitive = {
        'name': 'add',
        'primitive': add,
        'produce': {
            'args': [
                {
                    'name': 'a',
                    'type': 'float',
                },
                {
                    'name': 'b',
                    'type': 'float',
                },
            ],
            'output': [{
                'type': 'float',
                'name': 'out'
            }]
        }
    }
    primitives = [primitive]
    init_params = {'add': {'b': 10}}

    pipeline = MLPipeline(primitives, init_params=init_params)

    out = pipeline.predict(a=3)

    assert out == 13
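
# A hedged sketch that extends the function-primitive format from the test
# above to a two-block chain. It assumes, consistently with that test, that
# each produce output is stored in the pipeline context under its declared
# ``name`` and is picked up by later blocks whose args share that name. The
# ``double``/``increment`` primitives are illustrative only.
def example_function_primitive_chain():
    """Hedged sketch: chain two function primitives via the context."""
    def double(x):
        return 2 * x

    def increment(y):
        return y + 1

    primitives = [
        {
            'name': 'double',
            'primitive': double,
            'produce': {
                'args': [{'name': 'x', 'type': 'float'}],
                'output': [{'name': 'y', 'type': 'float'}],
            }
        },
        {
            'name': 'increment',
            'primitive': increment,
            'produce': {
                'args': [{'name': 'y', 'type': 'float'}],
                'output': [{'name': 'out', 'type': 'float'}],
            }
        },
    ]
    pipeline = MLPipeline(primitives)

    # double(3) == 6, then increment(6) == 7
    return pipeline.predict(x=3)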
def test_predict_debug(self):
    outputs = {
        'default': [{
            'name': 'a_name',
            'variable': 'a_primitive#1.a_variable',
            'type': 'a_type',
        }]
    }
    mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
    mlpipeline.blocks['a_primitive#1'].produce_args = [{
        'name': 'input',
        'type': 'whatever'
    }]
    mlpipeline.blocks['a_primitive#1'].produce_output = [{
        'name': 'a_name',
        'type': 'a_type'
    }]

    expected_return = {
        "a_primitive#1": {
            "elapsed": 0,
            "input": {"whatever"},
            "output": {"whatever"}
        }
    }

    returned, debug_returned = mlpipeline.predict(debug=True)

    # predict returns a single value when only one output is defined
    assert len([returned]) == len(outputs["default"])
    assert isinstance(debug_returned, dict)
    assert set(debug_returned.keys()) == set(expected_return.keys())
    for block_name, dictionary in expected_return.items():
        assert set(debug_returned[block_name].keys()) == set(dictionary.keys())
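
# A hedged sketch of consuming the debug dict whose shape the test above
# asserts: one entry per block, each with (at least) an ``elapsed`` key.
# Field semantics beyond that shape are an assumption, and the helper name
# is illustrative.
def example_profile_predict(pipeline, **context):
    """Hedged sketch: use ``debug=True`` to profile a pipeline run."""
    result, debug_info = pipeline.predict(debug=True, **context)
    for block_name, info in debug_info.items():
        print('%s took %.3f seconds' % (block_name, info['elapsed']))

    return result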
def test_predict_no_debug(self):
    outputs = {
        'default': [
            {
                'name': 'a_name',
                'variable': 'a_primitive#1.a_variable',
                'type': 'a_type',
            },
            {
                'name': 'b_name',
                'variable': 'a_primitive#1.b_variable',
                'type': 'b_type',
            },
        ]
    }
    mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
    mlpipeline.blocks['a_primitive#1'].produce_args = [{
        'name': 'input',
        'type': 'whatever'
    }]
    mlpipeline.blocks['a_primitive#1'].produce_output = [{
        'name': 'a_name',
        'type': 'a_type'
    }, {
        'name': 'b_name',
        'type': 'b_type'
    }]

    returned = mlpipeline.predict(debug=False)

    assert len(returned) == len(outputs["default"])
    for returned_output, expected_output in zip(returned, outputs["default"]):
        assert returned_output == expected_output["variable"]
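
# A hedged sketch building on the test above: with several outputs in the
# specification, ``predict`` returns the values in specification order, so
# zipping against the declared names is safe under that assumption. Names
# are passed in explicitly to keep the sketch self-contained.
def example_named_outputs(pipeline, output_names, **context):
    """Hedged sketch: pair a multi-output predict with its output names."""
    returned = pipeline.predict(**context)

    # e.g. {'a_name': <a_value>, 'b_name': <b_value>}
    return dict(zip(output_names, returned))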
def run():
    print("============================================")
    print("Testing Multi Table Pipeline")
    print("============================================")

    orders = pd.read_csv("data/Retail/orders.csv")
    order_products = pd.read_csv("data/Retail/order_products.csv")
    label_times = pd.read_csv("data/Retail/label_times.csv")

    X_train = label_times.sample(frac=0.8)
    X_test = label_times.drop(X_train.index)
    y_train = X_train["label"]
    y_test = X_test["label"]

    entity_set = make_entity_set(orders, order_products)

    multitable = MLPipeline(['dfs', 'random_forest_classifier'])

    updated_hyperparam = MLHyperparam('max_depth', 'int', [1, 10])
    updated_hyperparam.block_name = 'dfs'
    # multitable.update_tunable_hyperparams([updated_hyperparam])

    # Check that the hyperparameters are correct.
    for hyperparam in multitable.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {'dfs', 'rf_classifier'}
    blocks = set(multitable.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly.
    produce_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'cutoff_time_in_index'): True
    }

    print("\nFitting pipeline...")
    fit_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'target_entity'): "users",
        ('dfs', 'training_window'): ft.Timedelta("60 days")
    }
    multitable.fit(X_train, y_train, fit_params=fit_params,
                   produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    predicted_y_val = multitable.predict(X_test, predict_params=produce_params)
    score = f1_score(predicted_y_val, y_test, average='micro')
    print("\nf1 micro score: %f" % score)

    return score
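
# A hedged sketch of what a ``make_entity_set`` helper might look like; the
# real helper lives elsewhere in this repo. It uses the classic featuretools
# API (entity_from_dataframe / normalize_entity / Relationship), and the
# column names (``order_id``, ``order_product_id``, ``user_id``) are
# assumptions about the Retail data, chosen so that the ``users`` target
# entity used in fit_params above exists.
def example_make_entity_set(orders, order_products):
    """Hedged sketch: build the Retail EntitySet with featuretools."""
    es = ft.EntitySet(id='retail')
    es = es.entity_from_dataframe(
        entity_id='orders', dataframe=orders, index='order_id')
    es = es.entity_from_dataframe(
        entity_id='order_products', dataframe=order_products,
        index='order_product_id')

    # Link each order_products row to its parent order.
    relationship = ft.Relationship(
        es['orders']['order_id'], es['order_products']['order_id'])
    es = es.add_relationship(relationship)

    # Derive the ``users`` entity referenced by target_entity.
    es = es.normalize_entity(
        base_entity_id='orders', new_entity_id='users', index='user_id')

    return es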
def run(train_size=160, test_size=40):
    print("============================================")
    print("Testing Audio Pipeline")
    print("============================================")

    # Data loading.
    classes = [
        'street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling',
        'drilling', 'dog_bark', 'children_playing', 'car_horn',
        'air_conditioner'
    ]
    labels = []
    all_filepaths = []
    for label_class in classes:
        for filepath in glob.glob(
                os.path.join('data/UrbanSound/data', label_class, '*.wav')):
            all_filepaths.append(filepath)
            labels.append(label_class)

    filepaths, filepaths_test, y, y_test = train_test_split(
        all_filepaths, labels, train_size=train_size, test_size=test_size)

    audio_pipeline = MLPipeline([
        'audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'
    ])

    # Check that the hyperparameters are correct.
    for hyperparam in audio_pipeline.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {
        'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'
    }
    blocks = set(audio_pipeline.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly.
    print("\nFitting pipeline...")
    X, sample_freqs = load_and_segment(filepaths)
    produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
    audio_pipeline.fit(X, y, produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    X_test, sample_freqs_test = load_and_segment(filepaths_test)
    predict_params = {('audio_featurizer', 'sample_freqs'): sample_freqs_test}
    predicted_y_val = audio_pipeline.predict(X_test, predict_params=predict_params)
    score = f1_score(predicted_y_val, y_test, average='micro')
    print("\nf1 micro score: %f" % score)

    return score
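
# A hedged sketch of what a ``load_and_segment`` helper might look like; the
# real helper is defined elsewhere in this repo. It only illustrates the
# contract implied above: one audio array plus its sample frequency per
# file. The stereo downmix and use of scipy.io.wavfile are assumptions.
def example_load_and_segment(filepaths):
    """Hedged sketch: load wav files into (audios, sample_freqs) lists."""
    from scipy.io import wavfile

    audios = []
    sample_freqs = []
    for filepath in filepaths:
        sample_freq, data = wavfile.read(filepath)
        if data.ndim > 1:
            # Downmix stereo to mono by averaging channels.
            data = data.mean(axis=1)

        audios.append(data)
        sample_freqs.append(sample_freq)

    return audios, sample_freqs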