def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"} | ../data/test_kdd.csv | ./scenario_an_2/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_anomaly.i_create_anomaly_resources_from_source( self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[4])
def test_scenario16(self): """ Scenario: Successfully building threshold test predictions from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using ensemble of <number_of_models> models with replacement to test "<test>" and log predictions in "<output>" And I check that the ensemble has been created And I check that the predictions are ready And I create BigML resources using the previous ensemble with different thresholds to test "<test>" and log predictions in "<output2>" and "<output3>" Then local predictions for different thresholds in "<output2>" and "<output3>" are different Examples: |scenario | kwargs | number_of_models | test | output | output2 | output3 """ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '10', 'data/test_iris.csv', 'scenario16/predictions.csv', 'scenario16/predictions2.csv', 'scenario16/predictions3.csv' ]] show_doc(self.test_scenario16, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_ensemble_with_replacement( self, number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_ensemble(self) test_pred.i_check_create_predictions(self) test_pred.i_create_resources_from_ensemble_with_threshold( self, test=example[3], output2=example[5], output3=example[6]) test_pred.i_check_predictions_with_different_thresholds( self, example[5], example[6])
def test_scenario06(self):
    """
    Scenario: Successfully building batch test projections from a PCA model
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML PCA resources using the model to test "<test>" as a batch projection and log projections in "<output>"
        And I check that the projections are ready
        Then the local projection file is like "<projections_file>"

        Examples:
        |scenario | kwargs | test | output |projections_file |
    """
    print self.test_scenario06.__doc__
    examples = [[
        'scenario1_pca',
        '{"data": "data/grades.csv", "output": "scenario1_pca/projections.csv", "test": "data/test_grades_no_missings.csv"}',
        'data/test_grades_no_missings.csv',
        'scenario5_pca/projections.csv',
        'check_files/projections_grades_pca.csv'
    ]]
    for example in examples:
        print "\nTesting with:\n", example
        # Re-run (or reuse) the prerequisite scenario that builds the PCA.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        # Remote (batch) projection from the previously built PCA model.
        pca_proj.i_create_pca_resources_from_model_remote(
            self, test=example[2], output=example[3])
        batch_pred.i_check_create_batch_projection(self)
        test_pred.i_check_create_projections(self)
        test_pred.i_check_projections(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building multi-label evaluations from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using source to evaluate and log evaluation in "<output>" And I check that the dataset has been created And I check that the models have been created And I check that the <number_of_labels> evaluations have been created And I check that the evaluation is ready Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | number_of_labels | output |json_evaluation_file | | scenario_ml_e1| {"tag": "my_multilabel_e_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_e1/evaluation"} | 7 | ./scenario_ml_e2/evaluation | ./check_files/evaluation_ml.json | """ print self.test_scenario2.__doc__ examples = [ ['scenario_ml_e1', '{"tag": "my_multilabel_e_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_e1/evaluation"}', '7', 'scenario_ml_e2/evaluation', 'check_files/evaluation_ml.json']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) ml_eval.i_create_ml_evaluations_from_source(self, output=example[3]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models(self) test_pred.i_check_create_evaluations(self, number_of_evaluations=example[2]) ml_eval.i_check_evaluation_ready(self) evaluation.then_the_evaluation_file_is_like(self, example[4])
def test_scenario03(self):
    """
    Scenario: Successfully building test projections from source (PCA)
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML PCA resources using source to test "<test>" and log projections in "<output>"
        And I check that the dataset has been created
        And I check that the PCA model has been created
        And I check that the projections are ready
        Then the local projections file is like "<projections_file>"

        Examples:
        |scenario | kwargs | test | output |projections_file |
        | scenario1_pca| {"data": "data/grades.csv", "output": "scenario1_pca/projections.csv", "test": "data/test_grades_no_missings.csv"} | data/test_grades_no_missings.csv | scenario2_pca/projections.csv | check_files/projections_grades_pca.csv |
    """
    print self.test_scenario03.__doc__
    examples = [[
        'scenario1_pca',
        '{"data": "data/grades.csv", "output": "scenario1_pca/projections.csv", "test": "data/test_grades_no_missings.csv"}',
        'data/test_grades_no_missings.csv',
        'scenario2_pca/projections.csv',
        'check_files/projections_grades_pca.csv'
    ]]
    for example in examples:
        print "\nTesting with:\n", example
        # Rebuild (or reuse) the prerequisite scenario.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        pca_proj.i_create_pca_resources_from_source(self, test=example[2],
                                                    output=example[3])
        test_pred.i_check_create_dataset(self, suffix=None)
        pca_proj.i_check_create_pca_model(self)
        test_pred.i_check_create_projections(self)
        test_pred.i_check_projections(self, example[4])
def test_scenario12(self): """ Scenario: Successfully building cross-validation from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create a BigML cross-validation with rate <rate> using the dataset in file "<dataset_file>" and log results in "<output>" And I check that the models have been created And I check that the evaluations have been created Then the cross-validation json model info is like the one in "<cv_file>" Examples: |scenario | kwargs | rate | dataset_file | output |cv_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | 0.05 | ./scenario1/dataset | ./scenario12/cross-validation | ./check_files/cross_validation.json | """ print self.test_scenario12.__doc__ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '0.05', 'scenario1/dataset', 'scenario12/cross-validation', 'check_files/cross_validation.json' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_cross_validation_from_dataset( self, rate=example[2], dataset_file=example[3], output=example[4]) test_pred.i_check_create_models(self) test_pred.i_check_create_evaluations(self, number_of_evaluations=None) test_pred.i_check_cross_validation(self, example[5])
def test_scenario17(self): """ Scenario: Successfully building test predictions from local model Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using local model in "<scenario>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ./scenario17/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario17.__doc__ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario17/predictions.csv', 'check_files/predictions_iris.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_local_model(self, directory=example[0], test=example[2], output=example[3]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario09(self): """ Scenario: Successfully combining test predictions from existing directories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> Given I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>" Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | directory1 | directory2 | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"} | ./scenario1 | ./scenario5 | ./scenario8/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario09.__doc__ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}', 'scenario1', 'scenario5', 'scenario8/predictions.csv', 'check_files/predictions_iris.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it( self, example[2], example[3]) test_pred.i_find_predictions_files(self, directory1=example[4], directory2=example[5], output=example[6]) test_pred.i_check_predictions(self, example[7])
def test_scenario11(self): """ Scenario: Successfully building test predictions from dataset specifying objective field and model fields Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>" And I check that the model has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | objective | fields | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ./scenario11/predictions.csv | ./check_files/predictions_iris_b.csv | 0 | "petal length","petal width" | """ print self.test_scenario11.__doc__ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario11/predictions.csv', 'check_files/predictions_iris_b.csv', '0', '"petal length","petal width"' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_dataset_objective_model( self, objective=example[5], fields=example[6], test=example[2], output=example[3]) test_pred.i_check_create_model(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def setup_scenario06(self): """ Scenario: Successfully building test predictions from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using ensemble of <number_of_models> models to test "<test>" and log predictions in "<output>" And I check that the ensemble has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | number_of_models | test | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | 10 | ../data/test_iris.csv | ./scenario5/predictions.csv | ./check_files/predictions_iris.csv | """ print self.setup_scenario06.__doc__ examples = [[ 'scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '10', 'data/test_iris.csv', 'scenario5/predictions.csv', 'check_files/predictions_iris.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_ensemble( self, number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_ensemble(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[5])
def test_scenario2(self): """ Scenario: Successfully building evaluations from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to evaluate and log evaluation in "<output>" And I check that the dataset has been created And I check that the model has been created And I check that the evaluation has been created Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | output | json_evaluation_file | | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"} |./scenario_e2/evaluation | ./check_files/evaluation_iris.json | """ print self.test_scenario2.__doc__ examples = [ ['scenario_e1', '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}', 'scenario_e2/evaluation', 'check_files/evaluation_iris.json']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) evaluation.given_i_create_bigml_resources_using_source_to_evaluate(self, output=example[2]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_model(self) test_pred.i_check_create_evaluation(self) evaluation.then_the_evaluation_file_is_like(self, example[3])
def test_scenario5(self): """ Scenario: Successfully building evaluation from model and test file with data map Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using test file "<test>" and a fields map "<fields_map>" to evaluate a model and log evaluation in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the evaluation has been created Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | test | fields_map | output | json_evaluation_file | | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"} | ../data/iris_permuted.csv | ../data/fields_map.csv | ./scenario_e7/evaluation | ./check_files/evaluation_iris2.json | """ print self.test_scenario5.__doc__ examples = [ ['scenario_e1', '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}', 'data/iris_permuted.csv', 'data/fields_map.csv', 'scenario_e7/evaluation', 'check_files/evaluation_iris2.json']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) evaluation.i_create_all_resources_to_evaluate_with_model_and_map(self, data=example[2], fields_map=example[3], output=example[4]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_evaluation(self) evaluation.then_the_evaluation_file_is_like(self, example[5])
def test_scenario8(self): """ Scenario: Successfully building test predictions from model with operating point Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML remote batch predictions one by one using model to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | operating_point | test | output |predictions_file | """ examples = [ ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario_r8/predictions.csv', 'check_files/predictions_iris.csv']] print self.test_scenario8.__doc__ for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( \ self, example[0], example[1]) test_batch_pred.i_create_resources_from_model_remote_no_batch( \ self, test=example[2], output=example[3]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario4(self): """ Scenario 4: Successfully building test predictions from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using dataset to test the previous test dataset remotely and log predictions in "<output>" And I check that the model has been created And I check that the batch prediction has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ./scenario_r3/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario4.__doc__ examples = [ ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario_r3/predictions.csv', 'check_files/predictions_iris.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_create_resources_from_dataset_batch(self, output=example[2]) test_pred.i_check_create_model(self) test_batch_pred.i_check_create_batch_prediction(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[3])
def test_scenario5(self): """ Scenario: Successfully building multi-label evaluations from models retrieved by tag Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using models tagged as "<tag>" to evaluate and log evaluation in "<output>" And I check that the <number_of_labels> evaluations have been created And I check that the evaluation is ready Then the evaluation key "<key>" value for the model is greater than <value> Examples: |scenario | kwargs | tag | number_of_labels | output |key | value | scenario_ml_e1| {"tag": "my_multilabel_e_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_e1/evaluation"} | my_multilabel_e_1 | 7 | ./scenario_ml_e5/evaluation | average_phi | 0.8180 """ print self.test_scenario5.__doc__ examples = [[ 'scenario_ml_e1', '{"tag": "my_multilabel_e_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_e1/evaluation"}', 'my_multilabel_e_1', '7', 'scenario_ml_e5/evaluation', 'average_phi', '0.8180' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) ml_eval.i_create_ml_evaluations_from_tagged_models( self, tag=example[2], output=example[4]) test_pred.i_check_create_evaluations( self, number_of_evaluations=example[3]) ml_eval.i_check_evaluation_ready(self) evaluation.i_check_evaluation_key(self, key=example[5], value=example[6])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"} | ../data/test_kdd.csv | ./scenario_an_2/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_anomaly.i_create_anomaly_resources_from_source(self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[4])
def test_scenario03(self):
    """
    Scenario: Successfully building test predictions from source
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML logistic regression resources using source to test "<test>" and log predictions in "<output>"
        And I check that the dataset has been created
        And I check that the model has been created
        And I check that the predictions are ready
        Then the local prediction file is like "<predictions_file>"

        Examples:
        |scenario | kwargs | test | output |predictions_file |
        | scenario1_lr| {"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"} | data/test_iris.csv | scenario2_lr/predictions.csv | check_files/predictions_iris_lr.csv |
    """
    print self.test_scenario03.__doc__
    examples = [
        ['scenario1_lr',
         '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}',
         'data/test_iris.csv',
         'scenario2_lr/predictions.csv',
         'check_files/predictions_iris_lr.csv']]
    for example in examples:
        print "\nTesting with:\n", example
        # Rebuild (or reuse) the prerequisite logistic regression scenario.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        lr_pred.i_create_lr_resources_from_source(
            self, None, test=example[2], output=example[3])
        test_pred.i_check_create_dataset(self, suffix=None)
        lr_pred.i_check_create_lr_model(self)
        test_pred.i_check_create_predictions(self)
        test_pred.i_check_predictions(self, example[4])
def test_scenario24(self):
    """
    Scenario: Successfully test predictions remotely from boosted ensemble
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML resources using boosted ensemble in <iterations> iterations to remotely test "<test>" and log predictions in "<output>"
        And I check that the ensemble has been created
        And I check that the predictions are ready
        And I check that the batch prediction is ready
        And I check that the batch predictions dataset is ready
        Then the local prediction file is like "<predictions_file>"

        Examples:
        |scenario | kwargs | iterations | test | output |predictions_file |
    """
    examples = [
        ['scenario1',
         '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
         '10',
         'data/test_iris.csv',
         'scenario24/predictions.csv',
         'check_files/predictions_iris_boost.csv']]
    show_doc(self.test_scenario24, examples)
    for example in examples:
        print "\nTesting with:\n", example
        # Rebuild (or reuse) the prerequisite scenario before testing.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        test_pred.i_create_resources_remotely_from_boosted_ensemble(
            self, iterations=example[2], test=example[3], output=example[4])
        test_pred.i_check_create_ensemble(self)
        test_pred.i_check_create_batch_prediction(self)
        test_pred.i_check_create_predictions(self)
        test_pred.i_check_predictions(self, example[5])
def test_scenario5(self): """ Scenario: Successfully building evaluation from model and test file with data map Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using test file "<test>" and a fields map "<fields_map>" to evaluate a model and log evaluation in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the evaluation has been created Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | test | fields_map | output | json_evaluation_file | | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"} | ../data/iris_permuted.csv | ../data/fields_map.csv | ./scenario_e7/evaluation | ./check_files/evaluation_iris2.json | """ print self.test_scenario5.__doc__ examples = [[ 'scenario_e1', '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}', 'data/iris_permuted.csv', 'data/fields_map.csv', 'scenario_e7/evaluation', 'check_files/evaluation_iris2.json' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) evaluation.i_create_all_resources_to_evaluate_with_model_and_map( self, data=example[2], fields_map=example[3], output=example[4]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_evaluation(self) evaluation.then_the_evaluation_file_is_like(self, example[5])
def test_scenario07(self): """ Scenario: Successfully building test predictions from models file Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> And I create BigML resources using models in file "<models_file>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | models_file | test | output |predictions_file | """ print self.test_scenario07.__doc__ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}', 'scenario5/models', 'data/test_iris.csv', 'scenario6/predictions.csv', 'check_files/predictions_iris.csv']] show_doc(self.test_scenario07, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3]) test_pred.i_create_resources_from_models_file(self, multi_label=None, models_file=example[4], test=example[5], output=example[6]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[7])
def test_scenario4(self): """ Scenario: Successfully building ensembles test predictions from models file with max categories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using models in file "<models_file>" to test "<test>" and log predictions with combine method in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |models_file| test | output |predictions_file | | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"} | scenario_mc_1/models | ../data/test_iris.csv | ./scenario_mc_4/predictions.csv | ./check_files/predictions_mc.csv | """ print self.test_scenario4.__doc__ examples = [ [ "scenario_mc_1", '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', "scenario_mc_1/models", "data/test_iris.csv", "scenario_mc_4/predictions.csv", "check_files/predictions_mc.csv", ] ] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) max_cat.i_create_all_mc_resources_from_models( self, models_file=example[2], test=example[3], output=example[4] ) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[5])
def test_scenario4(self): """ Scenario 4: Successfully building test predictions from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using dataset to test the previous test dataset remotely and log predictions in "<output>" And I check that the model has been created And I check that the batch prediction has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ./scenario_r3/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario4.__doc__ examples = [[ 'scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario_r3/predictions.csv', 'check_files/predictions_iris.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_dataset_batch(self, output=example[2]) test_pred.i_check_create_model(self) test_batch_pred.i_check_create_batch_prediction(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[3])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | ../data/diabetes.csv | ./scenario_c_2/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_cluster.i_create_cluster_resources_from_source( self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building association from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML association using source and log resources in "<output_dir>" And I check that the dataset has been created And I check that the association has been created Examples: |scenario | kwargs | output_dir | scenario_ass_1| {"data": "../data/iris.csv", "output_dir": "./scenario_ass_1/} | ./scenario_ass_2 | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_ass_1', '{"data": "data/iris.csv", "output_dir": "scenario_ass_1"}', 'scenario_ass_2' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_association.i_create_association_from_source( self, output_dir=example[2]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_association(self)
def test_scenario8(self): """ Scenario: Successfully building test predictions from model with operating point Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML remote batch predictions one by one using model to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | operating_point | test | output |predictions_file | """ examples = [ ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario_r8/predictions.csv', 'check_files/predictions_iris.csv']] print self.test_scenario8.__doc__ for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( \ self, example[0], example[1]) test_batch_pred.i_create_resources_from_model_remote_no_batch( \ self, test=example[2], output=example[3]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario3(self): """ Scenario: Successfully building test predictions from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML association using dataset and log predictions in "<output_dir>" And I check that the association has been created Examples: |scenario | kwargs | test | output |predictions_file | | scenario_ass_1| {"data": "../data/iris.csv", "output_dir": "./scenario_c_1"} | ../data/diabetes.csv | ./scenario_c_3/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario3.__doc__ examples = [[ 'scenario_ass_1', '{"data": "data/iris.csv", "output_dir": "scenario_ass_1"}', 'scenario_ass_3' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_association.i_create_association_from_dataset( self, output_dir=example[2]) test_pred.i_check_create_association(self)
def test_scenario2(self): """ Scenario: Successfully building test predictions from source using datasets with max categories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources with <max_categories> as categories limit and <objective> as objective field using source to test "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the max_categories datasets have been created And I check that the models have been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |max_categories| objective | test | output |predictions_file | | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"} |1| species | ../data/test_iris.csv | ./scenario_mc_2/predictions.csv | ./check_files/predictions_mc.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_mc_1', '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', '1', 'species', 'data/test_iris.csv', 'scenario_mc_2/predictions.csv', 'check_files/predictions_mc.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) max_cat.i_create_all_mc_resources_from_source(self, max_categories=example[2], objective=example[3], test=example[4], output=example[5]) test_pred.i_check_create_dataset(self, suffix=None) max_cat.i_check_create_max_categories_datasets(self) test_pred.i_check_create_models(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[6])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using source to test "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the models have been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_ml_1| {"tag": "my_multilabel_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_1/predictions.csv", "test": "../data/test_multilabel.csv"} | ../data/test_multilabel.csv | ./scenario_ml_2/predictions.csv | ./check_files/predictions_ml_comma.csv | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_ml_1', '{"tag": "my_multilabel_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_1/predictions.csv", "test": "data/test_multilabel.csv"}', 'data/test_multilabel.csv', 'scenario_ml_2/predictions.csv', 'check_files/predictions_ml_comma.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_source(self, multi_label='multi-label', test=example[2], output=example[3]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | ../data/diabetes.csv | ./scenario_c_2/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_cluster.i_create_cluster_resources_from_source(self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[4])
def test_scenario5(self): """ Scenario 5: Successfully building test predictions from dataset and prediction format info Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using a model to test the previous test dataset remotely with prediction headers and fields "<fields>" and log predictions in "<output>" And I check that the batch prediction has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | fields | output |predictions_file | | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"} | sepal length,sepal width | ./scenario_r4/predictions.csv | ./check_files/predictions_iris_format.csv | """ print self.test_scenario5.__doc__ examples = [[ 'scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'sepal length,sepal width', 'scenario_r4/predictions.csv', 'check_files/predictions_iris_format.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_pred.i_create_resources_from_model_batch(self, fields=example[2], output=example[3]) test_batch_pred.i_check_create_batch_prediction(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario8(self): """ Scenario: Successfully generating models from cluster centroids Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I generate models for "<centroid_names>" centroids and log results in "<output>" Then I check that the <model_number> cluster models are ready Examples: |scenario | kwargs | centroid_names | output | datasets_number | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | Cluster 1,Cluster 2 | ./scenario_c_8/centroids.csv | 2 | """ print self.test_scenario8.__doc__ examples = [[ 'scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'Cluster 1,Cluster 2', 'scenario_c_8/centroids.csv', '2' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_cluster.i_create_models_from_cluster(self, centroids=example[2], output=example[3]) test_cluster.i_check_create_cluster(self) test_cluster.i_check_cluster_models(self, models_number=example[4])
def test_scenario07(self): """ Scenario: Successfully building batch test predictions from model with customized output Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML deepnet resources using model to test "<test>" as a batch prediction with output format "<batch-output>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test |batch_output | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ../data/batch_output.json | ./scenario6_dn/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario07.__doc__ examples = [[ 'scenario1_dn', '{"data": "data/iris.csv", "output": "scenario1_dn/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'data/batch_output.json', 'scenario6_dn/predictions.csv', 'check_files/predictions_iris_dn_prob.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) dn_pred.i_create_dn_resources_from_model_remote_with_options( self, test=example[2], output=example[4], options_file=example[3]) batch_pred.i_check_create_batch_prediction(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[5])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using source and <number_of_models> models ensembles to test "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the models in the ensembles have been created Then I check that the predictions are ready Examples: |scenario | kwargs |number_of_models |test | output | | scenario_mle_1| {"tag": "my_multilabel_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mle_1/predictions.csv", "test": "../data/test_multilabel.csv", "number_of_models": 10} |10| ../data/test_multilabel.csv | ./scenario_mle_2/predictions.csv """ print self.test_scenario2.__doc__ examples = [[ 'scenario_mle_1', '{"tag": "my_multilabel_1%s", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mle_1/predictions.csv", "test": "data/test_multilabel.csv", "number_of_models": 10}' % PY3, '10', 'data/test_multilabel.csv', 'scenario_mle_2/predictions.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) ml_pred.i_create_resources_and_ensembles_from_source( self, multi_label='multi-label ', number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models_in_ensembles(self, in_ensemble=True) test_pred.i_check_create_predictions(self)
def test_scenario09(self): """ Scenario: Successfully combining test predictions from existing directories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> Given I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>" Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | directory1 | directory2 | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"} | ./scenario1 | ./scenario5 | ./scenario8/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario09.__doc__ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}', 'scenario1', 'scenario5', 'scenario8/predictions.csv', 'check_files/predictions_iris.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3]) test_pred.i_find_predictions_files(self, directory1=example[4], directory2=example[5], output=example[6]) test_pred.i_check_predictions(self, example[7])
def test_scenario5(self): """ Scenario: Successfully building test predictions from models retrieved by tag Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using models tagged as "<tag>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | tag | test | output |predictions_file | | scenario_ml_6| {"tag": "my_multilabel_5", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_6/predictions.csv", "test": "../data/test_multilabel.csv"} | my_multilabel_5 | ../data/test_multilabel.csv | ./scenario_ml_5/predictions.csv | ./check_files/predictions_ml_comma.csv | """ print self.test_scenario5.__doc__ examples = [[ 'scenario_ml_6', '{"tag": "my_multilabel_5", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_6/predictions.csv", "test": "data/test_multilabel.csv"}', 'my_multilabel_5', 'data/test_multilabel.csv', 'scenario_ml_5/predictions.csv', 'check_files/predictions_ml_comma.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) ml_pred.i_predict_ml_from_model_tag(self, tag=example[2], test=example[3], output=example[4]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[5])
def test_scenario12(self): """ Scenario: Successfully building cross-validation from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create a BigML cross-validation with rate <rate> using the dataset in file "<dataset_file>" and log results in "<output>" And I check that the models have been created And I check that the evaluations have been created Then the cross-validation json model info is like the one in "<cv_file>" Examples: |scenario | kwargs | rate | dataset_file | output |cv_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | 0.05 | ./scenario1/dataset | ./scenario12/cross-validation | ./check_files/cross_validation.json | """ print self.test_scenario12.__doc__ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '0.05', 'scenario1/dataset', 'scenario12/cross-validation', 'check_files/cross_validation.json']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_create_cross_validation_from_dataset(self, rate=example[2], dataset_file=example[3], output=example[4]) test_pred.i_check_create_models(self) test_pred.i_check_create_evaluations(self, number_of_evaluations=None) test_pred.i_check_cross_validation(self, example[5])
def test_scenario22(self): """ Scenario: Successfully building threshold test predictions remotely from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using ensemble of <number_of_models> models with replacement to test "<test>" and log predictions in "<output>" And I check that the ensemble has been created And I check that the predictions are ready And I create BigML resources using the previous ensemble with different thresholds "<threshold_class>" to remotely test "<test>" and log predictions in "<output2>" and "<output3>" Then local predictions for different thresholds in "<output2>" and "<output3>" are different Examples: |scenario | kwargs | number_of_models | test | output | output2 | output3 | threshold_class """ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '10', 'data/test_iris.csv', 'scenario22/predictions.csv', 'scenario22/predictions2.csv', 'scenario22/predictions3.csv', 'Iris-virginica']] show_doc(self.test_scenario22, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( \ self, example[0], example[1]) test_pred.i_create_resources_from_ensemble_with_replacement( \ self, number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_ensemble(self) test_pred.i_check_create_predictions(self) test_pred.i_create_resources_from_ensemble_with_threshold_rem( \ self, test=example[3], output2=example[5], output3=example[6], threshold_class=example[7]) test_pred.i_check_predictions_with_different_thresholds( \ self, example[5], example[6])
def test_scenario18(self): """ Scenario: Successfully building test predictions from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> Given I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> And I create BigML resources using local ensemble of <number_of_models> models in "<scenario2>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | number_of_models | test | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"} | 10 | ../data/test_iris.csv | ./scenario18/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario18.__doc__ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}', '10', 'scenario5', 'data/test_iris.csv', 'scenario18/predictions.csv', 'check_files/predictions_iris.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3]) test_pred.i_create_resources_from_local_ensemble(self, number_of_models=example[4], directory=example[5], test=example[6], output=example[7]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[8])
def test_scenario26(self): """ Scenario: Successfully building test predictions from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> Given I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> And I create BigML resources using local ensemble of <number_of_models> models in "<scenario2>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | number_of_models | test | output |predictions_file | """ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}', '10', 'scenario5', 'data/test_iris.csv', 'scenario26/predictions_p.csv', 'check_files/predictions_iris_e_op_prob.csv', 'data/operating_point_prob.json'], ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}', '10', 'scenario5', 'data/test_iris.csv', 'scenario26/predictions_c.csv', 'check_files/predictions_iris_e_op_conf.csv', 'data/operating_point_conf.json']] show_doc(self.test_scenario26, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3]) test_pred.i_create_resources_from_local_ensemble_with_op( \ self, number_of_models=example[4], directory=example[5], test=example[6], output=example[7], operating_point=example[9]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[8])
def test_scenario04(self):
    """
    Scenario: Successfully building test predictions from dataset
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML logistic regression resources using dataset to test "<test>" and log predictions in "<output>"
        And I check that the model has been created
        And I check that the predictions are ready
        Then the local prediction file is like "<predictions_file>"

        Examples:
        |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
        | scenario1_lr| {"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}   | data/test_iris.csv   | scenario3_lr/predictions.csv   | check_files/predictions_iris_lr.csv   |
    """
    print self.test_scenario04.__doc__
    examples = [[
        'scenario1_lr',
        '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}',
        'data/test_iris.csv',
        'scenario3_lr/predictions.csv',
        'check_files/predictions_iris_lr.csv'
    ]]
    for example in examples:
        print "\nTesting with:\n", example
        # Reuse (or rebuild) the source/dataset from the previous scenario.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        # Second positional arg (objective) is deliberately None here.
        lr_pred.i_create_lr_resources_from_dataset(self, None,
                                                   test=example[2],
                                                   output=example[3])
        lr_pred.i_check_create_lr_model(self)
        test_pred.i_check_create_predictions(self)
        test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building evaluations from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to evaluate and log evaluation in "<output>" And I check that the dataset has been created And I check that the model has been created And I check that the evaluation has been created Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | output | json_evaluation_file | | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"} |./scenario_e2/evaluation | ./check_files/evaluation_iris.json | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_e1', '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}', 'scenario_e2/evaluation', 'check_files/evaluation_iris.json' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) evaluation.given_i_create_bigml_resources_using_source_to_evaluate( self, output=example[2]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_model(self) test_pred.i_check_create_evaluation(self) evaluation.then_the_evaluation_file_is_like(self, example[3])
def test_scenario5(self): """ Scenario 5: Successfully building test predictions from dataset and prediction format info Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using a model to test the previous test dataset remotely with prediction headers and fields "<fields>" and log predictions in "<output>" And I check that the batch prediction has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | fields | output |predictions_file | | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"} | sepal length,sepal width | ./scenario_r4/predictions.csv | ./check_files/predictions_iris_format.csv | """ print self.test_scenario5.__doc__ examples = [ ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'sepal length,sepal width', 'scenario_r4/predictions.csv', 'check_files/predictions_iris_format.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_create_resources_from_model_batch(self, fields=example[2], output=example[3]) test_batch_pred.i_check_create_batch_prediction(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario5(self): """ Scenario: Successfully building test predictions from models retrieved by tag Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources with labels "<labels>" and objective "<objective>" using models tagged as "<tag>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |labels | objective | tag | test | output |predictions_file | | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"} | Adult,Student | class | my_multilabelm_1 | ../data/test_multilabel.csv | ./scenario_mlm_5/predictions.csv | ./check_files/predictions_ml_labels.csv | """ print self.test_scenario5.__doc__ examples = [[ 'scenario_mlm_1', '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}', 'Adult,Student', 'class', 'my_multilabelm_1', 'data/test_multilabel.csv', 'scenario_mlm_5/predictions.csv', 'check_files/predictions_ml_labels.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) ml_pred.i_predict_ml_from_model_tag_with_labels_with_objective( self, labels=example[2], objective=example[3], tag=example[4], test=example[5], output=example[6]) 
test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[7])
def test_scenario24(self):
    """
    Scenario: Successfully test predictions remotely from boosted ensemble
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML resources using boosted ensemble in <iterations> iterations to remotely test "<test>" and log predictions in "<output>"
        And I check that the ensemble has been created
        And I check that the predictions are ready
        And I check that the batch prediction is ready
        And I check that the batch predictions dataset is ready
        Then the local prediction file is like "<predictions_file>"

        Examples:
        |scenario | kwargs | iterations | test | output |predictions_file |
    """
    examples = [[
        'scenario1',
        '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
        '10',
        'data/test_iris.csv',
        'scenario24/predictions.csv',
        'check_files/predictions_iris_boost.csv'
    ]]
    show_doc(self.test_scenario24, examples)
    for example in examples:
        print "\nTesting with:\n", example
        # Reuse (or rebuild) the resources created by the base scenario.
        test_pred.i_have_previous_scenario_or_reproduce_it( \
            self, example[0], example[1])
        test_pred.i_create_resources_remotely_from_boosted_ensemble( \
            self, iterations=example[2], test=example[3],
            output=example[4])
        test_pred.i_check_create_ensemble(self)
        test_pred.i_check_create_batch_prediction(self)
        test_pred.i_check_create_predictions(self)
        test_pred.i_check_predictions(self, example[5])
def test_scenario07(self):
    """
    Scenario: Successfully building test predictions from model with operating point
        Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
        And I create BigML logistic regression resources using model with operating point "<operating_point>" to test "<test>" and log predictions in "<output>"
        And I check that the predictions are ready
        Then the local prediction file is like "<predictions_file>"

        Examples:
        |scenario | kwargs | test | output |predictions_file | operating_point |
        | scenario1_lr| {"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"} | data/test_iris.csv | scenario6_lr/predictions_p.csv | check_files/predictions_iris_lr_op_prob.csv | data/operating_point_prob.json |
    """
    print self.test_scenario07.__doc__
    examples = [[
        'scenario1_lr',
        '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}',
        'data/test_iris.csv',
        'scenario6_lr/predictions_p.csv',
        'check_files/predictions_iris_lr_op_prob.csv',
        'data/operating_point_prob.json'
    ]]
    for example in examples:
        print "\nTesting with:\n", example
        # Reuse (or rebuild) the logistic regression from the base scenario.
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, example[0], example[1])
        # example[5] is the JSON file defining the operating point used
        # to threshold the predictions.
        lr_pred.i_create_lr_resources_from_model_with_op(
            self, test=example[2], output=example[3],
            operating_point=example[5])
        test_pred.i_check_create_predictions(self)
        test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source using datasets with max categories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources with <max_categories> as categories limit and <objective> as objective field using source to test "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the max_categories datasets have been created And I check that the models have been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |max_categories| objective | test | output |predictions_file | | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"} |1| species | ../data/test_iris.csv | ./scenario_mc_2/predictions.csv | ./check_files/predictions_mc.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_mc_1', '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', '1', 'species', 'data/test_iris.csv', 'scenario_mc_2/predictions.csv', 'check_files/predictions_mc.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) max_cat.i_create_all_mc_resources_from_source(self, max_categories=example[2], objective=example[3], test=example[4], output=example[5]) test_pred.i_check_create_dataset(self, suffix=None) max_cat.i_check_create_max_categories_datasets(self) test_pred.i_check_create_models(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[6])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using source with objective "<objective>" and model fields "<model_fields>" to test "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the models have been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | objective | model_fields | test | output |predictions_file | | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"} | class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P |../data/test_multilabel.csv | ./scenario_mlm_2/predictions.csv | ./check_files/predictions_ml_comma.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_mlm_1', '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}', 'class', '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P', 'data/test_multilabel.csv', 'scenario_mlm_2/predictions.csv', 'check_files/predictions_ml_comma.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) 
test_pred.i_create_resources_from_source_with_objective(self, multi_label='multi-label ', objective=example[2], model_fields=example[3], test=example[4], output=example[5]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[6])
def test_scenario2(self): """ Scenario: Successfully building multi-label evaluations from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using source to evaluate and log evaluation in "<output>" And I check that the dataset has been created And I check that the models have been created And I check that the <number_of_labels> evaluations have been created And I check that the evaluation is ready Then the evaluation file is like "<json_evaluation_file>" Examples: |scenario | kwargs | number_of_labels | output |json_evaluation_file | | scenario_ml_e1| {"tag": "my_multilabel_e_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_e1/evaluation"} | 7 | ./scenario_ml_e2/evaluation | ./check_files/evaluation_ml.json | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_ml_e1', '{"tag": "my_multilabel_e_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_e1/evaluation"}', '7', 'scenario_ml_e2/evaluation', 'check_files/evaluation_ml.json' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) ml_eval.i_create_ml_evaluations_from_source(self, output=example[3]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models(self) test_pred.i_check_create_evaluations( self, number_of_evaluations=example[2]) ml_eval.i_check_evaluation_ready(self) evaluation.then_the_evaluation_file_is_like(self, example[4])
def test_scenario9(self): """ Scenario: Successfully building test predictions from dataset with summary fields Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML cluster using dataset and summary fields "<summary_fields>" and log resources in "<output_dir>" And I check that the cluster has been created Then the cluster has summary fields "<summary_fields>" Examples: |scenario | kwargs | output-dir |summary_fields | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | scenario_c_9/ | diabetes,age """ print self.test_scenario9.__doc__ examples = [[ 'scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'scenario_c_9', '000008,000007' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_cluster.i_create_cluster_from_dataset_with_summary_fields( self, summary_fields=example[3], output_dir=example[2]) test_pred.i_check_create_cluster(self) test_cluster.i_check_cluster_has_summary_fields(self, example[3])
def test_scenario10(self): """ Scenario: Successfully combining test predictions from existing directories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2> And I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>" with method "<method>" Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |scenario2 | kwargs2 | directory1 | directory2 | output |predictions_file | method | """ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}', 'scenario1', 'scenario5', 'scenario9/predictions_c.csv', 'check_files/predictions_iris.csv', '"confidence weighted"'], ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}', 'scenario1', 'scenario5', 'scenario9/predictions_p.csv', 'check_files/predictions_iris_p.csv', '"probability weighted"'], ['scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}', 'scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}', 'scenario1_r', 'scenario1_r', 'scenario10/predictions_c.csv', 'check_files/predictions_grades.csv', '"confidence weighted"'], ['scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}', 'scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}', 'scenario1_r', 'scenario1_r', 'scenario10/predictions_p.csv', 'check_files/predictions_grades_p.csv', '"probability 
weighted"']] show_doc(self.test_scenario10, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3]) test_pred.i_find_predictions_files_with_method(self, directory1=example[4], directory2=example[5], output=example[6], method=example[8]) test_pred.i_check_predictions(self, example[7])
def test_scenario5(self): """ Scenario: Successfully building test predictions from dataset using datasets and model fields with max categories Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources with <max_categories> as categories limit and <objective> as objective field and model fields "<model_fields>" using dataset to test "<test>" and log predictions in "<output>" And I check that the max_categories datasets have been created And I check that the models have been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |max_categories|objective | model_fields | test | output |predictions_file | | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"} |1| species |sepal length,sepal width |../data/test_iris.csv | ./scenario_mc_5/predictions.csv | ./check_files/predictions_mc2.csv | """ print self.test_scenario5.__doc__ examples = [ [ "scenario_mc_1", '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', "1", "species", "sepal length,sepal width", "data/test_iris.csv", "scenario_mc_5/predictions.csv", "check_files/predictions_mc2.csv", ] ] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) max_cat.i_create_all_mc_resources_from_dataset_with_model_fields( self, max_categories=example[2], objective=example[3], model_fields=example[4], test=example[5], output=example[6], ) max_cat.i_check_create_max_categories_datasets(self) test_pred.i_check_create_models(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[7])
def test_scenario3(self): """ Scenario: Successfully building test predictions from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML association using dataset and log predictions in "<output_dir>" And I check that the association has been created Examples: |scenario | kwargs | test | output |predictions_file | | scenario_ass_1| {"data": "../data/iris.csv", "output_dir": "./scenario_c_1"} | ../data/diabetes.csv | ./scenario_c_3/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario3.__doc__ examples = [ ['scenario_ass_1', '{"data": "data/iris.csv", "output_dir": "scenario_ass_1"}', 'scenario_ass_3']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_association.i_create_association_from_dataset(self, output_dir=example[2]) test_pred.i_check_create_association(self)
def test_scenario6(self): """ Scenario: Successfully generating datasets from cluster centroids Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I generate datasets for "<centroid_names>" centroids and log predictions in "<output>" Then I check that the <datasets_number> cluster datasets are ready Examples: |scenario | kwargs | centroid_names | output | datasets_number | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | Cluster 1,Cluster 2 | ./scenario_c_6/centroids.csv | 2 | """ print self.test_scenario6.__doc__ examples = [ ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'Cluster 1,Cluster 2', 'scenario_c_6/centroids.csv', '2']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_cluster.i_create_datasets_from_cluster(self, centroids=example[2], output=example[3]) test_cluster.i_check_cluster_datasets(self, datasets_number=example[4])
def test_scenario17(self): """ Scenario: Successfully building test predictions from local model Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using local model in "<scenario>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | """ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario17/predictions.csv', 'check_files/predictions_iris.csv']] show_doc(self.test_scenario17, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_create_resources_from_local_model(self, directory=example[0], test=example[2], output=example[3]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario5(self): """ Scenario: Successfully building test predictions from models retrieved by tag Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources with labels "<labels>" and objective "<objective>" using models tagged as "<tag>" to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs |labels | objective | tag | test | output |predictions_file | | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"} | Adult,Student | class | my_multilabelm_1 | ../data/test_multilabel.csv | ./scenario_mlm_5/predictions.csv | ./check_files/predictions_ml_labels.csv | """ print self.test_scenario5.__doc__ examples = [ ['scenario_mlm_1', '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}', 'Adult,Student', 'class', 'my_multilabelm_1', 'data/test_multilabel.csv', 'scenario_mlm_5/predictions.csv', 'check_files/predictions_ml_labels.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) ml_pred.i_predict_ml_from_model_tag_with_labels_with_objective(self, labels=example[2], objective=example[3], tag=example[4], test=example[5], output=example[6]) 
test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[7])
def test_scenario2(self): """ Scenario: Successfully building association from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML association using source and log resources in "<output_dir>" And I check that the dataset has been created And I check that the association has been created Examples: |scenario | kwargs | output_dir | scenario_ass_1| {"data": "../data/iris.csv", "output_dir": "./scenario_ass_1/} | ./scenario_ass_2 | """ print self.test_scenario2.__doc__ examples = [ ['scenario_ass_1', '{"data": "data/iris.csv", "output_dir": "scenario_ass_1"}', 'scenario_ass_2']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_association.i_create_association_from_source(self, output_dir=example[2]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_association(self)
def test_scenario05(self): """ Scenario: Successfully building test predictions from model Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML linear regression resources using model to test "<test>" and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | """ print self.test_scenario05.__doc__ examples = [ ['scenario1_lrr', '{"data": "data/grades.csv", "output": "scenario1_lrr/predictions.csv", "test": "data/test_grades_no_missings.csv"}', 'data/test_grades_no_missings.csv', 'scenario4_lrr/predictions.csv', 'check_files/predictions_grades_lrr.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) lr_pred.i_create_lr_resources_from_model(self, test=example[2], output=example[3]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def setup_scenario06(self): """ Scenario: Successfully building test predictions from ensemble Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using ensemble of <number_of_models> models to test "<test>" and log predictions in "<output>" And I check that the ensemble has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | number_of_models | test | output |predictions_file | """ examples = [ ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '10', 'data/test_iris.csv', 'scenario5/predictions.csv', 'check_files/predictions_iris.csv']] show_doc(self.setup_scenario06, examples) for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_pred.i_create_resources_from_ensemble(self, number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_ensemble(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[5])
def test_scenario3(self): """ Scenario: Successfully building test predictions from dataset Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML multi-label resources using dataset and <number_of_models> models ensembles to test "<test>" and log predictions in "<output>" And I check that the models in the ensembles have been created Then I check that the predictions are ready Examples: |scenario | kwargs | number_of_models |test | output | | scenario_mle_1| {"tag": "my_multilabel_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mle_1/predictions.csv", "test": "../data/test_multilabel.csv", "number_of_models": 10} |10| ../data/test_multilabel.csv | ./scenario_mle_3/predictions.csv """ print self.test_scenario3.__doc__ examples = [ ['scenario_mle_1', '{"tag": "my_multilabel_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mle_1/predictions.csv", "test": "data/test_multilabel.csv", "number_of_models": 10}', '10', 'data/test_multilabel.csv', 'scenario_mle_3/predictions.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) ml_pred.i_create_resources_and_ensembles_from_source(self, multi_label='multi-label', number_of_models=example[2], test=example[3], output=example[4]) test_pred.i_check_create_dataset(self) test_pred.i_check_create_models_in_ensembles(self, in_ensemble=True) test_pred.i_check_create_predictions(self)
def test_scenario07(self): """ Scenario: Successfully building batch test predictions from model Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML logistic regression resources using model to test "<test>" as a batch prediction and log predictions in "<output>" And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"} | ../data/test_iris.csv | ./scenario6_lr/predictions.csv | ./check_files/predictions_iris.csv | """ print self.test_scenario07.__doc__ examples = [ ['scenario1_lr', '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario6_lr/predictions_p.csv', 'check_files/predictions_iris_lr_op_prob.csv', 'data/operating_point_prob.json']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) lr_pred.i_create_lr_resources_from_model_with_op(self, test=example[2], output=example[3], operating_point=example[5]) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])