def test_scenario5(self):
    """
        Scenario: Successfully creating a centroid and the associated dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            When I create a centroid for "<data_input>"
            And I check the centroid is ok
            Then the centroid is "<centroid>"
            And I create a dataset from the cluster and the centroid
            And I wait until the dataset is ready less than <time_2> secs
            And I check that the dataset is created for the cluster and the centroid

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | centroid |
            | ../data/diabetes.csv | 10 | 20 | 20 | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 3 |
    """
    print(self.test_scenario5.__doc__)
    # NOTE(review): the scenario text also lists "I check the centroid is ok"
    # and the dataset-from-centroid steps, but no calls for them are made
    # below -- confirm whether they are meant to be exercised here.
    centroid_input = (
        '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, '
        '"triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, '
        '"diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}')
    # Row layout: data file, source/dataset/cluster timeouts (kept as
    # strings), centroid input JSON, expected centroid name.
    cases = [
        ['data/diabetes.csv', '10', '20', '20', centroid_input, 'Cluster 3']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, cluster_wait,
         data_input, centroid) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, cluster_wait)
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is(self, centroid)
def test_scenario6(self):
    """
        Scenario: Successfully creating an anomaly score:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an anomaly detector from a dataset
            And I wait until the anomaly detector is ready less than <time_3> secs
            When I create an anomaly score for "<data_input>"
            Then the anomaly score is "<score>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | score |
            | ../data/tiny_kdd.csv | 10 | 10 | 100 | {"src_bytes": 350} | 0.92846 |
            | ../data/iris_sp_chars.csv | 10 | 10 | 100 | {"pétal&width\u0000": 300} | 0.89313 |
    """
    print(self.test_scenario6.__doc__)
    # Row layout: data file, source/dataset/anomaly timeouts (kept as
    # strings), input JSON, expected score. The scores in the docstring
    # table above mirror the values asserted here.
    # NOTE: the "\u0000" text in the second input is reproduced verbatim from
    # the original literal; do not turn it into a raw or unicode literal
    # without checking how the step parses it.
    cases = [
        ['data/tiny_kdd.csv', '10', '10', '100',
         '{"src_bytes": 350}', '0.92846'],
        ['data/iris_sp_chars.csv', '10', '10', '100',
         '{"pétal&width\u0000": 300}', '0.89313']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, anomaly_wait,
         data_input, score) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        anomaly_create.i_create_an_anomaly(self)
        anomaly_create.the_anomaly_is_finished_in_less_than(self, anomaly_wait)
        prediction_create.i_create_an_anomaly_score(self, data_input)
        prediction_create.the_anomaly_score_is(self, score)
def test_scenario3(self):
    """
        Scenario: Successfully creating a Fields object and a modified fields structure from a file:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a Fields object from the dataset with objective column "<objective_column>"
            And I import a summary fields file "<summary_file>" as a fields structure
            Then I check the new field structure has field "<field_id>" as "<optype>"

            Examples:
            | data | time_1 | objective_column | summary_file| field_id | optype | time_2
            | ../data/iris.csv | 10 | 0 | fields_summary_modified.csv | 000000 | categorical | 10
    """
    print(self.test_scenario3.__doc__)
    # Row layout: data file, source timeout, objective column, summary file,
    # field id, expected optype, dataset timeout (last column, as in the
    # docstring table).
    cases = [
        ['data/iris.csv', '10', '0',
         'data/fields/fields_summary_modified.csv',
         '000000', 'categorical', '10']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, objective_column, summary_file,
         field_id, optype, dataset_wait) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        fields_steps.create_fields_from_dataset(self, objective_column)
        fields_steps.import_summary_file(self, summary_file)
        fields_steps.check_field_type(self, field_id, optype)
def test_scenario5(self):
    """
        Scenario: Successfully creating a batch anomaly score from an anomaly detector:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an anomaly detector
            And I wait until the anomaly detector is ready less than <time_3> secs
            When I create a batch anomaly score
            And I check the batch anomaly score is ok
            And I wait until the batch anomaly score is ready less than <time_4> secs
            And I download the created anomaly score file to "<local_file>"
            Then the batch anomaly score file is like "<predictions_file>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file |
            | ../data/tiny_kdd.csv | 30 | 30 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions_a.csv |
    """
    print(self.test_scenario5.__doc__)
    # Row layout: data file, source/dataset/anomaly/batch timeouts (kept as
    # strings), download target, reference predictions file.
    cases = [
        ['data/tiny_kdd.csv', '30', '30', '50', '50',
         'tmp/batch_predictions.csv', 'data/batch_predictions_a.csv']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, anomaly_wait, batch_wait,
         local_file, predictions_file) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        anomaly_create.i_create_an_anomaly(self)
        anomaly_create.the_anomaly_is_finished_in_less_than(self, anomaly_wait)
        batch_pred_create.i_create_a_batch_prediction_with_anomaly(self)
        batch_pred_create.the_batch_anomaly_score_is_finished_in_less_than(
            self, batch_wait)
        batch_pred_create.i_download_anomaly_score_file(self, local_file)
        batch_pred_create.i_check_predictions(self, predictions_file)
def test_scenario1(self):
    """
        Scenario: Successfully creating a batch prediction:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            When I create a batch prediction for the dataset with the model
            And I wait until the batch prediction is ready less than <time_4> secs
            And I download the created predictions file to "<local_file>"
            Then the batch prediction file is like "<predictions_file>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file |
            | ../data/iris.csv | 30 | 30 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions.csv |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset/model/batch timeouts (kept as
    # strings), download target, reference predictions file.
    cases = [
        ['data/iris.csv', '30', '30', '50', '50',
         'tmp/batch_predictions.csv', 'data/batch_predictions.csv']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait, batch_wait,
         local_file, predictions_file) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        batch_pred_create.i_create_a_batch_prediction(self)
        batch_pred_create.the_batch_prediction_is_finished_in_less_than(
            self, batch_wait)
        batch_pred_create.i_download_predictions_file(self, local_file)
        batch_pred_create.i_check_predictions(self, predictions_file)
def test_scenario1(self):
    """
        Scenario: Successfully creating and reading a public dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I make the dataset public
            And I wait until the dataset is ready less than <time_3> secs
            When I get the dataset status using the dataset's public url
            Then the dataset's status is FINISHED

            Examples:
            | data | time_1 | time_2 | time_3 |
            | ../data/iris.csv | 10 | 10 | 10 |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source timeout, dataset timeout, timeout after
    # the dataset is made public (all timeouts kept as strings).
    cases = [
        ['data/iris.csv', '10', '10', '10']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        data, source_wait, dataset_wait, public_wait = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        dataset_create.make_the_dataset_public(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, public_wait)
        dataset_create.build_local_dataset_from_public_url(self)
        dataset_create.dataset_status_finished(self)
def test_scenario2(self):
    """
        Scenario: Successfully creating a model from a dataset list and predicting with it using median:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            And I create a local multi model
            When I create a local multimodel batch prediction using median for <input_data>
            Then the local prediction is <prediction>

            Examples:
            | data | time_1 | time_2 | time_3 | input_data | prediction
            | ../data/grades.csv | 10 | 10 | 10 | {'Tutorial': 99.47, 'Midterm': 53.12, 'TakeHome': 87.96} | 50
    """
    print(self.test_scenario2.__doc__)
    # Row layout: data file, source/dataset/model timeouts (kept as strings),
    # input JSON, expected prediction (an int, not a string).
    cases = [
        ["data/grades.csv", "10", "10", "10",
         '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', 50]]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait,
         input_data, prediction) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        # The single model just built is stored as the shared model list
        # before the local multi model step runs.
        world.list_of_models = [world.model]
        compare_pred.i_create_a_local_multi_model(self)
        compare_pred.i_create_a_local_mm_median_batch_prediction(
            self, input_data)
        compare_pred.the_local_prediction_is(self, prediction)
def test_scenario1(self):
    """
        Scenario: Successfully exporting a dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I download the dataset file to "<local_file>"
            Then file "<local_file>" is like file "<data>"

            Examples:
            | data | time_1 | time_2 | local_file |
            | ../data/iris.csv | 30 | 30 | ./tmp/exported_iris.csv |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source timeout, dataset timeout (kept as
    # strings), export target file.
    cases = [
        ['data/iris.csv', '30', '30', 'tmp/exported_iris.csv']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        data, source_wait, dataset_wait, local_file = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        dataset_create.i_export_a_dataset(self, local_file)
        # The exported file is compared against the originally uploaded one.
        dataset_create.files_equal(self, local_file, data)
def test_scenario2(self):
    """
        Scenario: Successfully obtaining parsing error counts:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<params>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            When I ask for the error counts in the fields
            Then the error counts dict is "<error_values>"

            Examples:
            | data | time_1 | params | time_2 |error_values |
            | ../data/iris_missing.csv | 30 | {"fields": {"000000": {"optype": "numeric"}}} |30 |{"000000": 1} |
    """
    print(self.test_scenario2.__doc__)
    # Row layout: data file, source timeout, source update params JSON,
    # dataset timeout, expected error counts JSON.
    cases = [
        ['data/iris_missing.csv', '30',
         '{"fields": {"000000": {"optype": "numeric"}}}',
         '30', '{"000000": 1}']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        data, source_wait, params, dataset_wait, error_values = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        source_create.i_update_source_with(self, params)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        dataset_read.i_get_the_errors_values(self)
        dataset_read.i_get_the_properties_values(
            self, 'error counts', error_values)
def test_scenario1(self):
    """
        Scenario 1: Successfully creating an optiml from a dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an optiml from a dataset
            And I wait until the optiml is ready less than <time_3> secs
            And I update the optiml name to "<optiml_name>"
            When I wait until the optiml is ready less than <time_4> secs
            Then the optiml name is "<optiml_name>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | optiml_name |
            | ../data/iris.csv | 10 | 10 | 10000 | 20 | my new optiml name |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset timeouts, optiml build timeout,
    # timeout after the rename, new optiml name (timeouts kept as strings).
    # The docstring table above mirrors these values.
    cases = [
        ['data/iris.csv', '10', '10', '10000', '20', 'my new optiml name']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, optiml_wait,
         rename_wait, optiml_name) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        # max_training_time is derived from the optiml timeout: one
        # thousandth of it (truncated to an int) minus one.
        optiml_params = (
            '{"max_training_time": %s, "model_types": '
            '["model", "logisticregression"]}'
            % (int(float(optiml_wait) / 1000) - 1))
        model_create.i_create_an_optiml_with_objective_and_params(
            self, parms=optiml_params)
        model_create.the_optiml_is_finished_in_less_than(self, optiml_wait)
        model_create.i_update_optiml_name(self, optiml_name)
        model_create.the_optiml_is_finished_in_less_than(self, rename_wait)
        model_create.i_check_optiml_name(self, optiml_name)
def test_scenario5(self):
    """
        Scenario: Successfully comparing centroids with summary fields:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster with options "<options>"
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
            | ../data/iris.csv | 20 | 20 | 30 | {"summary_fields": ["sepal width"]} |{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"} | Cluster 0 | 0.7310939266123302 |
    """
    print(self.test_scenario5.__doc__)
    centroid_input = (
        '{"petal length": 1, "petal width": 1, "sepal length": 1, '
        '"species": "Iris-setosa"}')
    # Row layout: data file, source/dataset/cluster timeouts (kept as
    # strings), cluster options JSON, centroid input JSON, expected centroid
    # name, expected distance. The docstring table mirrors these values.
    cases = [
        ['data/iris.csv', '20', '20', '30',
         '{"summary_fields": ["sepal width"]}',
         centroid_input, 'Cluster 0', '0.7310939266123302']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, cluster_wait,
         options, data_input, centroid, distance) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        cluster_create.i_create_a_cluster_with_options(self, options)
        cluster_create.the_cluster_is_finished_in_less_than(self, cluster_wait)
        prediction_compare.i_create_a_local_cluster(self)
        # The remote and the local centroid are checked against the same
        # expected name and distance.
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario4(self):
    """
        Scenario: Successfully creating a source from a batch prediction:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            When I create a batch prediction for the dataset with the model
            And I wait until the batch prediction is ready less than <time_4> secs
            Then I create a source from the batch prediction
            And I wait until the source is ready less than <time_1> secs

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 |
            | ../data/diabetes.csv | 30 | 30 | 50 | 50 |
    """
    print(self.test_scenario4.__doc__)
    # Row layout: data file, source/dataset/model/batch timeouts (kept as
    # strings). The docstring table mirrors the file actually uploaded.
    cases = [
        ['data/diabetes.csv', '30', '30', '50', '50']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        data, source_wait, dataset_wait, model_wait, batch_wait = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        batch_pred_create.i_create_a_batch_prediction(self)
        batch_pred_create.the_batch_prediction_is_finished_in_less_than(
            self, batch_wait)
        batch_pred_create.i_create_a_source_from_batch_prediction(self)
        # The source timeout is reused for the source built from the batch
        # prediction.
        source_create.the_source_is_finished(self, source_wait)
def test_scenario2(self):
    """
        Scenario: Successfully creating a model and exporting it:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            And I export the <"pmml"> model to file "<expected_file>"
            Then I check the model is stored in "<expected_file>" file in <"pmml">

            Examples:
            | data | time_1 | time_2 | time_3 | expected_file | pmml
            | data/iris.csv | 30 | 30 | 30 | tmp/model/iris.json | false
            | data/iris_sp_chars.csv | 30 | 30 | 30 | tmp/model/iris_sp_chars.pmml | true
    """
    print(self.test_scenario2.__doc__)
    # Row layout: data file, source/dataset/model timeouts (kept as strings),
    # export target file, pmml flag (a bool). The docstring table mirrors the
    # timeouts used here.
    cases = [
        ['data/iris.csv', '30', '30', '30', 'tmp/model/iris.json', False],
        ['data/iris_sp_chars.csv', '30', '30', '30',
         'tmp/model/iris_sp_chars.pmml', True]]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait,
         expected_file, pmml) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        # Argument order differs between the two steps: export takes
        # (pmml, file) while the check takes (file, pmml).
        model_create.i_export_model(self, pmml, expected_file)
        model_create.i_check_model_stored(self, expected_file, pmml)
def test_scenario1(self):
    """
        Scenario: Successfully creating a statistical test from a dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a statistical test from a dataset
            And I wait until the statistical test is ready less than <time_3> secs
            And I update the statistical test name to "<test_name>"
            When I wait until the statistical test is ready less than <time_4> secs
            Then the statistical test name is "<test_name>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | test_name |
            | ../data/iris.csv | 10 | 10 | 10 | 10 | my new statistical test name |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset timeouts, statistical test build
    # timeout, timeout after the rename, new name (timeouts kept as strings).
    cases = [
        ['data/iris.csv', '10', '10', '10', '10',
         'my new statistical test name']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, tst_wait,
         rename_wait, test_name) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        statistical_tst_create.i_create_a_tst_from_dataset(self)
        statistical_tst_create.the_tst_is_finished_in_less_than(self, tst_wait)
        statistical_tst_create.i_update_tst_name(self, test_name)
        statistical_tst_create.the_tst_is_finished_in_less_than(
            self, rename_wait)
        statistical_tst_create.i_check_tst_name(self, test_name)
def test_scenario1(self):
    """
        Scenario: Successfully creating a model from a dataset list:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I store the dataset id in a list
            And I create a dataset
            And I wait until the dataset is ready less than <time_3> secs
            And I store the dataset id in a list
            Then I create a model from a dataset list
            And I wait until the model is ready less than <time_4> secs
            And I check the model stems from the original dataset list

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 |
            | ../data/iris.csv | 10 | 10 | 10 | 10
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source timeout, first and second dataset
    # timeouts, model timeout (all timeouts kept as strings).
    cases = [["data/iris.csv", "10", "10", "10", "10"]]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, first_dataset_wait,
         second_dataset_wait, model_wait) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        # Two datasets are created from the same source; each id is stored
        # right after its dataset is reported as finished.
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, first_dataset_wait)
        multimodel_create.i_store_dataset_id(self)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, second_dataset_wait)
        multimodel_create.i_store_dataset_id(self)
        model_create.i_create_a_model_from_dataset_list(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        multimodel_create.i_check_model_datasets_and_datasets_ids(self)
def test_scenario1(self):
    """
        Scenario: Successfully creating a split dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a dataset extracting a <rate> sample
            And I wait until the dataset is ready less than <time_3> secs
            When I compare the datasets' instances
            Then the proportion of instances between datasets is <rate>

            Examples:
            | data | time_1 | time_2 | time_3 | rate |
            | ../data/iris.csv | 10 | 10 | 10 | 0.8 |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source timeout, dataset timeout, split dataset
    # timeout, sample rate (all kept as strings).
    cases = [
        ['data/iris.csv', '10', '10', '10', '0.8']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        data, source_wait, dataset_wait, split_wait, rate = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        dataset_create.i_create_a_split_dataset(self, rate)
        dataset_create.the_dataset_is_finished_in_less_than(self, split_wait)
        dataset_create.i_compare_datasets_instances(self)
        dataset_create.proportion_datasets_instances(self, rate)
def test_scenario1(self):
    """
        Scenario: Successfully creating a prediction in DEV mode:
            Given I want to use api in DEV mode
            When I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And the source has DEV True
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction |
            | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset/model timeouts (kept as strings),
    # input JSON, objective field id, expected prediction.
    cases = [
        ['data/iris.csv', '10', '10', '10',
         '{"petal width": 0.5}', '000004', 'Iris-setosa']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait,
         data_input, objective, prediction) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        source_read.source_has_dev(self, True)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        prediction_create.i_create_a_prediction(self, data_input)
        prediction_create.the_prediction_is(self, objective, prediction)
def test_scenario5(self):
    """
        Scenario: Successfully creating a local prediction from an Ensemble:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an ensemble of <number_of_models> models and <tlp> tlp
            And I wait until the ensemble is ready less than <time_3> secs
            And I create a local Ensemble
            When I create a local ensemble prediction using median with confidence for "<data_input>"
            Then the local prediction is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | number_of_models | tlp | data_input |prediction |
            | ../data/grades.csv | 30 | 30 | 50 | 2 | 1 | {} | 69.0934 |
    """
    print(self.test_scenario5.__doc__)
    # Row layout: data file, source/dataset/ensemble timeouts (kept as
    # strings), number of models, tlp, input JSON, expected prediction (a
    # float). The docstring table mirrors these values.
    cases = [
        ['data/grades.csv', '30', '30', '50', '2', '1', '{}', 69.0934]]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, ensemble_wait,
         number_of_models, tlp, data_input, prediction) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        ensemble_create.i_create_an_ensemble(self, number_of_models, tlp)
        ensemble_create.the_ensemble_is_finished_in_less_than(
            self, ensemble_wait)
        ensemble_create.create_local_ensemble(self)
        prediction_create.create_local_ensemble_prediction_using_median_with_confidence(
            self, data_input)
        compare_pred.the_local_prediction_is(self, prediction)
def test_scenario1(self):
    """
        Scenario: Successfully creating an evaluation:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            When I create an evaluation for the model with the dataset
            And I wait until the evaluation is ready less than <time_4> secs
            Then the measured "<measure>" is <value>

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | measure | value |
            | ../data/iris.csv | 50 | 50 | 50 | 50 | average_phi | 1 |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset/model/evaluation timeouts (kept
    # as strings), measure name, expected value. The docstring table mirrors
    # the timeouts used here.
    cases = [
        ['data/iris.csv', '50', '50', '50', '50', 'average_phi', '1']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait,
         evaluation_wait, measure, value) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        evaluation_create.i_create_an_evaluation(self)
        evaluation_create.the_evaluation_is_finished_in_less_than(
            self, evaluation_wait)
        evaluation_create.the_measured_measure_is_value(self, measure, value)
def test_scenario2(self):
    """
        Scenario: Successfully creating an evaluation for an ensemble:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an ensemble of <number_of_models> models and <tlp> tlp
            And I wait until the ensemble is ready less than <time_3> secs
            When I create an evaluation for the ensemble with the dataset
            And I wait until the evaluation is ready less than <time_4> secs
            Then the measured "<measure>" is <value>

            Examples:
            | data | time_1 | time_2 | number_of_models | tlp | time_3 | time_4 | measure | value |
            | ../data/iris.csv | 50 | 50 | 5 | 1 | 80 | 80 | average_phi | 0.98029 |
    """
    print(self.test_scenario2.__doc__)
    # Row layout: data file, source/dataset timeouts, number of models, tlp,
    # ensemble/evaluation timeouts, measure name, expected value (timeouts
    # kept as strings). The docstring table mirrors the timeouts used here.
    cases = [
        ['data/iris.csv', '50', '50', '5', '1', '80', '80',
         'average_phi', '0.98029']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, number_of_models, tlp,
         ensemble_wait, evaluation_wait, measure, value) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        ensemble_create.i_create_an_ensemble(self, number_of_models, tlp)
        ensemble_create.the_ensemble_is_finished_in_less_than(
            self, ensemble_wait)
        evaluation_create.i_create_an_evaluation_ensemble(self)
        evaluation_create.the_evaluation_is_finished_in_less_than(
            self, evaluation_wait)
        evaluation_create.the_measured_measure_is_value(self, measure, value)
def test_scenario1(self):
    """
        Scenario: Successfully creating a prediction using a public model:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            And I make the model public
            And I wait until the model is ready less than <time_3> secs
            And I check the model status using the model's public url
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction |
            | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa |
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset/model timeouts (kept as strings),
    # input JSON, objective field id, expected prediction.
    cases = [
        ['data/iris.csv', '10', '10', '10',
         '{"petal width": 0.5}', '000004', 'Iris-setosa']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, model_wait,
         data_input, objective, prediction) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        # The same model timeout is applied again after publishing.
        model_create.make_the_model_public(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        model_create.model_from_public_url(self)
        prediction_create.i_create_a_prediction(self, data_input)
        prediction_create.the_prediction_is(self, objective, prediction)
def test_scenario1(self):
    """
        Scenario: Successfully creating a local prediction from an Ensemble:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an ensemble of <number_of_models> models and <tlp> tlp
            And I wait until the ensemble is ready less than <time_3> secs
            And I create a local Ensemble
            When I create a local ensemble prediction with confidence for "<data_input>"
            Then the local prediction is "<prediction>"
            And the local prediction's confidence is "<confidence>"
            And the local probabilities are "<probabilities>"

            Examples:
            | data | time_1 | time_2 | time_3 | number_of_models | tlp | data_input |prediction | confidence | probabilities
            | ../data/iris.csv | 10 | 10 | 50 | 5 | 1 | {"petal width": 0.5} | Iris-versicolor | 0.415 | [0.3403, 0.4150, 0.2447]
    """
    print(self.test_scenario1.__doc__)
    # Row layout: data file, source/dataset/ensemble timeouts (kept as
    # strings), number of models, tlp, input JSON, expected prediction,
    # expected confidence, expected probabilities (a JSON list of strings).
    # The docstring table mirrors the confidence asserted here.
    cases = [
        ['data/iris.csv', '10', '10', '50', '5', '1',
         '{"petal width": 0.5}', 'Iris-versicolor', '0.415',
         '["0.3403", "0.4150", "0.2447"]']]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (data, source_wait, dataset_wait, ensemble_wait, number_of_models,
         tlp, data_input, prediction, confidence, probabilities) = case
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
        ensemble_create.i_create_an_ensemble(self, number_of_models, tlp)
        ensemble_create.the_ensemble_is_finished_in_less_than(
            self, ensemble_wait)
        ensemble_create.create_local_ensemble(self)
        prediction_create.create_local_ensemble_prediction_with_confidence(
            self, data_input)
        compare_pred.the_local_prediction_is(self, prediction)
        compare_pred.the_local_prediction_confidence_is(self, confidence)
        compare_pred.the_local_probabilities_are(self, probabilities)
def test_scenario5(self): """ Scenario: Successfully comparing association sets: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the association is ready less than <time_3> secs And I create a local association When I create an association set for "<data_input>" Then the association set is like the contents of "<association_set_file>" And I create a local association set for "<data_input>" Then the local association set is like the contents of "<association_set_file>" """ examples = [ ['data/groceries.csv', '20', '20', '30', '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', 'data/associations/association_set.json', '{"field1": "cat food"}']] show_doc(self.test_scenario5, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) association_create.i_create_an_association_from_dataset(self) association_create.the_association_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_association(self) prediction_create.i_create_an_association_set(self, example[6]) prediction_compare.the_association_set_is_like_file(self, example[5]) prediction_compare.i_create_a_local_association_set(self, example[6]) prediction_compare.the_local_association_set_is_like_file(self, example[5])
def test_scenario2(self): """ Scenario: Successfully creating a Fields object and a summary fields file: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a Fields object from the dataset with objective column "<objective_column>" And I export a summary fields file "<summary_file>" Then I check that the file "<summary_file>" is like "<expected_file>" Examples: | data | time_1 | objective_column | summary_file| expected_file | time_2 | ../data/iris.csv | 10 | 0 | fields_summary.csv | data/fields/fields_summary.csv | 10 """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '10', '0', 'fields_summary.csv', 'data/fields/fields_summary.csv', '10']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[5]) fields_steps.create_fields_from_dataset(self, example[2]) fields_steps.generate_summary(self, example[3]) fields_steps.check_summary_like_expected(self, example[3], example[4])
def test_scenario2(self): """ Scenario: Successfully comparing centroids with configuration options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a cluster with options "<options>" And I wait until the cluster is ready less than <time_3> secs And I create a local cluster When I create a centroid for "<data_input>" Then the centroid is "<centroid>" with distance "<distance>" And I create a local centroid for "<data_input>" Then the local centroid is "<centroid>" with distance "<distance>" Examples: | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | full_data_input """ examples = [ ['data/iris.csv', '30', '30', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}'], ['data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}']] show_doc(self.test_scenario2, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) cluster_create.i_create_a_cluster_with_options(self, example[4]) cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_cluster(self) prediction_create.i_create_a_centroid(self, example[8]) prediction_create.the_centroid_is_with_distance(self, example[6], example[7]) prediction_compare.i_create_a_local_centroid(self, example[5]) 
prediction_compare.the_local_centroid_is(self, example[6], example[7])
def test_scenario3(self): """ Scenario: Successfully comparing scores from anomaly detectors: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an anomaly detector And I wait until the anomaly detector is ready less than <time_3> secs And I create a local anomaly detector When I create an anomaly score for "<data_input>" Then the anomaly score is "<score>" And I create a local anomaly score for "<data_input>" Then the local anomaly score is "<score>" Examples: | data | time_1 | time_2 | time_3 | data_input | score | """ examples = [ ['data/tiny_kdd.csv', '30', '30', '30', '{"000020": 255.0, "000004": 183.0, "000016": 4.0, "000024": 0.04, "000025": 0.01, "000026": 0.0, "000019": 0.25, "000017": 4.0, "000018": 0.25, "00001e": 0.0, "000005": 8654.0, "000009": "0", "000023": 0.01, "00001f": 123.0}', '0.69802']] show_doc(self.test_scenario3, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) anomaly_create.i_create_an_anomaly(self) anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_anomaly(self) prediction_create.i_create_an_anomaly_score(self, example[4]) prediction_create.the_anomaly_score_is(self, example[5]) prediction_compare.i_create_a_local_anomaly_score(self, example[4]) prediction_compare.the_local_anomaly_score_is(self, example[5])
def test_scenario2(self): """ Scenario 2: Successfully creating Topic Model from a dataset: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create topic model from a dataset And I wait until the topic model is ready less than <time_3> secs And I update the topic model name to "<topic_model_name>" When I wait until the topic_model is ready less than <time_4> secs Then the topic model name is "<topic_model_name>" Examples: | data | time_1 | time_2 | time_3 | time_4 | topic_model_name | params | ../data/spam.csv | 100 | 100 | 200 | 500 | my new topic model name | '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}' """ print self.test_scenario2.__doc__ examples = [ ['data/spam.csv', '100', '100', '10000', '500', 'my new topic model name', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[6]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) topic_create.i_create_a_topic_model(self) topic_create.the_topic_model_is_finished_in_less_than(self, example[3]) topic_create.i_update_topic_model_name(self, example[5]) topic_create.the_topic_model_is_finished_in_less_than(self, example[4]) topic_create.i_check_topic_model_name(self, example[5])
def test_scenario2(self): """ Scenario: Successfully creating a batch prediction for an ensemble: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an ensemble of <number_of_models> models and <tlp> tlp And I wait until the ensemble is ready less than <time_3> secs When I create a batch prediction for the dataset with the ensemble And I wait until the batch prediction is ready less than <time_4> secs And I download the created predictions file to "<local_file>" Then the batch prediction file is like "<predictions_file>" Examples: | data | time_1 | time_2 | number_of_models | tlp | time_3 | time_4 | local_file | predictions_file | | ../data/iris.csv | 30 | 30 | 5 | 1 | 80 | 50 | ./tmp/batch_predictions.csv | ./data/batch_predictions_e.csv | """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '30', '30', '5', '1', '80', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_e.csv']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) ensemble_create.i_create_an_ensemble(self, example[3], example[4]) ensemble_create.the_ensemble_is_finished_in_less_than(self, example[5]) batch_pred_create.i_create_a_batch_prediction_ensemble(self) batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[6]) batch_pred_create.i_download_predictions_file(self, example[7]) batch_pred_create.i_check_predictions(self, example[8])
def test_scenario2(self): """ Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2_MISSINGS.txt | """ print self.test_scenario2.__doc__ examples = [["data/iris_missing2.csv", "10", "10", "10", "data/model/if_then_rules_iris_missing2_MISSINGS.txt"]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with_missing_splits(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
def test_scenario2(self): """ Scenario: Successfully creating local association object: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an association from a dataset And I wait until the association is ready less than <time_3> secs And I create a local association When I get the rules for <"item_list"> Then the first rule is "<JSON_rule>" Examples: | data | time_1 | time_2 | time_3 | item_list | JSON_rule | | ../data/tiny_mushrooms.csv | 10 | 20 | 50 | ["Edible"] | {'p_value': 2.08358e-17, 'confidence': 1, 'lift': 1.12613, 'lhs': [14], 'leverage': 0.07885, 'lhs_cover': [0.704, 176], 'rhs_cover': [0.888, 222], 'rhs': [1], 'support': [0.704, 176], 'rule_id': u'000038'} """ print self.test_scenario2.__doc__ examples = [ ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], {'p_value': 5.26971e-31, 'confidence': 1, 'rhs_cover': [0.488, 122], 'leverage': 0.24986, 'rhs': [19], 'rule_id': u'000002', 'lift': 2.04918, 'lhs': [0, 21, 16, 7], 'lhs_cover': [0.488, 122], 'support': [0.488, 122]}]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) association_create.i_create_an_association_from_dataset(self) association_create.the_association_is_finished_in_less_than(self, example[3]) association_create.i_create_a_local_association(self) association_create.i_get_rules_for_item_list(self, example[4]) association_create.the_first_rule_is(self, example[5])
def test_scenario3(self): """ Scenario: Successfully comparing predictions with proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | """ examples = [[ 'data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.2629' ], [ 'data/grades.csv', '10', '10', '10', '{}', '000005', '68.62224', '27.5358' ], [ 'data/grades.csv', '10', '10', '10', '{"Midterm": 20}', '000005', '40.46667', '54.89713' ], [ 'data/grades.csv', '10', '10', '10', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '25.65806' ]] show_doc(self.test_scenario3, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.i_create_a_proportional_local_prediction( 
self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_prediction_confidence_is( self, example[7])
def test_scenario11(self): """ Scenario: Successfully comparing remote and local predictions with raw date input for linear regression: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a linear regression And I wait until the linear regression is ready less than <time_3> secs And I create a local linear regression When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: |data|time_1|time_2|time_3|data_input|objective|prediction """ examples = [ ['data/dates2.csv', '20', '20', '25', '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', '000002', -0.01284], ['data/dates2.csv', '20', '20', '25', '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}', '000002', -0.09459], ['data/dates2.csv', '20', '20', '25', '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', '000002', -0.02259], ['data/dates2.csv', '20', '20', '25', '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}', '000002', -0.06754], ['data/dates2.csv', '20', '20', '25', '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', '000002', 0.05204], ['data/dates2.csv', '20', '20', '25', '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', '000002', 0.05878]] show_doc(self.test_scenario11, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) linear_create.i_create_a_linear_regression(self) linear_create.the_linear_regression_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_linear(self) 
prediction_create.i_create_a_linear_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_linear_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario2(self): """ Scenario 2: Successfully creating a fusion: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_4> secs And I update the fusion name to "<fusion_name>" When I wait until the fusion is ready less than <time_5> secs And I create a prediction for "<data_input>" Then the fusion name is "<fusion_name>" And the prediction for "<objective>" is "<prediction>" And I create an evaluation for the fusion with the dataset And I wait until the evaluation is ready less than <time_4> secs Then the measured "<measure>" is <value> Examples: | data | time_1 | time_2 | time_3 | time_4 | fusion_name | data_input | objective | prediction | ../data/iris.csv | 10 | 10 | 20 | 20 | my new fusion name | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa" """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '10', '10', '20', '20', 'my new fusion name', '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag', '{"petal width": 1.75, "petal length": 2.45}', "000004", "Iris-setosa", 'average_phi', '1.0']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with(self, example[6]) 
model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[6]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[6]) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_pred.i_retrieve_a_list_of_remote_models(self, example[7]) model_create.i_create_a_fusion(self) model_create.the_fusion_is_finished_in_less_than(self, example[3]) model_create.i_update_fusion_name(self, example[5]) model_create.the_fusion_is_finished_in_less_than(self, example[4]) model_create.i_check_fusion_name(self, example[5]) prediction_create.i_create_a_fusion_prediction(self, example[8]) prediction_create.the_prediction_is(self, example[9], example[10]) evaluation_create.i_create_an_evaluation_fusion(self) evaluation_create.the_evaluation_is_finished_in_less_than(self, example[3]) evaluation_create.the_measured_measure_is_value(self, example[11], example[12])
def test_scenario6(self): """ Scenario: Successfully comparing predictions for ensembles: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an ensemble with "<params>" And I wait until the ensemble is ready less than <time_3> secs And I create a local ensemble When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'], ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"number_of_models": 5}'], ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'], ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 46.261364, '{"number_of_models": 5}'], ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-setosa', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], """ examples = [ [ 'data/iris_unbalanced.csv', '10', '10', '120', '{"petal width": 4}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}' ], [ 'data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 61.61036, '{"boosting": {"iterations": 5}, "number_of_models": 5}' 
] ] show_doc(self.test_scenario6, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) ensemble_create.i_create_an_ensemble_with_params(self, example[7]) ensemble_create.the_ensemble_is_finished_in_less_than( self, example[3]) ensemble_create.create_local_ensemble(self) prediction_create.i_create_an_ensemble_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_ensemble_prediction( self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario5(self): """ Scenario: Successfully comparing predictions in operating kind for models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" in "<operating_kind>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" in "<operating_kind>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point """ examples = [[ 'data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "probability", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "confidence", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "probability", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "confidence", "000004" ]] show_doc(self.test_scenario5, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction_op_kind( self, example[4], example[6]) prediction_create.the_prediction_is(self, example[7], example[5]) prediction_compare.i_create_a_local_prediction_op_kind( self, example[4], example[6]) prediction_compare.the_local_prediction_is(self, example[5])
def test_scenario9(self): """ Scenario: Successfully comparing predictions for ensembles with proportional missing strategy in a supervised model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an esemble with "<params>" And I wait until the ensemble is ready less than <time_3> secs And I create a local ensemble When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | params ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'], """ examples = [[ 'data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {} ], [ 'data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', { "operating_kind": "confidence" } ]] show_doc(self.test_scenario7, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) ensemble_create.i_create_an_ensemble_with_params(self, example[8]) ensemble_create.the_ensemble_is_finished_in_less_than( self, example[3]) ensemble_create.create_local_supervised_ensemble(self) prediction_create.i_create_an_ensemble_proportional_prediction( self, example[4], example[9]) 
prediction_create.the_prediction_is(self, example[5], example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_create.create_local_ensemble_proportional_prediction_with_confidence( self, example[4], example[9]) prediction_compare.the_local_ensemble_prediction_is( self, example[6]) prediction_compare.the_local_prediction_confidence_is( self, example[7])
def test_scenario2(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | options | data_input | objective | prediction | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": 
{"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"} | 000000 | ham | | ../data/movies.csv | 20 | 20 | 30 | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064 | ../data/text_missing.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{} | 000003 | paperwork | """ print self.test_scenario2.__doc__ examples = [ ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": 
{"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'], ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.93064'], ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) 
model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[5]) prediction_create.the_prediction_is(self, example[6], example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[7])
def test_scenario6(self):
    """
        Scenario 6: Successfully creating a fusion:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a logistic regression with objective "<objective>" and "<params_1>"
            And I wait until the logistic regression is ready less than <time_3> secs
            And I create a logistic regression with objective "<objective>" and "<params_2>"
            And I wait until the logistic regression is ready less than <time_3> secs
            And I retrieve a list of remote logistic regression tagged with "<tag>"
            And I create a fusion from a list of models and weights "<weights>"
            And I wait until the fusion is ready less than <time_4> secs
            And I create a local fusion
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And the fusion probability for the prediction is "<probability>"
            And I create a local fusion prediction for "<data_input>"
            Then the local fusion prediction is "<prediction>"
            And the local fusion probability for the prediction is "<probability>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | params_1 | tag | data_input | objective | prediction | probability | params_2 | weights |
            | ../data/iris.csv | 10 | 10 | 20 | 20 | {"tags":["my_fusion_6_tag"], "missing_numerics": true} | my_fusion_6_tag | {"petal width": 1.75, "petal length": 2.45} | 000004 | Iris-setosa | 0.4727 | {"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false } | [1, 2] |
    """
    print(self.test_scenario6.__doc__)
    examples = [
        ['data/iris.csv', '10', '10', '20', '20',
         '{"tags":["my_fusion_6_tag"], "missing_numerics": true}',
         'my_fusion_6_tag',
         '{"petal width": 1.75, "petal length": 2.45}',
         "000004", "Iris-setosa", '0.4727',
         '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }',
         '[1, 2]']]
    for example in examples:
        # Single-argument form prints the same text as the Python 2
        # statement `print "\nTesting with:\n", example`.
        print("\nTesting with:\n%s" % (example,))
        # Name every column once so the steps below read like the scenario.
        (data, time_1, time_2, time_3, time_4, params_1, tag, data_input,
         objective, prediction, probability, params_2, weights) = example

        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)

        # Two logistic regressions sharing the tag, built with different
        # parameter sets.
        model_create.i_create_a_logistic_model_with_objective_and_parms(
            self, objective, params_1)
        model_create.the_logistic_model_is_finished_in_less_than(
            self, time_3)
        model_create.i_create_a_logistic_model_with_objective_and_parms(
            self, objective, params_2)
        model_create.the_logistic_model_is_finished_in_less_than(
            self, time_3)
        compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(
            self, tag)

        model_create.i_create_a_fusion_with_weights(self, weights)
        # The scenario documents <time_4> for the fusion wait; the previous
        # code passed time_3 here.
        model_create.the_fusion_is_finished_in_less_than(self, time_4)
        compare_pred.i_create_a_local_fusion(self)

        # Remote prediction checks.
        prediction_create.i_create_a_fusion_prediction(self, data_input)
        prediction_create.the_prediction_is(self, objective, prediction)
        prediction_create.the_fusion_probability_is(self, probability)

        # Local prediction must agree with the remote one.
        compare_pred.i_create_a_local_prediction(self, data_input)
        compare_pred.the_local_prediction_is(self, prediction)
        compare_pred.the_local_probability_is(self, probability)
def test_scenario1(self):
    """
        Scenario: Successfully comparing predictions:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model
            And I wait until the model is ready less than <time_3> secs
            And I create a local model
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And I create a local prediction for "<data_input>"
            Then the local prediction is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction |
            | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa |
            | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 6, "petal width": 2} | 000004 | Iris-virginica |
            | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 4, "petal width": 1.5}| 000004 | Iris-versicolor |
            | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004 | Iris-versicolor |
    """
    print(self.test_scenario1.__doc__)
    cases = [
        ['data/iris.csv', '10', '10', '10',
         '{"petal width": 0.5}', '000004', 'Iris-setosa'],
        ['data/iris.csv', '10', '10', '10',
         '{"petal length": 6, "petal width": 2}', '000004',
         'Iris-virginica'],
        ['data/iris.csv', '10', '10', '10',
         '{"petal length": 4, "petal width": 1.5}', '000004',
         'Iris-versicolor'],
        ['data/iris_sp_chars.csv', '10', '10', '10',
         '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004',
         'Iris-versicolor'],
    ]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, model_wait,
         input_data, objective_id, expected) = case

        # Remote resources: source -> dataset -> model.
        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)
        model_create.i_create_a_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)

        # Local counterpart, then remote and local predictions are both
        # checked against the same expected value.
        prediction_compare.i_create_a_local_model(self)
        prediction_create.i_create_a_prediction(self, input_data)
        prediction_create.the_prediction_is(self, objective_id, expected)
        prediction_compare.i_create_a_local_prediction(self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
def test_scenario2(self):
    """
        Scenario: Successfully comparing forecasts from time series with "A" seasonality
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a time series with "<params>"
            And I wait until the time series is ready less than <time_3> secs
            And I create a local time series
            When I create a forecast for "<input_data>"
            Then the forecast is "<forecasts>"
            And I create a local forecast for "<data_input>"
            Then the local forecast is "<forecasts>"

            Examples:
            | data | time_1 | time_2 | time_3 | input_data | forecasts | params
            ['data/grades.csv', '10', '10', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["A,Ad,A"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast":[66.16225, 72.17308, 66.65573, 73.09698, 70.51449], "model": "A,Ad,A"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}']
    """
    # All three cases share the same time-series creation parameters.
    ts_params = '{"objective_fields": ["000001", "000005"], "period": 12}'
    cases = [
        ['data/grades.csv', '30', '30', '120',
         '{"000005": {"horizon": 5}}',
         '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": "M,M,N"}]}',
         ts_params],
        ['data/grades.csv', '30', '30', '120',
         '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,A"], "criterion": "aic", "limit": 3}}}',
         '{"000005": [{"point_forecast": [67.43222, 68.24468, 64.14437, 67.5662, 67.79028], "model": "M,N,A"}]}',
         ts_params],
        ['data/grades.csv', '30', '30', '120',
         '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,A"], "criterion": "aic", "limit": 3}}}',
         '{"000005": [{"point_forecast": [74.73553, 71.6163, 71.90264, 76.4249, 75.06982], "model": "A,A,A"}]}',
         ts_params],
    ]
    show_doc(self.test_scenario2, cases)
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, series_wait,
         forecast_input, expected_forecast, series_params) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        # Remote time series first, then its local counterpart.
        time_series_create.i_create_a_time_series_with_params(
            self, series_params)
        time_series_create.the_time_series_is_finished_in_less_than(
            self, series_wait)
        time_series_create.create_local_time_series(self)

        # Remote and local forecasts are checked against the same value.
        forecast_create.i_create_a_forecast(self, forecast_input)
        forecast_create.the_forecast_is(self, expected_forecast)
        forecast_compare.i_create_a_local_forecast(self, forecast_input)
        forecast_compare.the_local_forecast_is(self, expected_forecast)
def test_scenario10(self):
    """
        Scenario: Successfully comparing predictions with text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a logistic regression model with objective "<objective>"
            And I wait until the logistic regression model is ready less than <time_3> secs
            And I create a local logistic regression model
            When I create a logistic regression prediction for "<data_input>"
            Then the logistic regression prediction is "<prediction>"
            And the logistic regression probability for the prediction is "<probability>"
            And I create a local logistic regression prediction for "<data_input>"
            Then the local logistic regression prediction is "<prediction>"
            And the local logistic regression probability for the prediction is "<probability>"

            Examples:
            | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability
            | ../data/spam.csv | 20 | 20 | 000002 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "A normal message"} | ham | 0.9169
    """
    print(self.test_scenario10.__doc__)
    # Column order in each row:
    # data, time_1, time_2, time_3, options, data_input, prediction,
    # probability, objective
    cases = [
        ['data/spam.csv', '20', '20', '30',
         '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}',
         '{"Message": "A normal message"}',
         'ham', 0.9169, "000000"],
        ['data/movies.csv', '20', '20', '30',
         '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}',
         '{"gender": "Female", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}',
         '25-34', '0.41686', '000002'],
    ]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, model_wait, source_options,
         input_data, expected, expected_probability, objective_id) = case

        # The source is updated with the field options before the
        # dataset is created from it.
        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        source_create.i_update_source_with(self, source_options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        model_create.i_create_a_logistic_model_with_objective(
            self, objective_id)
        model_create.the_logistic_model_is_finished_in_less_than(
            self, model_wait)
        prediction_compare.i_create_a_local_logistic_model(self)

        # Remote prediction and probability.
        prediction_create.i_create_a_logistic_prediction(self, input_data)
        prediction_create.the_logistic_prediction_is(self, expected)
        prediction_create.the_logistic_probability_is(
            self, expected_probability)

        # Local prediction and probability.
        prediction_compare.i_create_a_local_prediction(self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
        prediction_compare.the_local_probability_is(
            self, expected_probability)
def test_scenario10(self):
    """
        Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a balanced model
            And I wait until the model is ready less than <time_3> secs
            And I create a local model
            When I create a proportional missing strategy prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And the confidence for the prediction is "<confidence>"
            And I create a proportional missing strategy local prediction for "<data_input>"
            Then the local prediction is "<prediction>"
            And the local prediction's confidence is "<confidence>"
            And I create local probabilities for "<data_input>"
            Then the local probabilities are "<probabilities>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence |
    """
    # The last column of each row holds the expected local probabilities.
    cases = [
        ['data/iris_unbalanced.csv', '10', '10', '10',
         '{}',
         '000004', 'Iris-setosa', '0.25284',
         '[0.33333, 0.33333, 0.33333]'],
        ['data/iris_unbalanced.csv', '10', '10', '10',
         '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}',
         '000004', 'Iris-setosa', '0.7575',
         '[1.0, 0.0, 0.0]'],
    ]
    show_doc(self.test_scenario10, cases)
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, model_wait, input_data,
         objective_id, expected, confidence, probabilities) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)
        model_create.i_create_a_balanced_model(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        prediction_compare.i_create_a_local_model(self)

        # Predictions are created and checked first (remote, then local).
        prediction_create.i_create_a_proportional_prediction(
            self, input_data)
        prediction_create.the_prediction_is(self, objective_id, expected)
        prediction_compare.i_create_a_proportional_local_prediction(
            self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)

        # Confidences are compared afterwards, in the same order.
        prediction_create.the_confidence_is(self, confidence)
        prediction_compare.the_local_prediction_confidence_is(
            self, confidence)

        # Only the local model is asked for the probabilities.
        prediction_compare.i_create_local_probabilities(self, input_data)
        prediction_compare.the_local_probabilities_are(self, probabilities)
def test_scenario1(self):
    """
        Scenario: Successfully comparing predictions for deepnets:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a deepnet with objective "<objective>" and "<params>"
            And I wait until the deepnet is ready less than <time_3> secs
            And I create a local deepnet
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And I create a local prediction for "<data_input>"
            Then the local prediction is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params,
    """
    cases = [
        ['data/iris.csv', '10', '50', '30000',
         '{"petal width": 4}', '000004', 'Iris-virginica', '{}'],
        ['data/iris.csv', '10', '50', '30000',
         '{"sepal length": 4.1, "sepal width": 2.4}', '000004',
         'Iris-setosa', '{}'],
        ['data/iris_missing2.csv', '10', '50', '30000',
         '{}', '000004', 'Iris-setosa', '{}'],
        ['data/spam.csv', '10', '50', '30000',
         '{}', '000000', 'ham', '{}'],
    ]
    show_doc(self.test_scenario1, cases)
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, deepnet_wait, input_data,
         objective_id, expected, deepnet_params) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        # Remote deepnet, then the local one.
        model_create.i_create_a_deepnet_with_objective_and_params(
            self, objective_id, deepnet_params)
        model_create.the_deepnet_is_finished_in_less_than(
            self, deepnet_wait)
        prediction_compare.i_create_a_local_deepnet(self)

        # Both predictions are checked against the same expected value.
        prediction_create.i_create_a_deepnet_prediction(self, input_data)
        prediction_create.the_prediction_is(self, objective_id, expected)
        prediction_compare.i_create_a_local_deepnet_prediction(
            self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
def test_scenario13(self):
    """
        Scenario: Successfully comparing predictions for fusions:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I retrieve a list of remote models tagged with "<tag>"
            And I create a fusion from a list of models
            And I wait until the fusion is ready less than <time_4> secs
            And I create a local fusion
            When I create a prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And I create a local prediction for "<data_input>"
            Then the local prediction is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params
    """
    # NOTE(review): the scenario text above talks about three models, but
    # the steps below build a single tagged linear regression and fuse it.
    cases = [
        ['data/grades.csv', '30', '30', '120', '120',
         'my_fusion_tag_lreg',
         '{"000000": 10, "000001": 10, "000002": 10, "000003": 10, "000004": 10}',
         '000005', 21.01712],
    ]
    show_doc(self.test_scenario13, cases)
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, regression_wait, fusion_wait,
         base_tag, input_data, objective_id, expected) = case

        # The tag is suffixed with the PY3 flag and embedded in the
        # creation arguments of the linear regression.
        tag = "%s_%s" % (base_tag, PY3)
        tag_args = '{"tags":["%s"]}' % tag

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        linear_create.i_create_a_linear_regression_with_params(
            self, tag_args)
        linear_create.the_linear_regression_is_finished_in_less_than(
            self, regression_wait)
        prediction_compare.i_retrieve_a_list_of_remote_linear_regressions(
            self, tag)

        model_create.i_create_a_fusion(self)
        model_create.the_fusion_is_finished_in_less_than(self, fusion_wait)
        prediction_compare.i_create_a_local_fusion(self)

        # Remote and local predictions are checked against the same value.
        prediction_create.i_create_a_fusion_prediction(self, input_data)
        prediction_create.the_prediction_is(self, objective_id, expected)
        prediction_compare.i_create_a_local_prediction(self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
def test_scenario1(self):
    """
        Scenario: Successfully creating a prediction from an ensemble:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an ensemble of <number_of_models> models and <tlp> tlp
            And I wait until the ensemble is ready less than <time_3> secs
            When I create an ensemble prediction for "<data_input>"
            And I wait until the prediction is ready less than <time_4> secs
            Then the prediction for "<objective>" is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | number_of_models | tlp | data_input | objective | prediction |
            | ../data/iris.csv | 10 | 10 | 50 | 20 | 5 | 1 | {"petal width": 0.5} | 000004 | Iris-versicolor |
            | ../data/iris_sp_chars.csv | 10 | 10 | 50 | 20 | 5 | 1 | {"pétal&width\u0000": 0.5} | 000004 | Iris-versicolor |
            | ../data/grades.csv | 10 | 10 | 150 | 20 | 10 | 1 | {"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93} | 000005 | 88.205575 |
            | ../data/grades.csv | 10 | 10 | 150 | 20 | 10 | 1 | {"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89} | 000005 | 84.29401 |
    """
    print(self.test_scenario1.__doc__)
    cases = [
        ['data/iris.csv', '30', '30', '50', '20', '5', '1',
         '{"petal width": 0.5}',
         '000004', 'Iris-versicolor'],
        ['data/iris_sp_chars.csv', '30', '30', '50', '20', '5', '1',
         '{"pétal&width\u0000": 0.5}',
         '000004', 'Iris-versicolor'],
        ['data/grades.csv', '30', '30', '150', '20', '10', '1',
         '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93}',
         '000005', '84.556'],
        ['data/grades.csv', '30', '30', '150', '20', '10', '1',
         '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89}',
         '000005', '73.13558'],
    ]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, ensemble_wait,
         prediction_wait, number_of_models, tlp, input_data,
         objective_id, expected) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        ensemble_create.i_create_an_ensemble(self, number_of_models, tlp)
        ensemble_create.the_ensemble_is_finished_in_less_than(
            self, ensemble_wait)

        # The ensemble prediction is waited for before its value is
        # checked.
        prediction_create.i_create_an_ensemble_prediction(self, input_data)
        prediction_create.the_prediction_is_finished_in_less_than(
            self, prediction_wait)
        prediction_create.the_prediction_is(self, objective_id, expected)
def test_scenario1b(self):
    """
        Scenario: Successfully comparing remote and local predictions
                  with raw date input for anomaly detectors
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create an anomaly detector
            And I wait until the anomaly detector is ready less than <time_3> secs
            And I create a local anomaly detector
            When I create an anomaly score for "<data_input>"
            Then the anomaly score is "<score>"
            And I create a local anomaly score for "<data_input>"
            Then the local anomaly score is "<score>"

            Examples:
            |data|time_1|time_2|time_3|data_input|score|
    """
    cases = [
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}',
         0.54343],
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}',
         0.5202],
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}',
         0.93639],
    ]
    show_doc(self.test_scenario1b, cases)
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, anomaly_wait,
         input_data, expected_score) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        # Remote anomaly detector, then the local one.
        anomaly_create.i_create_an_anomaly(self)
        anomaly_create.the_anomaly_is_finished_in_less_than(
            self, anomaly_wait)
        prediction_compare.i_create_a_local_anomaly(self)

        # Remote and local scores are checked against the same value.
        prediction_create.i_create_an_anomaly_score(self, input_data)
        prediction_create.the_anomaly_score_is(self, expected_score)
        prediction_compare.i_create_a_local_anomaly_score(self, input_data)
        prediction_compare.the_local_anomaly_score_is(self, expected_score)
def test_scenario9(self):
    """
        Scenario: Successfully comparing predictions with text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a logistic regression model
            And I wait until the logistic regression model is ready less than <time_3> secs
            And I create a local logistic regression model
            When I create a logistic regression prediction for "<data_input>"
            Then the logistic regression prediction is "<prediction>"
            And I create a local logistic regression prediction for "<data_input>"
            Then the local logistic regression prediction is "<prediction>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | prediction |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"} | ham |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"} | ham |
    """
    print(self.test_scenario9.__doc__)
    # The eight cases use four distinct source-update option sets, two
    # cases each; name them once instead of repeating the literals.
    case_sensitive_stem = '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}'
    plain_tokens = '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}'
    stem_and_stopwords = '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}'
    full_terms_only = '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}'
    cases = [
        ['data/spam.csv', '20', '20', '30', case_sensitive_stem,
         '{"Message": "Mobile call"}', 'spam'],
        ['data/spam.csv', '20', '20', '30', case_sensitive_stem,
         '{"Message": "A normal message"}', 'spam'],
        ['data/spam.csv', '20', '20', '30', plain_tokens,
         '{"Message": "Mobile calls"}', 'spam'],
        ['data/spam.csv', '20', '20', '30', plain_tokens,
         '{"Message": "A normal message"}', 'ham'],
        ['data/spam.csv', '20', '20', '30', stem_and_stopwords,
         '{"Message": "Mobile call"}', 'spam'],
        ['data/spam.csv', '20', '20', '30', stem_and_stopwords,
         '{"Message": "A normal message"}', 'spam'],
        ['data/spam.csv', '20', '20', '30', full_terms_only,
         '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}',
         'spam'],
        ['data/spam.csv', '20', '20', '30', full_terms_only,
         '{"Message": "Ok"}', 'ham'],
    ]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, model_wait,
         source_options, input_data, expected) = case

        # The source is updated with the text options before the dataset
        # is created from it.
        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        source_create.i_update_source_with(self, source_options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)

        model_create.i_create_a_logistic_model(self)
        model_create.the_logistic_model_is_finished_in_less_than(
            self, model_wait)
        prediction_compare.i_create_a_local_logistic_model(self)

        # Remote and local predictions are checked against the same value.
        prediction_create.i_create_a_logistic_prediction(self, input_data)
        prediction_create.the_logistic_prediction_is(self, expected)
        prediction_compare.i_create_a_local_prediction(self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
def test_scenario3(self):
    """
        Scenario 3: Successfully creating a fusion from a dataset:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I create a model with "<params>"
            And I wait until the model is ready less than <time_3> secs
            And I retrieve a list of remote models tagged with "<tag>"
            And I create a fusion from a list of models
            And I wait until the fusion is ready less than <time_4> secs
            When I create a batch prediction for the dataset with the fusion
            And I wait until the batch prediction is ready less than <time_4> secs
            And I download the created predictions file to "<local_file>"
            Then the batch prediction file is like "<predictions_file>"

            Examples:
            | data | time_1 | time_2 | time_3 | time_4 | params | tag | local_file | predictions_file |
            | ../data/iris.csv | 10 | 10 | 20 | 20 | {"tags":["my_fusion_3_tag"]} | my_fusion_3_tag | ./tmp/batch_predictions.csv | ./data/batch_predictions_fs.csv |
    """
    print(self.test_scenario3.__doc__)
    examples = [
        ['data/iris.csv', '10', '10', '20', '20',
         '{"tags":["my_fusion_3_tag"]}', 'my_fusion_3_tag',
         'tmp/batch_predictions.csv', 'data/batch_predictions_fs.csv']]
    for example in examples:
        # Single-argument form prints the same text as the Python 2
        # statement `print "\nTesting with:\n", example`.
        print("\nTesting with:\n%s" % (example,))
        (data, time_1, time_2, time_3, time_4, params, tag,
         local_file, predictions_file) = example

        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)

        # Three identically-parameterised tagged models, each waited for
        # before the next one is created.
        for _ in range(3):
            model_create.i_create_a_model_with(self, params)
            model_create.the_model_is_finished_in_less_than(self, time_3)
        compare_pred.i_retrieve_a_list_of_remote_models(self, tag)

        model_create.i_create_a_fusion(self)
        # The scenario documents <time_4> for the fusion wait; the previous
        # code passed time_3 here.
        model_create.the_fusion_is_finished_in_less_than(self, time_4)

        batch_pred_create.i_create_a_batch_prediction_fusion(self)
        batch_pred_create.the_batch_prediction_is_finished_in_less_than(
            self, time_4)
        batch_pred_create.i_download_predictions_file(self, local_file)
        batch_pred_create.i_check_predictions(self, predictions_file)
def test_scenario6(self):
    """
        Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a model with missing splits
            And I wait until the model is ready less than <time_3> secs
            And I create a local model
            When I create a proportional missing strategy prediction for "<data_input>"
            Then the prediction for "<objective>" is "<prediction>"
            And the confidence for the prediction is "<confidence>"
            And I create a proportional missing strategy local prediction for "<data_input>"
            Then the local prediction is "<prediction>"
            And the local prediction's confidence is "<confidence>"

            Examples:
            | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence |
            | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1} | 000004 | Iris-setosa | 0.8064 |
            | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1, "petal length": 4} | 000004 | Iris-versicolor | 0.7847 |
    """
    print(self.test_scenario6.__doc__)
    cases = [
        ['data/iris_missing2.csv', '10', '10', '10',
         '{"petal width": 1}',
         '000004', 'Iris-setosa', '0.8064'],
        ['data/iris_missing2.csv', '10', '10', '10',
         '{"petal width": 1, "petal length": 4}',
         '000004', 'Iris-versicolor', '0.7847'],
    ]
    for case in cases:
        print("\nTesting with:\n%s" % (case,))
        (csv_path, source_wait, dataset_wait, model_wait, input_data,
         objective_id, expected, confidence) = case

        source_create.i_upload_a_file(self, csv_path)
        source_create.the_source_is_finished(self, source_wait)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(
            self, dataset_wait)
        model_create.i_create_a_model_with_missing_splits(self)
        model_create.the_model_is_finished_in_less_than(self, model_wait)
        prediction_compare.i_create_a_local_model(self)

        # Remote prediction and its confidence.
        prediction_create.i_create_a_proportional_prediction(
            self, input_data)
        prediction_create.the_prediction_is(self, objective_id, expected)
        prediction_create.the_confidence_is(self, confidence)

        # Local prediction and its confidence.
        prediction_compare.i_create_a_proportional_local_prediction(
            self, input_data)
        prediction_compare.the_local_prediction_is(self, expected)
        prediction_compare.the_local_prediction_confidence_is(
            self, confidence)
def test_scenario4(self): """ Scenario: Successfully comparing topic distributions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a topic model And I wait until the topic model is ready less than <time_3> secs And I create a local topic model When I create a topic distribution for "<data_input>" Then the topic distribution is "<topic_distribution>" And I create a local topic distribution for "<data_input>" Then the local topic distribution is "<topic_distribution>" Examples headers: | data | time_1 | time_2 | time_3 | options | data_input | topic distribution | """ examples = [ [ 'data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', '[0.01878, 0.00388, 0.00388, 0.00388, 0.20313, 0.47315, 0.00574, 0.05695, 0.00388, 0.19382, 0.00388, 0.02902]' ], [ 'data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... 
Cine there got amore wat..."}', '[0.00263, 0.01083, 0.00831, 0.06004, 0.33701, 0.00263, 0.01209, 0.44553, 0.0531, 0.00326, 0.06193, 0.00263]' ] ] show_doc(self.test_scenario4, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) topic_create.i_create_a_topic_model(self) topic_create.the_topic_model_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_topic_model(self) topic_create.i_create_a_local_topic_distribution(self, example[5]) prediction_compare.the_local_topic_distribution_is( self, example[6]) topic_create.i_create_a_topic_distribution(self, example[5]) prediction_compare.the_topic_distribution_is(self, example[6])
def test_scenario1(self): """ Scenario: Successfully creating a batch prediction from a multi model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a local multi model When I create a batch prediction for "<data_input>" and save it in "<path>" And I combine the votes in "<path>" Then the plurality combined predictions are "<predictions>" And the confidence weighted predictions are "<predictions>" Examples: | data | time_1 | time_2 | time_3 | params | tag | data_input | path | predictions | | ../data/iris.csv | 10 | 10 | 10 | {"tags":["mytag"]} | mytag | [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}] | ./tmp | ["Iris-setosa", "Iris-virginica", "Iris-versicolor"] | """ print self.test_scenario1.__doc__ examples = [[ 'data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]', './tmp', '["Iris-setosa", "Iris-virginica", "Iris-versicolor"]' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[4]) 
model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_pred.i_retrieve_a_list_of_remote_models(self, example[5]) compare_pred.i_create_a_local_multi_model(self) compare_pred.i_create_a_batch_prediction(self, example[6], example[7]) compare_pred.i_combine_the_votes(self, example[7]) compare_pred.the_plurality_combined_prediction(self, example[8]) compare_pred.the_confidence_weighted_prediction(self, example[8])
def test_scenario3(self): """ Scenario: Successfully comparing predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | """ examples = [ [ 'data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 3}', '000004', 'Iris-versicolor' ], [ 'data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 5, "petal width": 1.5}', '000004', 'Iris-virginica' ] ] show_doc(self.test_scenario3, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[2]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[3]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario6(self): """ Scenario: Successfully comparing projections for PCAs: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a PCA with "<params>" And I wait until the PCA is ready less than <time_3> secs And I create a local PCA When I create a projection for "<input_data>" Then the projection is "<projection>" And I create a local projection for "<data_input>" Then the local projection is "<projection>" Examples: | data | time_1 | time_2 | time_3 | input_data | projection | params """ examples = [ [ 'data/spam_tiny.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all"}}}}', '{"Message": "early"}', '{}', '{"PC40": 0.00416, "PC38": 0.08267, "PC39": 0.00033, "PC18": 0.28094, "PC19": -0.15056, "PC14": 0.20643, "PC15": 0.23931, "PC16": 0.03251, "PC17": 0.02776, "PC10": 0.1424, "PC11": 0.4059, "PC12": -0.1238, "PC13": 0.15131, "PC43": 0.29617, "PC42": 1.0091, "PC41": 0, "PC25": 0.07164, "PC24": -0.29904, "PC27": -0.1331, "PC26": -0.18572, "PC21": 0.25616, "PC20": 0.30424, "PC23": -0.45775, "PC22": -0.3362, "PC47": -0.13757, "PC49": 0.01864, "PC48": 0.04742, "PC29": -0.16286, "PC28": 0.42207, "PC32": -0.05917, "PC46": -0.05018, "PC31": -0.13973, "PC45": -0.05015, "PC36": 0.03017, "PC44": 0, "PC37": -0.06093, "PC34": 0.25821, "PC35": -0.22194, "PC33": -0.23398, "PC8": 0.01159, "PC9": -0.16042, "PC2": -0.09202, "PC3": 0.14371, "PC1": 0.65114, "PC6": -0.43034, "PC7": -0.02563, "PC4": -0.04947, "PC5": -0.07796, "PC50": -0.00769, "PC30": 0.07813}' ], [ 'data/spam_tiny.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all"}}}}', '{"Message": "mobile call"}', '{}', '{"PC40": 0.31818, "PC38": 0.06912, "PC39": -0.14342, "PC18": 0.22382, "PC19": 0.18518, "PC14": 0.89231, "PC15": 0.05046, "PC16": -0.00241, 
"PC17": 0.54501, "PC10": -0.26463, "PC11": 0.30251, "PC12": 1.16327, "PC13": 0.16973, "PC43": 0.11952, "PC42": 1.05499, "PC41": 0.51263, "PC25": 0.02467, "PC24": -0.65128, "PC27": 0.48916, "PC26": -0.45228, "PC21": -0.44167, "PC20": 0.76896, "PC23": 0.29398, "PC22": 0.06425, "PC47": 0.70416, "PC49": -0.30313, "PC48": 0.12976, "PC29": -0.34, "PC28": 0.17406, "PC32": -0.06411, "PC46": 0.69257, "PC31": 0.07523, "PC45": -0.03461, "PC36": 0.29732, "PC44": 0.14516, "PC37": -0.19109, "PC34": 0.58399, "PC35": 0.37608, "PC33": -0.00378, "PC8": -0.88156, "PC9": 0.38233, "PC2": -0.56685, "PC3": 0.56321, "PC1": 0.49171, "PC6": -0.09854, "PC7": -1.24639, "PC4": 1.50134, "PC5": -0.03161, "PC50": 0.17349, "PC30": -1.29612}' ] ] show_doc(self.test_scenario6, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) pca_create.i_create_a_pca_with_params(self, example[6]) pca_create.the_pca_is_finished_in_less_than(self, example[3]) projection_create.i_create_a_projection(self, example[5]) projection_create.the_projection_is(self, example[7]) compare_predictions.create_local_pca(self) compare_predictions.i_create_a_local_projection(self, example[5]) compare_predictions.the_local_projection_is(self, example[7])
def test_scenario1(self): """ Scenario: Successfully comparing remote and local predictions with raw date input for anomaly detectors Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create an anomaly detector And I wait until the anomaly detector is ready less than <time_3> secs And I create a local anomaly detector When I create an anomaly score for "<data_input>" Then the anomaly score is "<score>" And I create a local anomaly score for "<data_input>" Then the local anomaly score is "<score>" Examples: |data|time_1|time_2|time_3|data_input|score| """ examples = [ [ 'data/dates2.csv', '20', '30', '60', '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', 0.52477 ], [ 'data/dates2.csv', '20', '30', '60', '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', 0.50654 ] ] show_doc(self.test_scenario1, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) anomaly_create.i_create_an_anomaly(self) anomaly_create.the_anomaly_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_anomaly(self) prediction_create.i_create_an_anomaly_score(self, example[4]) prediction_create.the_anomaly_score_is(self, example[5]) prediction_compare.i_create_a_local_anomaly_score(self, example[4]) prediction_compare.the_local_anomaly_score_is(self, example[5])
def test_scenario8(self): """ Scenario: Successfully comparing predictions with text options and proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: """ examples = [ [ 'data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap' ], [ 'data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"category1": "a"}', "000003", 'paperwork' ] ] show_doc(self.test_scenario8, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[5]) prediction_create.the_prediction_is(self, example[6], example[7]) 
prediction_compare.i_create_a_proportional_local_prediction( self, example[5]) prediction_compare.the_local_prediction_is(self, example[7])
def test_scenario10(self): """ Scenario 10: Successfully creating a local fusion from an exported file: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_3> secs And I export the fusion to "<exported_file>" When I create a local fusion from the file "<exported_file>" Then the fusion ID and the local fusion ID match Examples: | data | time_1 | time_2 | time_3 | exported_file | params | tag | ../data/iris.csv | 10 | 10 | 50 | ./tmp/fusion.json """ print self.test_scenario10.__doc__ examples = [[ 'data/iris.csv', '10', '10', '50', './tmp/fusion.json', 'my_fusion_tag' ]] for example in examples: print "\nTesting with:\n", example tag = "%s_%s" % (example[5], PY3) tag_args = '{"tags":["%s"]}' % tag source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with(self, tag_args) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, tag_args) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, tag_args) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_retrieve_a_list_of_remote_models(self, tag) model_create.i_create_a_fusion(self) 
model_create.the_fusion_is_finished_in_less_than(self, example[3]) model_create.i_export_fusion(self, example[4]) model_create.i_create_local_fusion_from_file(self, example[4]) model_create.check_fusion_id_local_id(self)
def test_scenario2(self): """ Scenario: Successfully creating a prediction from linear regression: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a pca And I wait until the linear regression is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input |objective | prediction | """ print self.test_scenario2.__doc__ examples = [ [ 'data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '2.27312', '{}' ], [ 'data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '8.19619', '{"bias": false}' ], [ 'data/dates.csv', '30', '30', '30', '{"test-num1": 23, "test-num2" : 54, "test-date.day-of-month":2, "test-date.month":12, "test-date.day-of-week": 2, "test-date.year": 2012}', "000003", '48.27679', '{"bias": false}' ] ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) linear_create.i_create_a_linear_regression_with_objective_and_params( self, example[5], example[7]) linear_create.the_linear_regression_is_finished_in_less_than( self, example[3]) prediction_create.i_create_a_linear_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) print "\nEnd of tests in: %s\n-------------------\n" % __name__
def test_scenario9(self): """ Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression with objective "<objective>" And I wait until the logistic regression is ready less than <time_3> secs And I create a local supervised model When I create a prediction with operating kind "<operating_kind>" for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction with operating point "<operating_kind>" for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point, """ examples = [[ 'data/iris.csv', '10', '50', '30000', '{"petal length": 5}', '000004', 'Iris-versicolor', '{}', "probability" ], [ 'data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability" ]] show_doc(self.test_scenario9, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_logistic_model(self) model_create.the_logistic_model_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_supervised_model( self, model_type="logistic_regression") prediction_create.i_create_a_logistic_prediction_with_op_kind( self, example[4], example[8]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_logistic_prediction_op_kind( self, example[4], example[8]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario1(self): """ Scenario: Successfully comparing centroids with or without text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a cluster And I wait until the cluster is ready less than <time_3> secs And I create a local cluster When I create a centroid for "<data_input>" Then the centroid is "<centroid>" with distance "<distance>" And I create a local centroid for "<data_input>" Then the local centroid is "<centroid>" with distance "<distance>" Examples headers: | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | """ examples = [ ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 7', '0.36637'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": 
"en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.5'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 1', '0.36637'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 0', '0.5'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 0', '0.478833312167'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 1', '0.5'], ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3', '0.5033378686559257'], ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": true}', 'Cluster 3', '0.5033378686559257'], ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'], ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", 
"item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "age_range": "18-24", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Cluster 1', '0.7294650227133437']] show_doc(self.test_scenario1, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) cluster_create.i_create_a_cluster(self) cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_cluster(self) prediction_create.i_create_a_centroid(self, example[5]) prediction_create.the_centroid_is_with_distance(self, example[6], example[7]) prediction_compare.i_create_a_local_centroid(self, example[5]) prediction_compare.the_local_centroid_is(self, example[6], example[7])
def test_scenario2(self): """ Scenario: Successfully comparing centroids with configuration options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a cluster with options "<options>" And I wait until the cluster is ready less than <time_3> secs And I create a local cluster When I create a centroid for "<data_input>" Then the centroid is "<centroid>" with distance "<distance>" And I create a local centroid for "<data_input>" Then the local centroid is "<centroid>" with distance "<distance>" Examples: | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | full_data_input """ examples = [[ 'data/iris.csv', '20', '20', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}' ], [ 'data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}' ]] show_doc(self.test_scenario2, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) cluster_create.i_create_a_cluster_with_options(self, example[4]) cluster_create.the_cluster_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_cluster(self) prediction_create.i_create_a_centroid(self, example[8]) prediction_create.the_centroid_is_with_distance( self, example[6], example[7]) prediction_compare.i_create_a_local_centroid(self, example[5]) 
prediction_compare.the_local_centroid_is(self, example[6], example[7])