def test_scenario4(self): """ Scenario: Successfully creating a source from a batch prediction: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a batch prediction for the dataset with the model And I wait until the batch prediction is ready less than <time_4> secs Then I create a source from the batch prediction And I wait until the source is ready less than <time_1> secs Examples: | data | time_1 | time_2 | time_3 | time_4 | | ../data/iris.csv | 30 | 30 | 50 | 50 | """ print self.test_scenario4.__doc__ examples = [['data/diabetes.csv', '30', '30', '50', '50']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) batch_pred_create.i_create_a_batch_prediction(self) batch_pred_create.the_batch_prediction_is_finished_in_less_than( self, example[4]) batch_pred_create.i_create_a_source_from_batch_prediction(self) source_create.the_source_is_finished(self, example[1])
def test_scenario1(self): """ Scenario: Successfully creating a prediction: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal&width\u0000": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario1.__doc__ examples = [ ["data/iris.csv", "10", "10", "10", '{"petal width": 0.5}', "000004", "Iris-setosa"], ["data/iris_sp_chars.csv", "10", "10", "10", '{"pétal&width\u0000": 0.5}', "000004", "Iris-setosa"], ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6])
def test_scenario1(self): """ Scenario: Successfully creating a prediction in DEV mode: Given I want to use api in DEV mode When I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And the source has DEV True And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_read.source_has_dev(self, True) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6])
def test_scenario2(self): """ Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2_MISSINGS.txt | """ print self.test_scenario2.__doc__ examples = [["data/iris_missing2.csv", "10", "10", "10", "data/model/if_then_rules_iris_missing2_MISSINGS.txt"]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with_missing_splits(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
def test_scenario1(self): """ Scenario: Successfully creating an evaluation: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create an evaluation for the model with the dataset And I wait until the evaluation is ready less than <time_4> secs Then the measured "<measure>" is <value> Examples: | data | time_1 | time_2 | time_3 | time_4 | measure | value | | ../data/iris.csv | 30 | 30 | 30 | 30 | average_phi | 1 | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '50', '50', '50', '50', 'average_phi', '1']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) evaluation_create.i_create_an_evaluation(self) evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4]) evaluation_create.the_measured_measure_is_value(self, example[5], example[6])
def test_scenario1(self): """ Scenario: Successfully creating a model from a dataset list: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I store the dataset id in a list And I create a dataset And I wait until the dataset is ready less than <time_3> secs And I store the dataset id in a list Then I create a model from a dataset list And I wait until the model is ready less than <time_4> secs And I check the model stems from the original dataset list Examples: | data | time_1 | time_2 | time_3 | time_4 | | ../data/iris.csv | 10 | 10 | 10 | 10 """ print self.test_scenario1.__doc__ examples = [["data/iris.csv", "10", "10", "10", "10"]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) multimodel_create.i_store_dataset_id(self) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) multimodel_create.i_store_dataset_id(self) model_create.i_create_a_model_from_dataset_list(self) model_create.the_model_is_finished_in_less_than(self, example[4]) multimodel_create.i_check_model_datasets_and_datasets_ids(self)
def test_scenario2(self): """ Scenario: Successfully creating models for first centroid of a cluster: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a cluster with options "<options>" And I wait until the cluster is ready less than <time_3> secs When I create a model associated to centroid "<centroid_id>" And I wait until the model is ready less than <time_4> secs Then the model is associated to the centroid "<centroid_id>" of the cluster Examples: | data | time_1 | time_2 | time_3 | centroid_id | time_4 | | ../data/iris.csv | 10 | 10 | 40 | 000001 | 10 | """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '10', '10', '40', '000001', '10', '{"model_clusters": true}']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) cluster_create.i_create_a_cluster_with_options(self, example[6]) cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_from_cluster(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[5]) model_create.is_associated_to_centroid_id(self, example[4])
def test_scenario1(self): """ Scenario: Successfully changing duplicated field names: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset with "<options>" And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model Then "<field_id>" field's name is changed to "<new_name>" Examples: | data | time_1 | time_2 | time_3 | options | field_id | new_name | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "species"}}} | 000001 | species1 | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "petal width"}}} | 000001 | petal width1 """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "species"}}}', '000001', 'species1'], ['data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "petal width"}}}', '000003', 'petal width3']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset_with(self, example[4]) dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_preds.i_create_a_local_model(self) model_create.field_name_to_new_name(self, example[5], example[6])
def test_scenario2(self): """ Scenario: Successfully creating a model from a dataset list and predicting with it using median: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local multi model When I create a local multimodel batch prediction using median for <input_data> Then the local prediction is <prediction> Examples: | data | time_1 | time_2 | time_3 | input_data | prediction | ../data/grades.csv | 10 | 10 | 10 | {'Tutorial': 99.47, 'Midterm': 53.12, 'TakeHome': 87.96} | 50 """ print self.test_scenario2.__doc__ examples = [ ["data/grades.csv", "10", "10", "10", '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', 50] ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) world.list_of_models = [world.model] compare_pred.i_create_a_local_multi_model(self) compare_pred.i_create_a_local_mm_median_batch_prediction(self, example[4]) compare_pred.the_local_prediction_is(self, example[5])
def test_scenario1(self): """ Scenario 1: Successfully creating a local model from an exported file: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I export the "<pmml>" model to "<exported_file>" When I create a local model from the file "<exported_file>" Then the model ID and the local model ID match Examples: | data | time_1 | time_2 | time_3 | pmml | exported_file | ../data/iris.csv | 10 | 10 | 10 | False | ./tmp/model.json """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', False, './tmp/model.json']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_export_model(self, example[4], example[5]) model_create.i_create_local_model_from_file(self, example[5]) model_create.check_model_id_local_id(self)
def test_scenario2(self): """ Scenario: Successfully creating a model and exporting it: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I export the <"pmml"> model to file "<expected_file>" Then I check the model is stored in "<expected_file>" file in <"pmml"> Examples: | data | time_1 | time_2 | time_3 | expected_file | pmml | data/iris.csv | 10 | 10 | 10 | tmp/model/iris.json | false | data/iris_sp_chars.csv | 10 | 10 | 10 | tmp/model/iris_sp_chars.pmml | true """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '30', '30', '30', 'tmp/model/iris.json', False], ['data/iris_sp_chars.csv', '30', '30', '30', 'tmp/model/iris_sp_chars.pmml', True]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_export_model(self, example[5], example[4]) model_create.i_check_model_stored(self, example[4], example[5])
def test_scenario2(self): """ Scenario: Successfully creating a prediction from a source in a remote location Given I create a data source using the url "<url>" And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | url | time_1 | time_2 | time_3 | data_input | objective | prediction | | s3://bigml-public/csv/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario2.__doc__ examples = [[ 's3://bigml-public/csv/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa' ]] for example in examples: print "\nTesting with:\n", example source_create.i_create_using_url(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6])
def test_scenario2(self): """ Scenario: Successfully creating a prediction from a source in a remote location Given I create a data source using the url "<url>" And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | url | time_1 | time_2 | time_3 | data_input | objective | prediction | | s3://bigml-public/csv/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario2.__doc__ examples = [ ['s3://bigml-public/csv/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: print "\nTesting with:\n", example source_create.i_create_using_url(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6])
def test_scenario1(self): """ Scenario: Successfully creating a prediction using a public model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I make the model public And I wait until the model is ready less than <time_3> secs And I check the model status using the model's public url When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.make_the_model_public(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.model_from_public_url(self) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6])
def test_scenario1(self): """ Scenario: Successfully creating a prediction with a user's project connection: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And the source is in the project And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file_with_project_conn(self, example[0]) source_create.the_source_is_finished(self, example[1]) assert world.source['project'] == world.project_id dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) assert world.dataset['project'] == world.project_id model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) assert world.model['project'] == world.project_id prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) assert world.prediction['project'] == world.project_id
def test_scenario4(self): """ Scenario: Successfully creating a source from a batch prediction: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a batch prediction for the dataset with the model And I wait until the batch prediction is ready less than <time_4> secs Then I create a source from the batch prediction And I wait until the source is ready less than <time_1> secs Examples: | data | time_1 | time_2 | time_3 | time_4 | | ../data/iris.csv | 30 | 30 | 50 | 50 | """ print self.test_scenario4.__doc__ examples = [ ['data/diabetes.csv', '30', '30', '50', '50']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) batch_pred_create.i_create_a_batch_prediction(self) batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) batch_pred_create.i_create_a_source_from_batch_prediction(self) source_create.the_source_is_finished(self, example[1])
def test_scenario1(self): """ Scenario: Successfully creating a batch prediction: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create a batch prediction for the dataset with the model And I wait until the batch prediction is ready less than <time_4> secs And I download the created predictions file to "<local_file>" Then the batch prediction file is like "<predictions_file>" Examples: | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | | ../data/iris.csv | 30 | 30 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions.csv | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions.csv']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) batch_pred_create.i_create_a_batch_prediction(self) batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) batch_pred_create.i_download_predictions_file(self, example[5]) batch_pred_create.i_check_predictions(self, example[6])
def test_scenario2(self): """ Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2_MISSINGS.txt | """ print self.test_scenario2.__doc__ examples = [ ['data/iris_missing2.csv', '10', '10', '10', 'data/model/if_then_rules_iris_missing2_MISSINGS.txt']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with_missing_splits(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
def test_scenario1(self): """ Scenario: Successfully creating an evaluation: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs When I create an evaluation for the model with the dataset And I wait until the evaluation is ready less than <time_4> secs Then the measured "<measure>" is <value> Examples: | data | time_1 | time_2 | time_3 | time_4 | measure | value | | ../data/iris.csv | 30 | 30 | 30 | 30 | average_phi | 1 | """ print self.test_scenario1.__doc__ examples = [[ 'data/iris.csv', '50', '50', '50', '50', 'average_phi', '1' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) evaluation_create.i_create_an_evaluation(self) evaluation_create.the_evaluation_is_finished_in_less_than( self, example[4]) evaluation_create.the_measured_measure_is_value( self, example[5], example[6])
def test_scenario3(self): """ Scenario: Successfully comparing predictions with proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | | ../data/iris.csv | 10 | 10 | 10 | {} | 000004 | Iris-setosa | 0.2629 | | ../data/grades.csv | 10 | 10 | 10 | {} | 000005 | 68.62224 | 27.5358 | | ../data/grades.csv | 10 | 10 | 10 | {"Midterm": 20} | 000005 | 46.69889 | 37.27594297134128 | | ../data/grades.csv | 10 | 10 | 10 | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100} | 000005 | 28.06 | 24.86634 | """ print self.test_scenario3.__doc__ examples = [[ 'data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.2629' ], [ 'data/grades.csv', '10', '10', '10', '{}', '000005', '68.62224', '27.5358' ], [ 'data/grades.csv', '10', '10', '10', '{"Midterm": 20}', '000005', '46.69889', '37.27594297134128' ], [ 'data/grades.csv', '10', '10', '10', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '24.86634' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_proportional_local_prediction( self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario5(self): """ Scenario: Successfully comparing predictions in operating kind for models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" in "<operating_kind>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" in "<operating_kind>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point """ examples = [[ 'data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "probability", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "confidence", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "probability", "000004" ], [ 'data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "confidence", "000004" ]] show_doc(self.test_scenario5, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction_op_kind( self, example[4], example[6]) prediction_create.the_prediction_is(self, example[7], example[5]) prediction_compare.i_create_a_local_prediction_op_kind( self, example[4], example[6]) prediction_compare.the_local_prediction_is(self, example[5])
def test_scenario2(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | options | data_input | objective | prediction | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"} | 000000 | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"} | 000000 | spam | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"} | 000000 | ham | | ../data/movies.csv | 20 | 20 | 30 | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064 | ../data/text_missing.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{} | 000003 | paperwork | """ print self.test_scenario2.__doc__ examples = [ ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'], ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.93064'], ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[5]) prediction_create.the_prediction_is(self, example[6], example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[7])
def test_scenario1(self): """ Scenario: Successfully comparing predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 6, "petal width": 2} | 000004 | Iris-virginica | | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 4, "petal width": 1.5}| 000004 | Iris-versicolor | | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004 | Iris-versicolor | """ print self.test_scenario1.__doc__ examples = [[ 'data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa' ], [ 'data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica' ], [ 'data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor' ], [ 'data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario10(self): """ Scenario: Successfully comparing predictions with proportional missing strategy and balanced models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a balanced model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" And I create local probabilities for "<data_input>" Then the local probabilities are "<probabilities>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | """ examples = [ [ 'data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]' ], [ 'data/iris_unbalanced.csv', '10', '10', '10', '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}', '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]' ] ] show_doc(self.test_scenario10, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_balanced_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_proportional_local_prediction( self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.the_local_prediction_confidence_is( self, example[7]) prediction_compare.i_create_local_probabilities(self, example[4]) prediction_compare.the_local_probabilities_are(self, example[8])
def test_scenario3(self): """ Scenario: Successfully comparing predictions with proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | | ../data/iris.csv | 10 | 10 | 10 | {} | 000004 | Iris-setosa | 0.2629 | | ../data/grades.csv | 10 | 10 | 10 | {} | 000005 | 68.62224 | 27.5358 | | ../data/grades.csv | 10 | 10 | 10 | {"Midterm": 20} | 000005 | 46.69889 | 37.27594297134128 | | ../data/grades.csv | 10 | 10 | 10 | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100} | 000005 | 28.06 | 24.86634 | """ print self.test_scenario3.__doc__ examples = [ ["data/iris.csv", "10", "10", "10", "{}", "000004", "Iris-setosa", "0.2629"], ["data/grades.csv", "10", "10", "10", "{}", "000005", "68.62224", "27.5358"], ["data/grades.csv", "10", "10", "10", '{"Midterm": 20}', "000005", "46.69889", "37.27594297134128"], [ "data/grades.csv", "10", "10", "10", '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', "000005", "28.06", "24.86634", ], ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario6(self): """ Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with missing splits And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1} | 000004 | Iris-setosa | 0.8064 | | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1, "petal length": 4} | 000004 | Iris-versicolor | 0.7847 | """ print self.test_scenario6.__doc__ examples = [[ 'data/iris_missing2.csv', '10', '10', '10', '{"petal width": 1}', '000004', 'Iris-setosa', '0.8064' ], [ 'data/iris_missing2.csv', '10', '10', '10', '{"petal width": 1, "petal length": 4}', '000004', 'Iris-versicolor', '0.7847' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with_missing_splits(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.i_create_a_proportional_local_prediction( self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_prediction_confidence_is( self, example[7])
def test_scenario6(self): """ Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with missing splits And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1} | 000004 | Iris-setosa | 0.8064 | | ../data/iris_missing2.csv | 10 | 10 | 10 | {"petal width": 1, "petal length": 4} | 000004 | Iris-versicolor | 0.7847 | """ print self.test_scenario6.__doc__ examples = [ ["data/iris_missing2.csv", "10", "10", "10", '{"petal width": 1}', "000004", "Iris-setosa", "0.8064"], [ "data/iris_missing2.csv", "10", "10", "10", '{"petal width": 1, "petal length": 4}', "000004", "Iris-versicolor", "0.7847", ], ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with_missing_splits(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_prediction_confidence_is(self, example[7])
def test_scenario3(self): """ Scenario: Successfully comparing predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | """ examples = [ [ 'data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 3}', '000004', 'Iris-versicolor' ], [ 'data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 5, "petal width": 1.5}', '000004', 'Iris-virginica' ] ] show_doc(self.test_scenario3, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[2]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[3]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario8(self): """ Scenario: Successfully comparing predictions with text options and proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: """ examples = [ [ 'data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap' ], [ 'data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"category1": "a"}', "000003", 'paperwork' ] ] show_doc(self.test_scenario8, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction( self, example[5]) prediction_create.the_prediction_is(self, example[6], example[7]) prediction_compare.i_create_a_proportional_local_prediction( self, example[5]) prediction_compare.the_local_prediction_is(self, example[7])
def test_scenario2(self): """ Scenario: Successfully creating a prediction using a shared model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I make the model shared And I wait until the model is ready less than <time_3> secs And I get the model sharing info And I check the model status using the model's shared url And I check the model status using the model's shared key And I create a local model When I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | Iris-setosa | """ print self.test_scenario2.__doc__ examples = [[ 'data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', 'Iris-setosa' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.make_the_model_shared(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.get_sharing_info(self) model_create.model_from_shared_url(self) model_create.model_from_shared_key(self) compare_pred.i_create_a_local_model(self) compare_pred.i_create_a_local_prediction(self, example[4]) compare_pred.the_local_prediction_is(self, example[5])
def test_scenario6(self): """ Scenario: Successfully creating a model and check its summary information: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the model summary with "<expected_file>" file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris.csv | 10 | 10 | 10 | data/model/summarize_iris.txt | | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/summarize_iris_sp_chars.txt | | data/spam.csv | 20 | 20 | 30 | data/model/summarize_spam.txt | | data/grades.csv | 10 | 10 | 10 | data/model/summarize_grades.txt | | data/diabetes.csv | 20 | 20 | 30 | data/model/summarize_diabetes.txt | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/summarize_iris_missing2.txt | | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/summarize_tiny_kdd.txt | """ print self.test_scenario6.__doc__ examples = [ ["data/iris.csv", "10", "10", "10", "data/model/summarize_iris.txt"], ["data/iris_sp_chars.csv", "10", "10", "10", "data/model/summarize_iris_sp_chars.txt"], ["data/spam.csv", "10", "10", "10", "data/model/summarize_spam.txt"], ["data/grades.csv", "10", "10", "10", "data/model/summarize_grades.txt"], ["data/diabetes.csv", "10", "10", "10", "data/model/summarize_diabetes.txt"], ["data/iris_missing2.csv", "10", "10", "10", "data/model/summarize_iris_missing2.txt"], ["data/tiny_kdd.csv", "10", "10", "10", "data/model/summarize_tiny_kdd.txt"], ] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_check_the_model_summary_with(self, example[4])
def test_scenario10(self): """ Scenario: Successfully comparing predictions with proportional missing strategy and balanced models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a balanced model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" And I create local probabilities for "<data_input>" Then the local probabilities are "<probabilities>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | """ examples = [ ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'], ['data/iris_unbalanced.csv', '10', '10', '10', '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}', '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]']] show_doc(self.test_scenario10, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_balanced_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.the_local_prediction_confidence_is(self, example[7]) prediction_compare.i_create_local_probabilities(self, example[4]) prediction_compare.the_local_probabilities_are(self, example[8])
def test_scenario1(self): """ Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris.csv | 10 | 10 | 10 | data/model/if_then_rules_iris.txt | | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_sp_chars.txt | | data/spam.csv | 20 | 20 | 30 | data/model/if_then_rules_spam.txt | | data/grades.csv | 10 | 10 | 10 | data/model/if_then_rules_grades.txt | | data/diabetes.csv | 20 | 20 | 30 | data/model/if_then_rules_diabetes.txt | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2.txt | | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/if_then_rules_tiny_kdd.txt | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '30', '30', '30', 'data/model/if_then_rules_iris.txt'], ['data/iris_sp_chars.csv', '30', '30', '30', 'data/model/if_then_rules_iris_sp_chars.txt'], ['data/spam.csv', '30', '30', '30', 'data/model/if_then_rules_spam.txt'], ['data/grades.csv', '30', '30', '30', 'data/model/if_then_rules_grades.txt'], ['data/diabetes.csv', '30', '30', '30', 'data/model/if_then_rules_diabetes.txt'], ['data/iris_missing2.csv', '30', '30', '30', 'data/model/if_then_rules_iris_missing2.txt'], ['data/tiny_kdd.csv', '30', '30', '30', 'data/model/if_then_rules_tiny_kdd.txt']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
def test_scenario3(self): """ Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with "<options>" waiting less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | options | expected_file | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt | """ print self.test_scenario3.__doc__ examples = [ ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}','data/model/if_then_rules_spam_textanalysis_1.txt'], ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', 'data/model/if_then_rules_spam_textanalysis_2.txt'], ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_3.txt'], ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_4.txt'], ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_5.txt'], ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_6.txt']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[5])
def test_scenario13(self): """ Scenario: Successfully comparing predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | """ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"], ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', "tmp/my_model.json", "my_test"], ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"], ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"]] show_doc(self.test_scenario13, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) args = '{"tags": ["%s"]}' % example[8] model_create.i_create_a_model_with(self, data=args) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_export_model(self, False, example[7]) # no pmml prediction_compare.i_create_a_local_model_from_file(self, example[7]) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) model_create.i_export_tags_model(self, example[7], example[8]) prediction_compare.i_create_a_local_model_from_file(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario3(self): """ Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with "<options>" waiting less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the output is like "<expected_file>" expected file Examples: | data | time_1 | time_2 | time_3 | options | expected_file | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt | | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt | """ print self.test_scenario3.__doc__ examples = [ ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}','data/model/if_then_rules_spam_textanalysis_1.txt'], ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', 'data/model/if_then_rules_spam_textanalysis_2.txt'], ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_3.txt'], ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_4.txt'], ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_5.txt'], ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_6.txt']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) inspect_model.i_check_if_the_output_is_like_expected_file(self, example[5])
def test_scenario6(self): """ Scenario: Successfully creating a model and check its summary information: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model And I translate the tree into IF_THEN rules Then I check the model summary with "<expected_file>" file Examples: | data | time_1 | time_2 | time_3 | expected_file | | data/iris.csv | 10 | 10 | 10 | data/model/summarize_iris.txt | | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/summarize_iris_sp_chars.txt | | data/spam.csv | 20 | 20 | 30 | data/model/summarize_spam.txt | | data/grades.csv | 10 | 10 | 10 | data/model/summarize_grades.txt | | data/diabetes.csv | 20 | 20 | 30 | data/model/summarize_diabetes.txt | | data/iris_missing2.csv | 10 | 10 | 10 | data/model/summarize_iris_missing2.txt | | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/summarize_tiny_kdd.txt | """ print self.test_scenario6.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', 'data/model/summarize_iris.txt'], ['data/iris_sp_chars.csv', '10', '10', '10', 'data/model/summarize_iris_sp_chars.txt'], ['data/spam.csv', '10', '10', '10', 'data/model/summarize_spam.txt'], ['data/grades.csv', '10', '10', '10', 'data/model/summarize_grades.txt'], ['data/diabetes.csv', '10', '10', '10', 'data/model/summarize_diabetes.txt'], ['data/iris_missing2.csv', '10', '10', '10', 'data/model/summarize_iris_missing2.txt'], ['data/tiny_kdd.csv', '10', '10', '10', 'data/model/summarize_tiny_kdd.txt']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) inspect_model.i_check_the_model_summary_with(self, example[4])
def test_scenario2(self): """ Scenario: Successfully creating a prediction using a shared model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I make the model shared And I wait until the model is ready less than <time_3> secs And I get the model sharing info And I check the model status using the model's shared url And I check the model status using the model's shared key And I create a local model When I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | Iris-setosa | """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', 'Iris-setosa']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.make_the_model_shared(self) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.get_sharing_info(self) model_create.model_from_shared_url(self) model_create.model_from_shared_key(self) compare_pred.i_create_a_local_model(self) compare_pred.i_create_a_local_prediction(self, example[4]) compare_pred.the_local_prediction_is(self, example[5])
def test_scenario1(self): """ Scenario: Successfully comparing predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 6, "petal width": 2} | 000004 | Iris-virginica | | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 4, "petal width": 1.5}| 000004 | Iris-versicolor | | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004 | Iris-versicolor | """ print self.test_scenario1.__doc__ examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa'], ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica'], ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor'], ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario3(self): """ Scenario: Successfully comparing predictions with proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And the confidence for the prediction is "<confidence>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" And the local prediction's confidence is "<confidence>" Examples: | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | """ examples = [ ['data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.2629'], ['data/grades.csv', '10', '10', '10', '{}', '000005', '68.62224', '27.5358'], ['data/grades.csv', '10', '10', '10', '{"Midterm": 20}', '000005', '40.46667', '54.89713'], ['data/grades.csv', '10', '10', '10', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '25.65806']] show_doc(self.test_scenario3, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction(self, example[4]) prediction_create.the_prediction_is(self, example[5], example[6]) prediction_create.the_confidence_is(self, example[7]) prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_prediction_confidence_is(self, example[7])
def test_scenario2(self): """ Scenario: Successfully comparing predictions in operating points for models: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a prediction for "<data_input>" in "<operating_point>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" in "<operating_point>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point """ examples = [ ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-setosa', {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-versicolor', {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"], ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"]] show_doc(self.test_scenario2, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_prediction_op(self, example[4], example[6]) prediction_create.the_prediction_is(self, example[7], example[5]) prediction_compare.i_create_a_local_prediction_op(self, example[4], example[6]) prediction_compare.the_local_prediction_is(self, example[5])
def test_scenario10(self): """ Scenario: Successfully comparing predictions for fusions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_4> secs And I create a local fusion When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params """ examples = [[ 'data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["my_fusion_tag"]}', 'my_fusion_tag', '{"petal width": 4}', '000004', 'Iris-virginica' ], [ 'data/grades.csv', '30', '30', '120', '120', '{"tags":["my_fusion_tag_reg"]}', 'my_fusion_tag_reg', '{"Midterm": 20}', '000005', 43.65286 ]] show_doc(self.test_scenario10, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_retrieve_a_list_of_remote_models( self, example[6]) model_create.i_create_a_fusion(self) model_create.the_fusion_is_finished_in_less_than(self, example[4]) prediction_compare.i_create_a_local_fusion(self) prediction_create.i_create_a_fusion_prediction(self, example[7]) prediction_create.the_prediction_is(self, example[8], example[9]) prediction_compare.i_create_a_local_prediction(self, example[7]) prediction_compare.the_local_prediction_is(self, example[9])
def test_scenario2(self): """ Scenario 2: Successfully creating a fusion: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_4> secs And I update the fusion name to "<fusion_name>" When I wait until the fusion is ready less than <time_5> secs And I create a prediction for "<data_input>" Then the fusion name is "<fusion_name>" And the prediction for "<objective>" is "<prediction>" And I create an evaluation for the fusion with the dataset And I wait until the evaluation is ready less than <time_4> secs Then the measured "<measure>" is <value> Examples: | data | time_1 | time_2 | time_3 | time_4 | fusion_name | data_input | objective | prediction | ../data/iris.csv | 10 | 10 | 20 | 20 | my new fusion name | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa" """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '10', '10', '20', '20', 'my new fusion name', '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag', '{"petal width": 1.75, "petal length": 2.45}', "000004", "Iris-setosa", 'average_phi', '1.0']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with(self, example[6]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[6]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[6]) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_pred.i_retrieve_a_list_of_remote_models(self, example[7]) model_create.i_create_a_fusion(self) model_create.the_fusion_is_finished_in_less_than(self, example[3]) model_create.i_update_fusion_name(self, example[5]) model_create.the_fusion_is_finished_in_less_than(self, example[4]) model_create.i_check_fusion_name(self, example[5]) prediction_create.i_create_a_fusion_prediction(self, example[8]) prediction_create.the_prediction_is(self, example[9], example[10]) evaluation_create.i_create_an_evaluation_fusion(self) evaluation_create.the_evaluation_is_finished_in_less_than(self, example[3]) evaluation_create.the_measured_measure_is_value(self, example[11], example[12])
def test_scenario1(self): """ Scenario: Successfully changing duplicated field names: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset with "<options>" And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model Then "<field_id>" field's name is changed to "<new_name>" Examples: | data | time_1 | time_2 | time_3 | options | field_id | new_name | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "species"}}} | 000001 | species1 | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "petal width"}}} | 000001 | petal width1 """ print self.test_scenario1.__doc__ examples = [[ 'data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "species"}}}', '000001', 'species1' ], [ 'data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "petal width"}}}', '000003', 'petal width3' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset_with(self, example[4]) dataset_create.the_dataset_is_finished_in_less_than( self, example[3]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_preds.i_create_a_local_model(self) model_create.field_name_to_new_name(self, example[5], example[6])
def test_scenario11(self): """ Scenario: Successfully comparing predictions with text options and proportional missing strategy: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local model When I create a proportional missing strategy prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a proportional missing strategy local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | ../data/text_missing.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{} | paperwork | """ print self.test_scenario11.__doc__ examples = [ ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003",'swap'], ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"category1": "a"}', "000003",'paperwork']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_model(self) prediction_create.i_create_a_proportional_prediction(self, example[5]) prediction_create.the_prediction_is(self, example[6], example[7]) prediction_compare.i_create_a_proportional_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[7])
def test_scenario2(self): """ Scenario: Successfully creating a model from a dataset list and predicting with it using median: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model And I wait until the model is ready less than <time_3> secs And I create a local multi model When I create a local multimodel batch prediction using median for <input_data> Then the local prediction is <prediction> Examples: | data | time_1 | time_2 | time_3 | input_data | prediction | ../data/grades.csv | 10 | 10 | 10 | {'Tutorial': 99.47, 'Midterm': 53.12, 'TakeHome': 87.96} | 50 """ print self.test_scenario2.__doc__ examples = [[ 'data/grades.csv', '30', '30', '30', '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', 63.33 ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model(self) model_create.the_model_is_finished_in_less_than(self, example[3]) world.list_of_models = [world.model] compare_pred.i_create_a_local_multi_model(self) compare_pred.i_create_a_local_mm_median_batch_prediction( self, example[4]) compare_pred.the_local_prediction_is(self, example[5])
def test_scenario1(self): """ Scenario: Successfully creating a model from a dataset list: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I store the dataset id in a list And I create a dataset And I wait until the dataset is ready less than <time_3> secs And I store the dataset id in a list Then I create a model from a dataset list And I wait until the model is ready less than <time_4> secs And I check the model stems from the original dataset list Examples: | data | time_1 | time_2 | time_3 | time_4 | | ../data/iris.csv | 10 | 10 | 10 | 10 """ print self.test_scenario1.__doc__ examples = [['data/iris.csv', '10', '10', '10', '10']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) multimodel_create.i_store_dataset_id(self) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[3]) multimodel_create.i_store_dataset_id(self) model_create.i_create_a_model_from_dataset_list(self) model_create.the_model_is_finished_in_less_than(self, example[4]) multimodel_create.i_check_model_datasets_and_datasets_ids(self)
def test_scenario12(self): """ Scenario: Successfully comparing predictions for fusions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_4> secs And I create a local fusion When I create a prediction for "<data_input>" Then the prediction for "<objective>" is "<prediction>" And I create a local prediction for "<data_input>" Then the local prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params """ tag = "my_fusion_tag_12_%s" % PY3 tag_reg = "my_fusion_tag_12_reg_%s" % PY3 examples = [ ['data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag, tag, '{"petal width": 4}', '000004', 'Iris-virginica'], ['data/grades.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag_reg, tag_reg, '{"Midterm": 20}', '000005', 44.37625]] show_doc(self.test_scenario12, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_retrieve_a_list_of_remote_models(self, example[6]) model_create.i_create_a_fusion_with_weights(self) model_create.the_fusion_is_finished_in_less_than(self, example[4]) prediction_compare.i_create_a_local_fusion(self) prediction_create.i_create_a_fusion_prediction(self, example[7]) prediction_create.the_prediction_is(self, example[8], example[9]) prediction_compare.i_create_a_local_prediction(self, example[7]) prediction_compare.the_local_prediction_is(self, example[9])
def test_scenario3(self): """ Scenario 3: Successfully creating a fusion from a dataset: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a fusion from a list of models And I wait until the fusion is ready less than <time_4> secs When I create a batch prediction for the dataset with the fusion And I wait until the batch prediction is ready less than <time_4> secs And I download the created predictions file to "<local_file>" Then the batch prediction file is like "<predictions_file>" Examples: | data | time_1 | time_2 | time_3 | time_4 | tag | local_file | predictions_file | | ../data/iris.csv | 10 | 10 | 20 | 20 | mytag | ./tmp/batch_predictions.csv | ./data/batch_predictions_fs.csv | """ print self.test_scenario3.__doc__ examples = [[ 'data/iris.csv', '10', '10', '20', '20', '{"tags":["my_fusion_3_tag"]}', 'my_fusion_3_tag', 'tmp/batch_predictions.csv', 'data/batch_predictions_fs.csv' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[5]) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_pred.i_retrieve_a_list_of_remote_models(self, example[6]) model_create.i_create_a_fusion(self) model_create.the_fusion_is_finished_in_less_than(self, example[3]) batch_pred_create.i_create_a_batch_prediction_fusion(self) batch_pred_create.the_batch_prediction_is_finished_in_less_than( self, example[4]) batch_pred_create.i_download_predictions_file(self, example[7]) batch_pred_create.i_check_predictions(self, example[8])
def test_scenario1(self): """ Scenario: Successfully creating a batch prediction from a multi model: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I create a model with "<params>" And I wait until the model is ready less than <time_3> secs And I retrieve a list of remote models tagged with "<tag>" And I create a local multi model When I create a batch prediction for "<data_input>" and save it in "<path>" And I combine the votes in "<path>" Then the plurality combined predictions are "<predictions>" And the confidence weighted predictions are "<predictions>" Examples: | data | time_1 | time_2 | time_3 | params | tag | data_input | path | predictions | | ../data/iris.csv | 10 | 10 | 10 | {"tags":["mytag"]} | mytag | [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}] | ./tmp | ["Iris-setosa", "Iris-virginica", "Iris-versicolor"] | """ print self.test_scenario1.__doc__ examples = [[ 'data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]', './tmp', '["Iris-setosa", "Iris-virginica", "Iris-versicolor"]' ]] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_model_with(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[3]) model_create.i_create_a_model_with(self, example[4]) model_create.the_model_is_finished_in_less_than(self, example[3]) compare_pred.i_retrieve_a_list_of_remote_models(self, example[5]) compare_pred.i_create_a_local_multi_model(self) compare_pred.i_create_a_batch_prediction(self, example[6], example[7]) compare_pred.i_combine_the_votes(self, example[7]) compare_pred.the_plurality_combined_prediction(self, example[8]) compare_pred.the_confidence_weighted_prediction(self, example[8])