def test_scenario7(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" Examples: | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability """ examples = [ [ 'data/spam.csv', '20', '20', '180', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "A normal message"}', 'ham', 0.9169, "000000" ], [ 'data/spam.csv', '20', '20', '180', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"Message": "mobile"}', 'ham', 0.815, "000000" ], [ 'data/movies.csv', '20', '20', '180', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Under 18', '0.8393', '000002' ] ] show_doc(self.test_scenario7, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) 
source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms( self, example[8]) model_create.the_logistic_model_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario9(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | options | data_input | prediction | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"} | ham | | ../data/spam.csv | 20 | 20 | 
30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"} | ham | | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"} | ham | """ print self.test_scenario9.__doc__ examples = [ ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": 
false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'spam'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', 'ham']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model(self) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario12(self): """ Scenario: Successfully comparing logistic regression predictions with constant fields: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I update the dataset with "<params>" And I wait until the dataset is ready less than <time_4> secs And I create a logistic regression model And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 |time_4| data_input | prediction | field_id """ examples = [[ 'data/constant_field.csv', '10', '10', '50', '10', '{"a": 1, "b": 1, "c": 1}', 'a', '{"fields": {"000000": {"preferred": true}}}' ]] show_doc(self.test_scenario12, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) dataset_create.i_update_dataset_with(self, example[7]) dataset_create.the_dataset_is_finished_in_less_than( self, example[4]) model_create.i_create_a_logistic_model(self) model_create.the_logistic_model_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario15(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" Examples: | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability """ examples = [[ 'data/iris.csv', '20', '20', '180', '{"weight_field": "000000", "missing_numerics": false}', '{"petal width": 1.5, "petal length": 2, "sepal width":1}', 'Iris-versicolor', '0.9547', '000004' ]] show_doc(self.test_scenario15, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms( self, example[8], example[4]) model_create.the_logistic_model_is_finished_in_less_than( self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) 
prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario8(self): """ Scenario: Successfully comparing logistic regression predictions: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regresssion model And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 | data_input | prediction | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5} | 'Iris-virginica' | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5} | Iris-virginica | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5} | Iris-virginica | | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 1} | Iris-versicolor | | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}| Iris-virginica | | ../data/price.csv | 10 | 10 | 10 | {"Price": 1200}| Product1 | """ print self.test_scenario8.__doc__ examples = [ ['data/iris.csv', '10', '10', '50', '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'], ['data/iris.csv', '10', '10', '50', '{"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'], ['data/iris.csv', '10', '10', '50', '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'], ['data/iris.csv', '10', '10', 
'50', '{"petal length": 1}', 'Iris-virginica'], ['data/iris_sp_chars.csv', '10', '10', '50', '{"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}', 'Iris-virginica'], ['data/price.csv', '10', '10', '50', '{"Price": 1200}', 'Product2']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model(self) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[4]) prediction_create.the_logistic_prediction_is(self, example[5]) prediction_compare.i_create_a_local_prediction(self, example[4]) prediction_compare.the_local_prediction_is(self, example[5])
def test_scenario10(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" Examples: | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability | ../data/spam.csv | 20 | 20 | 000002 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "A normal message"} | ham | 0.7645 """ print self.test_scenario10.__doc__ examples = [ ['data/spam.csv', '20', '20', '80', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "A normal message"}', 'ham', 0.7645, "000000"], ['data/spam.csv', '20', '20', '80', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"Message": "mobile"}', 'spam', 0.7175, "000000"], ['data/movies.csv', '20', '20', '80', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 
'25-34', '0.4135', '000002']] for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8]) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario9(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" and parms "<parms>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" """ examples = [ ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", '{"bias": false}']] show_doc(self.test_scenario9, examples) for example in 
examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9]) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario9(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" and parms "<parms>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" """ examples = [ ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0511, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0511, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'], ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", '{"bias": false}']] show_doc(self.test_scenario9, examples) for example in 
examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9]) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario12(self): """ Scenario: Successfully comparing logistic regression predictions with constant fields: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I update the dataset with "<params>" And I wait until the dataset is ready less than <time_4> secs And I create a logistic regression model And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" Examples: | data | time_1 | time_2 | time_3 |time_4| data_input | prediction | field_id """ examples = [ ['data/constant_field.csv', '10', '10', '50', '10','{"a": 1, "b": 1, "c": 1}', 'a', '{"fields": {"000000": {"preferred": true}}}']] show_doc(self.test_scenario12, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) dataset_create.i_update_dataset_with(self, example[7]) dataset_create.the_dataset_is_finished_in_less_than(self, example[4]) model_create.i_create_a_logistic_model(self) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6])
def test_scenario15(self): """ Scenario: Successfully comparing predictions with text options: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" Examples: | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability """ examples = [ ['data/iris.csv', '20', '20', '180', '{"weight_field": "000000", "missing_numerics": false}', '{"petal width": 1.5, "petal length": 2, "sepal width":1}', 'Iris-versicolor', '0.9547', '000004']] show_doc(self.test_scenario15, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[4]) model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) 
prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario11(self): """ Scenario: Successfully comparing predictions for logistic regression with balance_fields: Given I create a data source uploading a "<data>" file And I wait until the source is ready less than <time_1> secs And I update the source with params "<options>" And I create a dataset And I wait until the dataset is ready less than <time_2> secs And I create a logistic regression model with objective "<objective>" and flags And I wait until the logistic regression model is ready less than <time_3> secs And I create a local logistic regression model When I create a logistic regression prediction for "<data_input>" Then the logistic regression prediction is "<prediction>" And the logistic regression probability for the prediction is "<probability>" And I create a local logistic regression prediction for "<data_input>" Then the local logistic regression prediction is "<prediction>" And the local logistic regression probability for the prediction is "<probability>" Examples: | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability """ examples = [ [ 'data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' ' "000001": {"name": "gender", "optype": "categorical"},' ' "000002": {"name": "age_range", "optype": "categorical"},' ' "000003": {"name": "occupation", "optype": "categorical"},' ' "000004": {"name": "zipcode", "optype": "numeric"},' ' "000005": {"name": "movie_id", "optype": "numeric"},' ' "000006": {"name": "title", "optype": "text"},' ' "000007": {"name": "genres", "optype": "items",' '"item_analysis": {"separator": "$"}},' '"000008": {"name": "timestamp", "optype": "numeric"},' '"000009": {"name": "rating", "optype": "categorical"}},' '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.4028, "000009", '{"balance_fields": false}' ], [ 'data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": 
"numeric"},' ' "000001": {"name": "gender", "optype": "categorical"},' ' "000002": {"name": "age_range", "optype": "categorical"},' ' "000003": {"name": "occupation", "optype": "categorical"},' ' "000004": {"name": "zipcode", "optype": "numeric"},' ' "000005": {"name": "movie_id", "optype": "numeric"},' ' "000006": {"name": "title", "optype": "text"},' ' "000007": {"name": "genres", "optype": "items",' '"item_analysis": {"separator": "$"}},' '"000008": {"name": "timestamp", "optype": "numeric"},' '"000009": {"name": "rating", "optype": "categorical"}},' '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2622, "000009", '{"normalize": true}' ], [ 'data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' ' "000001": {"name": "gender", "optype": "categorical"},' ' "000002": {"name": "age_range", "optype": "categorical"},' ' "000003": {"name": "occupation", "optype": "categorical"},' ' "000004": {"name": "zipcode", "optype": "numeric"},' ' "000005": {"name": "movie_id", "optype": "numeric"},' ' "000006": {"name": "title", "optype": "text"},' ' "000007": {"name": "genres", "optype": "items",' '"item_analysis": {"separator": "$"}},' '"000008": {"name": "timestamp", "optype": "numeric"},' '"000009": {"name": "rating", "optype": "categorical"}},' '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2622, "000009", '{"balance_fields": true, "normalize": true}' ] ] show_doc(self.test_scenario11, examples) for example in examples: print "\nTesting with:\n", example source_create.i_upload_a_file(self, example[0]) source_create.the_source_is_finished(self, example[1]) source_create.i_update_source_with(self, example[4]) dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example[2]) model_create.i_create_a_logistic_model_with_objective_and_parms( self, example[8], example[9]) model_create.the_logistic_model_is_finished_in_less_than( 
self, example[3]) prediction_compare.i_create_a_local_logistic_model(self) prediction_create.i_create_a_logistic_prediction(self, example[5]) prediction_create.the_logistic_prediction_is(self, example[6]) prediction_create.the_logistic_probability_is(self, example[7]) prediction_compare.i_create_a_local_prediction(self, example[5]) prediction_compare.the_local_prediction_is(self, example[6]) prediction_compare.the_local_probability_is(self, example[7])
def test_scenario11(self):
    """
        Scenario: Successfully comparing predictions for logistic regression with balance_fields:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a logistic regression model with objective "<objective>" and flags
            And I wait until the logistic regression model is ready less than <time_3> secs
            And I create a local logistic regression model
            When I create a logistic regression prediction for "<data_input>"
            Then the logistic regression prediction is "<prediction>"
            And the logistic regression probability for the prediction is "<probability>"
            And I create a local logistic regression prediction for "<data_input>"
            Then the local logistic regression prediction is "<prediction>"
            And the local logistic regression probability for the prediction is "<probability>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | prediction | probability | objective | parms

    """
    # All three examples apply the same source update, so the JSON is
    # written once here instead of being repeated in every row.
    source_options = (
        '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
        ' "000001": {"name": "gender", "optype": "categorical"},'
        ' "000002": {"name": "age_range", "optype": "categorical"},'
        ' "000003": {"name": "occupation", "optype": "categorical"},'
        ' "000004": {"name": "zipcode", "optype": "numeric"},'
        ' "000005": {"name": "movie_id", "optype": "numeric"},'
        ' "000006": {"name": "title", "optype": "text"},'
        ' "000007": {"name": "genres", "optype": "items",'
        '"item_analysis": {"separator": "$"}},'
        '"000008": {"name": "timestamp", "optype": "numeric"},'
        '"000009": {"name": "rating", "optype": "categorical"}},'
        '"source_parser": {"separator": ";"}}')

    # Row layout matches the "Examples" header in the docstring. The rows
    # differ only in the expected probability and in the model flags.
    examples = [
        ['data/movies.csv', '20', '20', '180', source_options,
         '{"timestamp": "999999999"}', '4', 0.3231, "000009",
         '{"balance_fields": false}'],
        ['data/movies.csv', '20', '20', '180', source_options,
         '{"timestamp": "999999999"}', '4', 0.2622, "000009",
         '{"normalize": true}'],
        ['data/movies.csv', '20', '20', '180', source_options,
         '{"timestamp": "999999999"}', '4', 0.2622, "000009",
         '{"balance_fields": true, "normalize": true}']]
    show_doc(self.test_scenario11, examples)
    for example in examples:
        # Name the columns once rather than indexing the row positionally.
        (data, time_1, time_2, time_3, options, data_input,
         prediction, probability, objective, parms) = example
        # A single parenthesized argument is valid both as the Python 2
        # print statement and as the Python 3 print function.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        source_create.i_update_source_with(self, options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        model_create.i_create_a_logistic_model_with_objective_and_parms(
            self, objective, parms)
        model_create.the_logistic_model_is_finished_in_less_than(
            self, time_3)
        prediction_compare.i_create_a_local_logistic_model(self)
        # The remote and the local prediction are checked against the
        # same expected class and probability.
        prediction_create.i_create_a_logistic_prediction(self, data_input)
        prediction_create.the_logistic_prediction_is(self, prediction)
        prediction_create.the_logistic_probability_is(self, probability)
        prediction_compare.i_create_a_local_prediction(self, data_input)
        prediction_compare.the_local_prediction_is(self, prediction)
        prediction_compare.the_local_probability_is(self, probability)