def test_scenario3(self):
        """

            Scenario3: Successfully creating an evaluation for a logistic regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression
                And I wait until the logistic regression is ready less than <time_3> secs
                When I create an evaluation for the logistic regression with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | measure       | value  |
                | ../data/iris.csv | 30      | 30     | 50     | 30     | average_phi   | 0.94107   |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', '50', '50', '800', '80', 'average_phi', '0.89054']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            evaluation_create.i_create_an_evaluation_logistic(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4])
            evaluation_create.the_measured_measure_is_value(self, example[5], example[6])
예제 #2
0
    def test_scenario6(self):
        """
            Scenario: Successfully creating a batch prediction for a logistic regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression
                And I wait until the logistic regression is ready less than <time_3> secs
                When I create a batch prediction for the dataset with the logistic regression
                And I wait until the batch prediction is ready less than <time_4> secs
                And I download the created predictions file to "<local_file>"
                Then the batch prediction file is like "<predictions_file>"

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | local_file | predictions_file       |
                | ../data/iris.csv | 30      | 30     | 80     | 50     | ./tmp/batch_predictions.csv | ./data/batch_predictions_lr.csv |

        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', '30', '30', '80', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            batch_pred_create.i_create_a_batch_prediction_logistic_model(self)
            batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4])
            batch_pred_create.i_download_predictions_file(self, example[5])
            batch_pred_create.i_check_predictions(self, example[6])
예제 #3
0
 def test_scenario3(self):
     """
         Scenario 3: Successfully creating a local logistic regression from an exported file:
             Given I create a data source uploading a "<data>" file
             And I wait until the source is ready less than <time_1> secs
             And I create a dataset
             And I wait until the dataset is ready less than <time_2> secs
             And I create a logistic regression
             And I wait until the logistic regression is ready less than <time_3> secs
             And I export the logistic regression to "<exported_file>"
             When I create a local logistic regression from the file "<exported_file>"
             Then the logistic regression ID and the local logistic regression ID match
             Examples:
             | data                | time_1  | time_2 | time_3 | exported_file
             | ../data/iris.csv | 10      | 10     | 50 | ./tmp/logistic.json
     """
     print self.test_scenario3.__doc__
     examples = [
         ['data/iris.csv', '10', '10', '50', './tmp/logistic.json']]
     for example in examples:
         print "\nTesting with:\n", example
         source_create.i_upload_a_file(self, example[0])
         source_create.the_source_is_finished(self, example[1])
         dataset_create.i_create_a_dataset(self)
         dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
         model_create.i_create_a_logistic_model(self)
         model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
         model_create.i_export_logistic_regression(self, example[4])
         model_create.i_create_local_logistic_regression_from_file(self, example[4])
         model_create.check_logistic_regression_id_local_id(self)
예제 #4
0
    def test_scenario6(self):
        """
            Scenario 6: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I retrieve a list of remote logistic regression tagged with "<tag>"
                And I create a fusion from a list of models and weights
                And I wait until the fusion is ready less than <time_4> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the fusion probability for the prediction is "<probability>"
                And I create a local fusion prediction for "<data_input>"
                Then the local fusion prediction is "<prediction>"
                And the local fusion probability for the prediction is "<probability>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '20', '20',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": true}',
             'my_fusion_6_tag',
             '{"petal width": 1.75, "petal length": 2.45}',
             "000004",
             "Iris-setosa",
             '0.4727',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }', '[1, 2]']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[5])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[11])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(self, example[6])
            model_create.i_create_a_fusion_with_weights(self, example[12])
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            compare_pred.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_create.the_fusion_probability_is(self, example[10])
            compare_pred.i_create_a_local_prediction(self, example[7])
            compare_pred.the_local_prediction_is(self, example[9])
            compare_pred.the_local_probability_is(self, example[10])
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | prediction  |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"}             | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}        | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"}          | ham   |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}       | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"}             | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"}       | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}       | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"}       | ham     |


        """
        print self.test_scenario9.__doc__
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', 'ham']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario8(self):
        """
            Scenario: Successfully comparing logistic regression predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regresssion model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                                                                                 | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}         | 'Iris-virginica' |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}             | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}           | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 1}                                                                         | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}| Iris-virginica |
                | ../data/price.csv | 10      | 10     | 10     | {"Price": 1200}| Product1 |

        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '50', '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal length": 1}', 'Iris-virginica'],
            ['data/iris_sp_chars.csv', '10', '10', '50', '{"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}', 'Iris-virginica'],
            ['data/price.csv', '10', '10', '50', '{"Price": 1200}', 'Product2']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[4])
            prediction_create.the_logistic_prediction_is(self, example[5])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[5])
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data             | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability
                | ../data/spam.csv | 20      | 20     | 000002 | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "A normal message"}       | ham     | 0.7645

        """
        print self.test_scenario10.__doc__
        examples = [
            ['data/spam.csv', '20', '20', '80', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "A normal message"}', 'ham', 0.7645, "000000"],
            ['data/spam.csv', '20', '20', '80', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"Message": "mobile"}', 'spam', 0.7175, "000000"],
            ['data/movies.csv', '20', '20', '80', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', '25-34', '0.4135', '000002']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>" and parms "<parms>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

        """
        examples = [
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0511, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0511, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", '{"bias": false}']]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
예제 #9
0
    def test_scenario12(self):
        """
            Scenario: Successfully comparing logistic regression predictions with constant fields:

                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I update the dataset with "<params>"
                And I wait until the dataset is ready less than <time_4> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 |time_4| data_input                                 | prediction  | field_id

        """
        examples = [
            ['data/constant_field.csv', '10', '10', '50', '10','{"a": 1, "b": 1, "c": 1}', 'a', '{"fields": {"000000": {"preferred": true}}}']]
        show_doc(self.test_scenario12, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            dataset_create.i_update_dataset_with(self, example[7])
            dataset_create.the_dataset_is_finished_in_less_than(self, example[4])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario12(self):
        """
            Scenario: Successfully comparing logistic regression predictions with constant fields:

                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I update the dataset with "<params>"
                And I wait until the dataset is ready less than <time_4> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 |time_4| data_input                                 | prediction  | field_id

        """
        examples = [
            ['data/constant_field.csv', '10', '10', '50', '10','{"a": 1, "b": 1, "c": 1}', 'a', '{"fields": {"000000": {"preferred": true}}}']]
        show_doc(self.test_scenario12, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            dataset_create.i_update_dataset_with(self, example[7])
            dataset_create.the_dataset_is_finished_in_less_than(self, example[4])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
예제 #11
0
    def test_scenario6(self):
        """
            Scenario: Successfully creating a batch prediction for a logistic regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression
                And I wait until the logistic regression is ready less than <time_3> secs
                When I create a batch prediction for the dataset with the logistic regression
                And I wait until the batch prediction is ready less than <time_4> secs
                And I download the created predictions file to "<local_file>"
                Then the batch prediction file is like "<predictions_file>"

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | local_file | predictions_file       |
                | ../data/iris.csv | 30      | 30     | 80     | 50     | ./tmp/batch_predictions.csv | ./data/batch_predictions_lr.csv |

        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris.csv', '30', '30', '80', '50',
            'tmp/batch_predictions.csv', 'data/batch_predictions_lr.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            batch_pred_create.i_create_a_batch_prediction_logistic_model(self)
            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
                self, example[4])
            batch_pred_create.i_download_predictions_file(self, example[5])
            batch_pred_create.i_check_predictions(self, example[6])
    def test_scenario15(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data             | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability

        """
        examples = [
            ['data/iris.csv', '20', '20', '180', '{"weight_field": "000000", "missing_numerics": false}', '{"petal width": 1.5, "petal length": 2, "sepal width":1}', 'Iris-versicolor', '0.9547', '000004']]
        show_doc(self.test_scenario15, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[4])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
예제 #13
0
    def test_scenario15(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data             | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability

        """
        examples = [
            ['data/iris.csv', '20', '20', '180', '{"weight_field": "000000", "missing_numerics": false}', '{"petal width": 1.5, "petal length": 2, "sepal width":1}', 'Iris-versicolor', '0.9547', '000004']]
        show_doc(self.test_scenario15, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[4])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario5(self):
        """
            Scenario: Successfully comparing logistic regression predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                                                                                 | prediction  |
        """
        examples = [
            ['data/iris.csv', '10', '10', '50', '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'],
            ['data/iris.csv', '10', '10', '50', '{"petal length": 1}', 'Iris-setosa'],
            ['data/iris_sp_chars.csv', '10', '10', '50', '{"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}', 'Iris-versicolor'],
            ['data/price.csv', '10', '10', '50', '{"Price": 1200}', 'Product1']]
        show_doc(self.test_scenario5, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[4])
            prediction_create.the_logistic_prediction_is(self, example[5])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[5])
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression with objective "<objective>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a local supervised model
                When I create a prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_kind>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params | operating_point,


        """
        examples = [
            ['data/iris.csv', '10', '50', '30000', '{"petal length": 5}', '000004', 'Iris-versicolor', '{}', "probability"],
            ['data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability"]]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_supervised_model(self, model_type="logistic_regression")
            prediction_create.i_create_a_logistic_prediction_with_op_kind(self, example[4], example[8])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_logistic_prediction_op_kind(self, example[4], example[8])
            prediction_compare.the_local_prediction_is(self, example[6])
예제 #16
0
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression with objective "<objective>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a local supervised model
                When I create a prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_kind>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params | operating_point,


        """
        examples = [
            ['data/iris.csv', '10', '50', '30000', '{"petal length": 5}', '000004', 'Iris-versicolor', '{}', "probability"],
            ['data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability"]]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_supervised_model(self, model_type="logistic_regression")
            prediction_create.i_create_a_logistic_prediction_with_op_kind(self, example[4], example[8])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_logistic_prediction_op_kind(self, example[4], example[8])
            prediction_compare.the_local_prediction_is(self, example[6])
예제 #17
0
    def test_scenario3(self):
        """

            Scenario3: Successfully creating an evaluation for a logistic regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression
                And I wait until the logistic regression is ready less than <time_3> secs
                When I create an evaluation for the logistic regression with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | measure       | value  |
                | ../data/iris.csv | 30      | 30     | 50     | 30     | average_phi   | 0.94107   |
        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/iris.csv', '50', '50', '800', '80', 'average_phi', '0.89054'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            evaluation_create.i_create_an_evaluation_logistic(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(
                self, example[4])
            evaluation_create.the_measured_measure_is_value(
                self, example[5], example[6])
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions for logistic regression with balance_fields:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>" and flags
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data               | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability

        """
        examples = [
            [
                'data/movies.csv', '20', '20', '180',
                '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                ' "000001": {"name": "gender", "optype": "categorical"},'
                ' "000002": {"name": "age_range", "optype": "categorical"},'
                ' "000003": {"name": "occupation", "optype": "categorical"},'
                ' "000004": {"name": "zipcode", "optype": "numeric"},'
                ' "000005": {"name": "movie_id", "optype": "numeric"},'
                ' "000006": {"name": "title", "optype": "text"},'
                ' "000007": {"name": "genres", "optype": "items",'
                '"item_analysis": {"separator": "$"}},'
                '"000008": {"name": "timestamp", "optype": "numeric"},'
                '"000009": {"name": "rating", "optype": "categorical"}},'
                '"source_parser": {"separator": ";"}}',
                '{"timestamp": "999999999"}', '4', 0.4028, "000009",
                '{"balance_fields": false}'
            ],
            [
                'data/movies.csv', '20', '20', '180',
                '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                ' "000001": {"name": "gender", "optype": "categorical"},'
                ' "000002": {"name": "age_range", "optype": "categorical"},'
                ' "000003": {"name": "occupation", "optype": "categorical"},'
                ' "000004": {"name": "zipcode", "optype": "numeric"},'
                ' "000005": {"name": "movie_id", "optype": "numeric"},'
                ' "000006": {"name": "title", "optype": "text"},'
                ' "000007": {"name": "genres", "optype": "items",'
                '"item_analysis": {"separator": "$"}},'
                '"000008": {"name": "timestamp", "optype": "numeric"},'
                '"000009": {"name": "rating", "optype": "categorical"}},'
                '"source_parser": {"separator": ";"}}',
                '{"timestamp": "999999999"}', '4', 0.2622, "000009",
                '{"normalize": true}'
            ],
            [
                'data/movies.csv', '20', '20', '180',
                '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                ' "000001": {"name": "gender", "optype": "categorical"},'
                ' "000002": {"name": "age_range", "optype": "categorical"},'
                ' "000003": {"name": "occupation", "optype": "categorical"},'
                ' "000004": {"name": "zipcode", "optype": "numeric"},'
                ' "000005": {"name": "movie_id", "optype": "numeric"},'
                ' "000006": {"name": "title", "optype": "text"},'
                ' "000007": {"name": "genres", "optype": "items",'
                '"item_analysis": {"separator": "$"}},'
                '"000008": {"name": "timestamp", "optype": "numeric"},'
                '"000009": {"name": "rating", "optype": "categorical"}},'
                '"source_parser": {"separator": ";"}}',
                '{"timestamp": "999999999"}', '4', 0.2622, "000009",
                '{"balance_fields": true, "normalize": true}'
            ]
        ]
        show_doc(self.test_scenario11, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(
                self, example[8], example[9])
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>" and parms "<parms>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

        """
        examples = [
            [
                'data/iris.csv', '20', '20', '130',
                '{"fields": {"000000": {"optype": "categorical"}}}',
                '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000",
                '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'
            ],
            [
                'data/iris.csv', '20', '20', '130',
                '{"fields": {"000000": {"optype": "categorical"}}}',
                '{"species": "Iris-setosa"}', '5.0', 0.051, "000000",
                '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'
            ],
            [
                'data/iris.csv', '20', '20', '130',
                '{"fields": {"000000": {"optype": "categorical"}}}',
                '{"species": "Iris-setosa"}', '5.0', 0.051, "000000",
                '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'
            ],
            [
                'data/iris.csv', '20', '20', '130',
                '{"fields": {"000000": {"optype": "categorical"}}}',
                '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000",
                '{"bias": false}'
            ]
        ]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(
                self, example[8], example[9])
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | prediction  |

        """
        examples = [
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}',
                '{"Message": "Mobile call"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}',
                '{"Message": "A normal message"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}',
                '{"Message": "Mobile calls"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}',
                '{"Message": "A normal message"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}',
                '{"Message": "Mobile call"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}',
                '{"Message": "A normal message"}', 'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}',
                '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}',
                'ham'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}',
                '{"Message": "Ok"}', 'ham'
            ]
        ]
        show_doc(self.test_scenario6, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
예제 #21
0
    def test_scenario6(self):
        """
            Scenario 6: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I retrieve a list of remote logistic regression tagged with "<tag>"
                And I create a fusion from a list of models and weights
                And I wait until the fusion is ready less than <time_4> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the fusion probability for the prediction is "<probability>"
                And I create a local fusion prediction for "<data_input>"
                Then the local fusion prediction is "<prediction>"
                And the local fusion probability for the prediction is "<probability>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '20', '20',
            '{"tags":["my_fusion_6_tag"], "missing_numerics": true}',
            'my_fusion_6_tag', '{"petal width": 1.75, "petal length": 2.45}',
            "000004", "Iris-setosa", '0.4727',
            '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }',
            '[1, 2]'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(
                self, example[8], example[5])
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            model_create.i_create_a_logistic_model_with_objective_and_parms(
                self, example[8], example[11])
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(
                self, example[6])
            model_create.i_create_a_fusion_with_weights(self, example[12])
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            compare_pred.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_create.the_fusion_probability_is(self, example[10])
            compare_pred.i_create_a_local_prediction(self, example[7])
            compare_pred.the_local_prediction_is(self, example[9])
            compare_pred.the_local_probability_is(self, example[10])
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions for logistic regression with balance_fields:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>" and flags
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data               | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability

        """
        examples = [
            ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                                                  ' "000001": {"name": "gender", "optype": "categorical"},'
                                                  ' "000002": {"name": "age_range", "optype": "categorical"},'
                                                  ' "000003": {"name": "occupation", "optype": "categorical"},'
                                                  ' "000004": {"name": "zipcode", "optype": "numeric"},'
                                                  ' "000005": {"name": "movie_id", "optype": "numeric"},'
                                                  ' "000006": {"name": "title", "optype": "text"},'
                                                  ' "000007": {"name": "genres", "optype": "items",'
                                                  '"item_analysis": {"separator": "$"}},'
                                                  '"000008": {"name": "timestamp", "optype": "numeric"},'
                                                  '"000009": {"name": "rating", "optype": "categorical"}},'
                                                  '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.3231, "000009", '{"balance_fields": false}'],
            ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                                                  ' "000001": {"name": "gender", "optype": "categorical"},'
                                                  ' "000002": {"name": "age_range", "optype": "categorical"},'
                                                  ' "000003": {"name": "occupation", "optype": "categorical"},'
                                                  ' "000004": {"name": "zipcode", "optype": "numeric"},'
                                                  ' "000005": {"name": "movie_id", "optype": "numeric"},'
                                                  ' "000006": {"name": "title", "optype": "text"},'
                                                  ' "000007": {"name": "genres", "optype": "items",'
                                                  '"item_analysis": {"separator": "$"}},'
                                                  '"000008": {"name": "timestamp", "optype": "numeric"},'
                                                  '"000009": {"name": "rating", "optype": "categorical"}},'
                                                  '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2622, "000009", '{"normalize": true}'],
            ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},'
                                                  ' "000001": {"name": "gender", "optype": "categorical"},'
                                                  ' "000002": {"name": "age_range", "optype": "categorical"},'
                                                  ' "000003": {"name": "occupation", "optype": "categorical"},'
                                                  ' "000004": {"name": "zipcode", "optype": "numeric"},'
                                                  ' "000005": {"name": "movie_id", "optype": "numeric"},'
                                                  ' "000006": {"name": "title", "optype": "text"},'
                                                  ' "000007": {"name": "genres", "optype": "items",'
                                                  '"item_analysis": {"separator": "$"}},'
                                                  '"000008": {"name": "timestamp", "optype": "numeric"},'
                                                  '"000009": {"name": "rating", "optype": "categorical"}},'
                                                  '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2622, "000009", '{"balance_fields": true, "normalize": true}']]
        show_doc(self.test_scenario11, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])