def test_scenario2(self):
        """
            Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                         |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_missing2_MISSINGS.txt     |

        """
        # Echo the scenario description before running it.
        print(self.test_scenario2.__doc__)
        # One row per example; columns follow the table in the docstring.
        examples = [[
            'data/iris_missing2.csv', '10', '10', '10',
            'data/model/if_then_rules_iris_missing2_MISSINGS.txt',
        ]]
        for row in examples:
            print("\nTesting with:\n%s" % (row,))
            data, time_1, time_2, time_3, expected_file = row
            # Remote resources: source -> dataset -> model.
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            # Local model, its rule translation and the comparison step.
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(
                self, expected_file)
    def test_scenario1(self):
        """
            Scenario: Successfully changing duplicated field names:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset with "<options>"
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                Then "<field_id>" field's name is changed to "<new_name>"

        Examples:
                | data             | time_1  | time_2 | time_3 | options | field_id | new_name
                | ../data/iris.csv | 20      | 20     | 30     | {"fields": {"000001": {"name": "species"}}} | 000001 | species1
                | ../data/iris.csv | 20      | 20     | 30     | {"fields": {"000001": {"name": "petal width"}}} | 000003 | petal width3
        """
        print(self.test_scenario1.__doc__)
        # Columns: data, time_1, time_2, time_3, options, field_id, new_name.
        examples = [
            ['data/iris.csv', '20', '20', '30',
             '{"fields": {"000001": {"name": "species"}}}',
             '000001', 'species1'],
            ['data/iris.csv', '20', '20', '30',
             '{"fields": {"000001": {"name": "petal width"}}}',
             '000003', 'petal width3']]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, time_1, time_2, time_3,
             options, field_id, new_name) = example
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset_with(self, options)
            # The dataset wait is bounded by time_2, as the scenario states;
            # time_3 is the model timeout and is used only below.
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            compare_preds.i_create_a_local_model(self)
            model_create.field_name_to_new_name(self, field_id, new_name)
    def test_scenario2(self):
        """
            Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                         |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_missing2_MISSINGS.txt     |

        """
        print(self.test_scenario2.__doc__)
        # Single example row, in the column order of the docstring table.
        rules_case = [
            "data/iris_missing2.csv",
            "10",
            "10",
            "10",
            "data/model/if_then_rules_iris_missing2_MISSINGS.txt",
        ]
        for case in [rules_case]:
            print("\nTesting with:\n%s" % (case,))
            csv_path, source_secs, dataset_secs, model_secs, rules_file = case
            source_create.i_upload_a_file(self, csv_path)
            source_create.the_source_is_finished(self, source_secs)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_secs)
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, model_secs)
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            # Final assertion step: compare the rules with the expected file.
            inspect_model.i_check_if_the_output_is_like_expected_file(
                self, rules_file)
# Example #4
# 0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris.csv   | 10      | 10     | 10     | {}                   | 000004    | Iris-setosa    | 0.2629     |
                | ../data/grades.csv | 10      | 10     | 10     | {}                   | 000005    | 68.62224       | 27.5358    |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20}      | 000005    | 46.69889      | 37.27594297134128   |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100}     | 000005    | 28.06      | 24.86634   |

        """
        print(self.test_scenario3.__doc__)
        # Columns: data, time_1, time_2, time_3, data_input, objective,
        # prediction, confidence.
        examples = [
            ['data/iris.csv', '10', '10', '10', '{}', '000004',
             'Iris-setosa', '0.2629'],
            ['data/grades.csv', '10', '10', '10', '{}', '000005',
             '68.62224', '27.5358'],
            ['data/grades.csv', '10', '10', '10', '{"Midterm": 20}',
             '000005', '46.69889', '37.27594297134128'],
            ['data/grades.csv', '10', '10', '10',
             '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}',
             '000005', '28.06', '24.86634']]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, time_1, time_2, time_3, data_input, objective,
             prediction, confidence) = example
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(
                self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            # The scenario lists a confidence check for the prediction, and
            # the examples carry the expected value, so assert it here.
            prediction_create.the_confidence_is(self, confidence)
            prediction_compare.i_create_a_proportional_local_prediction(
                self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
            # Same confidence check for the local prediction.
            prediction_compare.the_local_prediction_confidence_is(
                self, confidence)
    def test_scenario5(self):
        """
            Scenario: Successfully comparing predictions in operating kind for models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>" in "<operating_kind>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>" in "<operating_kind>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | prediction  | operating_point


        """
        # Row layout: data, time_1, time_2, time_3, data_input, prediction,
        # operating kind, objective field id.
        cases = [
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2.46, "sepal length": 5}',
             'Iris-versicolor', "probability", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2.46, "sepal length": 5}',
             'Iris-versicolor', "confidence", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2}',
             'Iris-setosa', "probability", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2}',
             'Iris-setosa', "confidence", "000004"],
        ]
        show_doc(self.test_scenario5, cases)

        for case in cases:
            print("\nTesting with:\n%s" % (case,))
            (data, time_1, time_2, time_3, data_input,
             prediction, operating_kind, objective) = case
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)
            # Remote prediction with the given operating kind.
            prediction_create.i_create_a_prediction_op_kind(
                self, data_input, operating_kind)
            prediction_create.the_prediction_is(self, objective, prediction)
            # Local prediction with the same input and operating kind.
            prediction_compare.i_create_a_local_prediction_op_kind(
                self, data_input, operating_kind)
            prediction_compare.the_local_prediction_is(self, prediction)
# Example #6
# 0
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5}                   | 000004    | Iris-setosa |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 6, "petal width": 2}  | 000004    | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 4, "petal width": 1.5}| 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004    | Iris-versicolor |

        """
        print(self.test_scenario1.__doc__)
        # Rows follow the docstring table: data, time_1, time_2, time_3,
        # data_input, objective, prediction.
        examples = [
            ['data/iris.csv', '10', '10', '10',
             '{"petal width": 0.5}', '000004', 'Iris-setosa'],
            ['data/iris.csv', '10', '10', '10',
             '{"petal length": 6, "petal width": 2}', '000004',
             'Iris-virginica'],
            ['data/iris.csv', '10', '10', '10',
             '{"petal length": 4, "petal width": 1.5}', '000004',
             'Iris-versicolor'],
            ['data/iris_sp_chars.csv', '10', '10', '10',
             '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004',
             'Iris-versicolor'],
        ]
        for row in examples:
            print("\nTesting with:\n%s" % (row,))
            (data, time_1, time_2, time_3,
             data_input, objective, prediction) = row
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)
            # Remote prediction first, then the local one on the same input.
            prediction_create.i_create_a_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | objective | prediction  |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"}             | 000000    | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}        | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"}          | 000000    | spam   |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"}            | 000000    | spam    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}       | 000000    | spam     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"}       | 000000    | ham     |
                | ../data/movies.csv | 20      | 20     | 30     | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064
                | ../data/text_missing.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{}       | 000003 | paperwork     |


        """
        print(self.test_scenario2.__doc__)

        # Source-update payloads shared by several rows; adjacent literals
        # concatenate to the same one-line JSON strings.
        case_sensitive_stemmed = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"case_sensitive": true, "stem_words": true, '
            '"use_stopwords": false, "language": "en"}}}}')
        plain_tokens = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"case_sensitive": false, "stem_words": false, '
            '"use_stopwords": false, "language": "en"}}}}')
        stemmed_with_stopwords = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"case_sensitive": false, "stem_words": true, '
            '"use_stopwords": true, "language": "en"}}}}')
        full_terms_only = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"token_mode": "full_terms_only", "language": "en"}}}}')
        items_by_dollar = (
            '{"fields": {"000007": {"optype": "items", "item_analysis": '
            '{"separator": "$"}}}}')
        two_text_fields = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"token_mode": "all", "language": "en"}}, '
            '"000000": {"optype": "text", "term_analysis": '
            '{"token_mode": "all", "language": "en"}}}}')

        # Prediction inputs reused across rows.
        mobile_call = '{"Message": "Mobile call"}'
        normal_message = '{"Message": "A normal message"}'
        free_nokia = (
            '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week '
            'just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox '
            '36504 W45WQ norm150p/tone 16+"}')

        # Columns: data, time_1, time_2, time_3, options, data_input,
        # objective, prediction.
        examples = [
            ['data/spam.csv', '20', '20', '30', case_sensitive_stemmed,
             mobile_call, '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', case_sensitive_stemmed,
             normal_message, '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', plain_tokens,
             '{"Message": "Mobile calls"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', plain_tokens,
             normal_message, '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', stemmed_with_stopwords,
             mobile_call, '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', stemmed_with_stopwords,
             normal_message, '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', full_terms_only,
             free_nokia, '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', full_terms_only,
             '{"Message": "Ok"}', '000000', 'ham'],
            ['data/movies.csv', '20', '20', '30', items_by_dollar,
             '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}',
             '000009', '3.93064'],
            ['data/text_missing.csv', '20', '20', '30', two_text_fields,
             '{}', "000003", 'swap'],
        ]
        for row in examples:
            print("\nTesting with:\n%s" % (row,))
            (data, time_1, time_2, time_3, options,
             data_input, objective, prediction) = row
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            # The source is updated with the options before the dataset is
            # created from it.
            source_create.i_update_source_with(self, options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | objective | prediction  |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"}             | 000000    | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}        | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"}          | 000000    | spam   |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"}            | 000000    | spam    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}       | 000000    | spam     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"}       | 000000    | ham     |
                | ../data/movies.csv | 20      | 20     | 30     | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064
                | ../data/text_missing.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{}       | 000003 | paperwork     |


        """
        print(self.test_scenario2.__doc__)

        # Every spam.csv payload has the same shape; only the term_analysis
        # settings placed before "language" differ.
        text_field = '{"fields": {"000001": {"optype": "text", "term_analysis": {%s, "language": "en"}}}}'
        sensitive_stem = text_field % '"case_sensitive": true, "stem_words": true, "use_stopwords": false'
        no_stem = text_field % '"case_sensitive": false, "stem_words": false, "use_stopwords": false'
        stem_stopwords = text_field % '"case_sensitive": false, "stem_words": true, "use_stopwords": true'
        full_terms = text_field % '"token_mode": "full_terms_only"'

        # (options, data_input, prediction) for the spam.csv rows, in the
        # order they are run.
        spam_cases = [
            (sensitive_stem, '{"Message": "Mobile call"}', 'ham'),
            (sensitive_stem, '{"Message": "A normal message"}', 'ham'),
            (no_stem, '{"Message": "Mobile calls"}', 'spam'),
            (no_stem, '{"Message": "A normal message"}', 'ham'),
            (stem_stopwords, '{"Message": "Mobile call"}', 'spam'),
            (stem_stopwords, '{"Message": "A normal message"}', 'ham'),
            (full_terms, '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'spam'),
            (full_terms, '{"Message": "Ok"}', 'ham'),
        ]
        examples = [
            ['data/spam.csv', '20', '20', '30', spam_options, spam_input,
             '000000', spam_label]
            for spam_options, spam_input, spam_label in spam_cases
        ]
        # The two remaining rows use other files and other field options.
        examples.append([
            'data/movies.csv', '20', '20', '30',
            '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}',
            '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}',
            '000009', '3.93064'])
        examples.append([
            'data/text_missing.csv', '20', '20', '30',
            '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}',
            '{}', "000003", 'swap'])

        for case in examples:
            print("\nTesting with:\n%s" % (case,))
            (csv_path, source_secs, dataset_secs, model_secs,
             source_options, input_data, objective_id, expected) = case
            source_create.i_upload_a_file(self, csv_path)
            source_create.the_source_is_finished(self, source_secs)
            source_create.i_update_source_with(self, source_options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_secs)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_secs)
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, input_data)
            prediction_create.the_prediction_is(self, objective_id, expected)
            prediction_compare.i_create_a_local_prediction(self, input_data)
            prediction_compare.the_local_prediction_is(self, expected)
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a balanced model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"
                And I create local probabilities for "<data_input>"
                Then the local probabilities are "<probabilities>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |

        """
        # Row layout: data, time_1, time_2, time_3, data_input, objective,
        # prediction, confidence, probabilities.
        cases = [
            ['data/iris_unbalanced.csv', '10', '10', '10',
             '{}',
             '000004', 'Iris-setosa', '0.25284',
             '[0.33333, 0.33333, 0.33333]'],
            ['data/iris_unbalanced.csv', '10', '10', '10',
             '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}',
             '000004', 'Iris-setosa', '0.7575',
             '[1.0, 0.0, 0.0]'],
        ]
        show_doc(self.test_scenario10, cases)
        for case in cases:
            print("\nTesting with:\n%s" % (case,))
            (data, time_1, time_2, time_3, data_input, objective,
             prediction, confidence, probabilities) = case
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_balanced_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)
            # Remote and local predictions are created and checked first.
            prediction_create.i_create_a_proportional_prediction(
                self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_proportional_local_prediction(
                self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
            # Confidence checks run after both predictions exist.
            prediction_create.the_confidence_is(self, confidence)
            prediction_compare.the_local_prediction_confidence_is(
                self, confidence)
            # Finally the local per-class probabilities.
            prediction_compare.i_create_local_probabilities(self, data_input)
            prediction_compare.the_local_probabilities_are(
                self, probabilities)
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris.csv   | 10      | 10     | 10     | {}                   | 000004    | Iris-setosa    | 0.2629     |
                | ../data/grades.csv | 10      | 10     | 10     | {}                   | 000005    | 68.62224       | 27.5358    |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20}      | 000005    | 46.69889      | 37.27594297134128   |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100}     | 000005    | 28.06      | 24.86634   |

        """
        # The scenario text above is echoed to the test log before running.
        print(self.test_scenario3.__doc__)
        # Row layout: data file, source/dataset/model timeouts, prediction
        # input (JSON), objective field id, expected prediction, expected
        # confidence.
        examples = [
            ["data/iris.csv", "10", "10", "10", "{}", "000004", "Iris-setosa", "0.2629"],
            ["data/grades.csv", "10", "10", "10", "{}", "000005", "68.62224", "27.5358"],
            ["data/grades.csv", "10", "10", "10", '{"Midterm": 20}', "000005", "46.69889", "37.27594297134128"],
            [
                "data/grades.csv",
                "10",
                "10",
                "10",
                '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}',
                "000005",
                "28.06",
                "24.86634",
            ],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait, data_input,
             objective, prediction, confidence) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            prediction_create.i_create_a_proportional_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            # The scenario promises a confidence check and every row carries
            # the expected value; previously it was never asserted.
            prediction_create.the_confidence_is(self, confidence)

            prediction_compare.i_create_a_proportional_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
            # Same check for the locally computed prediction.
            prediction_compare.the_local_prediction_confidence_is(self, confidence)
# Example #11
# 0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with missing splits
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1}             | 000004    | Iris-setosa    | 0.8064     |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1, "petal length": 4}             | 000004    | Iris-versicolor    | 0.7847     |

        """
        # The scenario text above is echoed to the test log before running.
        print(self.test_scenario6.__doc__)
        # Row layout: data file, three timeouts, prediction input (JSON),
        # objective field id, expected prediction, expected confidence.
        examples = [
            ['data/iris_missing2.csv', '10', '10', '10',
             '{"petal width": 1}',
             '000004', 'Iris-setosa', '0.8064'],
            ['data/iris_missing2.csv', '10', '10', '10',
             '{"petal width": 1, "petal length": 4}',
             '000004', 'Iris-versicolor', '0.7847'],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait, data_input,
             objective, prediction, confidence) = example

            # Source -> dataset -> model with missing splits -> local model.
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # Remote proportional prediction: value and confidence.
            prediction_create.i_create_a_proportional_prediction(
                self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_create.the_confidence_is(self, confidence)

            # Local proportional prediction must match both as well.
            prediction_compare.i_create_a_proportional_local_prediction(
                self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
            prediction_compare.the_local_prediction_confidence_is(
                self, confidence)
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with missing splits
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1}             | 000004    | Iris-setosa    | 0.8064     |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1, "petal length": 4}             | 000004    | Iris-versicolor    | 0.7847     |

        """
        # Log the scenario description before exercising it.
        print(self.test_scenario6.__doc__)
        # Both rows share the data file, timeouts and objective field.
        setup = ["data/iris_missing2.csv", "10", "10", "10"]
        objective_id = "000004"
        examples = [
            setup + ['{"petal width": 1}', objective_id, "Iris-setosa", "0.8064"],
            setup + ['{"petal width": 1, "petal length": 4}', objective_id, "Iris-versicolor", "0.7847"],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            csv_file, t_source, t_dataset, t_model = example[:4]
            input_data, objective, expected, expected_confidence = example[4:]

            source_create.i_upload_a_file(self, csv_file)
            source_create.the_source_is_finished(self, t_source)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, t_dataset)
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, t_model)
            prediction_compare.i_create_a_local_model(self)

            # Remote side first, then the local model with the same input.
            prediction_create.i_create_a_proportional_prediction(self, input_data)
            prediction_create.the_prediction_is(self, objective, expected)
            prediction_create.the_confidence_is(self, expected_confidence)
            prediction_compare.i_create_a_proportional_local_prediction(self, input_data)
            prediction_compare.the_local_prediction_is(self, expected)
            prediction_compare.the_local_prediction_confidence_is(self, expected_confidence)
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5}                   | 000004    | Iris-setosa |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 6, "petal width": 2}  | 000004    | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 4, "petal width": 1.5}| 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004    | Iris-versicolor |

        """
        # Log the scenario description before exercising it.
        print(self.test_scenario1.__doc__)
        # Row layout: data file, three timeouts, prediction input (JSON),
        # objective field id, expected prediction.
        waits = ["10", "10", "10"]
        examples = [
            ["data/iris.csv"] + waits + ['{"petal width": 0.5}', "000004", "Iris-setosa"],
            ["data/iris.csv"] + waits + ['{"petal length": 6, "petal width": 2}', "000004", "Iris-virginica"],
            ["data/iris.csv"] + waits + ['{"petal length": 4, "petal width": 1.5}', "000004", "Iris-versicolor"],
            # Field names containing non-ASCII and special characters.
            ["data/iris_sp_chars.csv"] + waits + [
                '{"pétal.length": 4, "pétal&width\u0000": 1.5}',
                "000004",
                "Iris-versicolor",
            ],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait, data_input,
             objective, prediction) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # The remote prediction and the local one must both yield the
            # expected value for the same input.
            prediction_create.i_create_a_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
# Example #14
# 0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |

        """
        # NOTE(review): the scenario text above does not list the
        # source-update step performed below, and each row carries only two
        # timeouts (the second one is used for both dataset and model).
        #
        # Source update shared by both rows: field 000000 is set to numeric
        # and "foo" is declared as a missing token.
        source_options = '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}'
        examples = [
            ['data/iris_missing.csv', '30', source_options, '30',
             '{"sepal length": "foo", "petal length": 3}',
             '000004', 'Iris-versicolor'],
            ['data/iris_missing.csv', '30', source_options, '30',
             '{"sepal length": "foo", "petal length": 5, "petal width": 1.5}',
             '000004', 'Iris-virginica'],
        ]

        show_doc(self.test_scenario3, examples)
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, options, build_wait, data_input,
             objective, prediction) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, build_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, build_wait)
            prediction_compare.i_create_a_local_model(self)

            # Remote and local predictions for the same input must match the
            # expected value.
            prediction_create.i_create_a_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
    def test_scenario8(self):
        """
            Scenario: Successfully comparing predictions with text options and proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:

        """
        # Both rows apply the same source update: fields 000000 and 000001
        # become text fields with token_mode "all" and language "en".
        text_options = '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}'
        # Row layout: data file, three timeouts, source options, prediction
        # input (JSON), objective field id, expected prediction.
        examples = [
            ['data/text_missing.csv', '20', '20', '30', text_options,
             '{}', "000003", 'swap'],
            ['data/text_missing.csv', '20', '20', '30', text_options,
             '{"category1": "a"}', "000003", 'paperwork'],
        ]
        show_doc(self.test_scenario8, examples)
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait, options,
             data_input, objective, prediction) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # Proportional-strategy prediction, remote then local.
            prediction_create.i_create_a_proportional_prediction(
                self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_proportional_local_prediction(
                self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
# Example #16
# 0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction using a shared model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model shared
                And I wait until the model is ready less than <time_3> secs
                And I get the model sharing info
                And I check the model status using the model's shared url
                And I check the model status using the model's shared key
                And I create a local model
                When I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"


                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | Iris-setosa |
        """
        # Log the scenario description before exercising it.
        print(self.test_scenario2.__doc__)
        # Row layout: data file, three timeouts, prediction input (JSON),
        # expected local prediction.
        examples = [
            ['data/iris.csv', '10', '10', '10',
             '{"petal width": 0.5}', 'Iris-setosa'],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait,
             data_input, prediction) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)

            # Share the model and wait again with the model timeout before
            # reading the sharing info and checking the shared url and key.
            model_create.make_the_model_shared(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            model_create.get_sharing_info(self)
            model_create.model_from_shared_url(self)
            model_create.model_from_shared_key(self)

            # Only the local prediction is checked in this scenario.
            compare_pred.i_create_a_local_model(self)
            compare_pred.i_create_a_local_prediction(self, data_input)
            compare_pred.the_local_prediction_is(self, prediction)
    def test_scenario3(self):
        """
            Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with "<options>" waiting less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | options  |   expected_file                                        |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt              |

        """
        # Log the scenario description before exercising it.
        print(self.test_scenario3.__doc__)
        # (source options, expected rules file) pairs; every row uses the
        # same data file and the same three timeouts.
        option_cases = [
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}',
             'data/model/if_then_rules_spam_textanalysis_1.txt'),
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}',
             'data/model/if_then_rules_spam_textanalysis_2.txt'),
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}',
             'data/model/if_then_rules_spam_textanalysis_3.txt'),
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}',
             'data/model/if_then_rules_spam_textanalysis_4.txt'),
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}',
             'data/model/if_then_rules_spam_textanalysis_5.txt'),
            ('{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}',
             'data/model/if_then_rules_spam_textanalysis_6.txt'),
        ]
        examples = [
            ['data/spam.csv', '30', '30', '30', case_options, case_expected]
            for case_options, case_expected in option_cases
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            (data, source_wait, dataset_wait, model_wait,
             options, expected_file) = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # Translate the local tree into IF-THEN rules and compare the
            # output against the expected file.
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, expected_file)
    def test_scenario6(self):
        """
            Scenario: Successfully creating a model and check its summary information:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the model summary with "<expected_file>" file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                        |
                | data/iris.csv          | 10      | 10     | 10     | data/model/summarize_iris.txt              |
                | data/iris_sp_chars.csv | 10      | 10     | 10     | data/model/summarize_iris_sp_chars.txt     |
                | data/spam.csv          | 20      | 20     | 30     | data/model/summarize_spam.txt              |
                | data/grades.csv        | 10      | 10     | 10     | data/model/summarize_grades.txt            |
                | data/diabetes.csv      | 20      | 20     | 30     | data/model/summarize_diabetes.txt          |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/summarize_iris_missing2.txt     |
                | data/tiny_kdd.csv      | 20      | 20     | 30     | data/model/summarize_tiny_kdd.txt          |

        """
        # Log the scenario description before exercising it.
        print(self.test_scenario6.__doc__)
        # NOTE(review): the scenario text lists an IF_THEN translation step
        # and 20/20/30 timeouts for some rows; the code below does neither
        # (no translation call, all timeouts are "10").
        #
        # Each dataset name maps to data/<name>.csv and to the expected
        # summary file data/model/summarize_<name>.txt.
        dataset_names = [
            "iris",
            "iris_sp_chars",
            "spam",
            "grades",
            "diabetes",
            "iris_missing2",
            "tiny_kdd",
        ]
        examples = [
            ["data/%s.csv" % name, "10", "10", "10", "data/model/summarize_%s.txt" % name]
            for name in dataset_names
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            data, source_wait, dataset_wait, model_wait, expected_file = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # Compare the local model summary with the expected file.
            inspect_model.i_check_the_model_summary_with(self, expected_file)
    def test_scenario1(self):
        """
            Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                        |
                | data/iris.csv          | 10      | 10     | 10     | data/model/if_then_rules_iris.txt              |
                | data/iris_sp_chars.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_sp_chars.txt     |
                | data/spam.csv          | 20      | 20     | 30     | data/model/if_then_rules_spam.txt              |
                | data/grades.csv        | 10      | 10     | 10     | data/model/if_then_rules_grades.txt            |
                | data/diabetes.csv      | 20      | 20     | 30     | data/model/if_then_rules_diabetes.txt          |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_missing2.txt     |
                | data/tiny_kdd.csv      | 20      | 20     | 30     | data/model/if_then_rules_tiny_kdd.txt          |

        """
        # Log the scenario description before exercising it.
        print(self.test_scenario1.__doc__)
        # Every row uses the same source/dataset/model timeouts.
        waits = ['30', '30', '30']
        examples = [
            ['data/iris.csv'] + waits + ['data/model/if_then_rules_iris.txt'],
            ['data/iris_sp_chars.csv'] + waits + ['data/model/if_then_rules_iris_sp_chars.txt'],
            ['data/spam.csv'] + waits + ['data/model/if_then_rules_spam.txt'],
            ['data/grades.csv'] + waits + ['data/model/if_then_rules_grades.txt'],
            ['data/diabetes.csv'] + waits + ['data/model/if_then_rules_diabetes.txt'],
            ['data/iris_missing2.csv'] + waits + ['data/model/if_then_rules_iris_missing2.txt'],
            ['data/tiny_kdd.csv'] + waits + ['data/model/if_then_rules_tiny_kdd.txt'],
        ]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            data, source_wait, dataset_wait, model_wait, expected_file = example

            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, dataset_wait)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, model_wait)
            prediction_compare.i_create_a_local_model(self)

            # Translate the local tree into IF-THEN rules and compare the
            # output against the expected file.
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, expected_file)
    def test_scenario3(self):
        """
            Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with "<options>" waiting less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | options  |   expected_file                                        |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt              |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}','data/model/if_then_rules_spam_textanalysis_1.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', 'data/model/if_then_rules_spam_textanalysis_2.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_3.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_4.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_5.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_6.txt']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, example[5])
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a balanced model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"
                And I create local probabilities for "<data_input>"
                Then the local probabilities are "<probabilities>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |

        """
        examples = [
            ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'],
            ['data/iris_unbalanced.csv', '10', '10', '10', '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}', '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]']]
        show_doc(self.test_scenario10, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_balanced_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
            prediction_compare.i_create_local_probabilities(self, example[4])
            prediction_compare.the_local_probabilities_are(self, example[8])
    def test_scenario6(self):
        """
            Scenario: Successfully creating a model and check its summary information:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the model summary with "<expected_file>" file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                        |
                | data/iris.csv          | 10      | 10     | 10     | data/model/summarize_iris.txt              |
                | data/iris_sp_chars.csv | 10      | 10     | 10     | data/model/summarize_iris_sp_chars.txt     |
                | data/spam.csv          | 20      | 20     | 30     | data/model/summarize_spam.txt              |
                | data/grades.csv        | 10      | 10     | 10     | data/model/summarize_grades.txt            |
                | data/diabetes.csv      | 20      | 20     | 30     | data/model/summarize_diabetes.txt          |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/summarize_iris_missing2.txt     |
                | data/tiny_kdd.csv      | 20      | 20     | 30     | data/model/summarize_tiny_kdd.txt          |

        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', 'data/model/summarize_iris.txt'],
            ['data/iris_sp_chars.csv', '10', '10', '10', 'data/model/summarize_iris_sp_chars.txt'],
            ['data/spam.csv', '10', '10', '10', 'data/model/summarize_spam.txt'],
            ['data/grades.csv', '10', '10', '10', 'data/model/summarize_grades.txt'],
            ['data/diabetes.csv', '10', '10', '10', 'data/model/summarize_diabetes.txt'],
            ['data/iris_missing2.csv', '10', '10', '10', 'data/model/summarize_iris_missing2.txt'],
            ['data/tiny_kdd.csv', '10', '10', '10', 'data/model/summarize_tiny_kdd.txt']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_check_the_model_summary_with(self, example[4])
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |

        """
        examples = [
            ['data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.2629'],
            ['data/grades.csv', '10', '10', '10', '{}', '000005', '68.62224', '27.5358'],
            ['data/grades.csv', '10', '10', '10', '{"Midterm": 20}', '000005', '40.46667', '54.89713'],
            ['data/grades.csv', '10', '10', '10', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '25.65806']]
        show_doc(self.test_scenario3, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction using a shared model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model shared
                And I wait until the model is ready less than <time_3> secs
                And I get the model sharing info
                And I check the model status using the model's shared url
                And I check the model status using the model's shared key
                And I create a local model
                When I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"


                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | Iris-setosa |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.make_the_model_shared(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.get_sharing_info(self)
            model_create.model_from_shared_url(self)
            model_create.model_from_shared_key(self)
            compare_pred.i_create_a_local_model(self)
            compare_pred.i_create_a_local_prediction(self, example[4])
            compare_pred.the_local_prediction_is(self, example[5])
# Example #25
# 0
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions in operating points for models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>" in "<operating_point>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>" in "<operating_point>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | prediction  | operating_point


        """
        examples = [
            ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-setosa',  {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-versicolor', {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"],
            ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}',  'Iris-setosa', {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"],
            ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor',  {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"]]
        show_doc(self.test_scenario2, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction_op(self, example[4], example[6])
            prediction_create.the_prediction_is(self, example[7], example[5])
            prediction_compare.i_create_a_local_prediction_op(self, example[4], example[6])
            prediction_compare.the_local_prediction_is(self, example[5])
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions with text options and proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | ../data/text_missing.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{}       | paperwork     |

        """
        print self.test_scenario11.__doc__
        examples = [
            ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003",'swap'],
            ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"category1": "a"}', "000003",'paperwork']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[5])
            prediction_create.the_prediction_is(self, example[6], example[7])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[7])
    def test_scenario1(self):
        """
            Scenario: Successfully changing duplicated field names:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset with "<options>"
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                Then "<field_id>" field's name is changed to "<new_name>"

        Examples:
                | data             | time_1  | time_2 | time_3 | options | field_id | new_name
                | ../data/iris.csv | 20      | 20     | 30     | {"fields": {"000001": {"name": "species"}}} | 000001 | species1
                | ../data/iris.csv | 20      | 20     | 30     | {"fields": {"000001": {"name": "petal width"}}} | 000001 | petal width1
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', '20', '20', '30',
            '{"fields": {"000001": {"name": "species"}}}', '000001', 'species1'
        ],
                    [
                        'data/iris.csv', '20', '20', '30',
                        '{"fields": {"000001": {"name": "petal width"}}}',
                        '000003', 'petal width3'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset_with(self, example[4])
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[3])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_preds.i_create_a_local_model(self)
            model_create.field_name_to_new_name(self, example[5], example[6])