コード例 #1
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction in DEV mode:
                Given I want to use api in DEV mode
                When I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And the source has DEV True 
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_read.source_has_dev(self, True)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #2
0
ファイル: test_01_prediction.py プロジェクト: jaor/python
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal&width\u0000": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ["data/iris.csv", "10", "10", "10", '{"petal width": 0.5}', "000004", "Iris-setosa"],
            ["data/iris_sp_chars.csv", "10", "10", "10", '{"pétal&width\u0000": 0.5}', "000004", "Iris-setosa"],
        ]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #3
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction with a user's project connection:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And the source is in the project
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file_with_project_conn(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            assert world.source['project'] == world.project_id
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            assert world.dataset['project'] == world.project_id
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            assert world.model['project'] == world.project_id
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            assert world.prediction['project'] == world.project_id
コード例 #4
0
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions for linear regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a linear regression with objective "<objective>" and "<params>"
                And I wait until the linear regression is ready less than <time_3> secs
                And I create a local linear regression
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params

        """
        examples = [
            [
                'data/grades.csv', '10', '50', '30000',
                '{"000000": 1, "000001": 1, "000002": 1}', '000005', 29.63024,
                '{"input_fields": ["000000", "000001", "000002"]}'
            ],
            [
                'data/iris.csv', '10', '50', '30000',
                '{"000000": 1, "000001": 1, "000004": "Iris-virginica"}',
                '000003', 1.21187,
                '{"input_fields": ["000000", "000001", "000004"]}'
            ],
            [
                'data/movies.csv', '10', '50', '30000', '{"000007": "Action"}',
                '000009', 4.33333, '{"input_fields": ["000007"]}'
            ],
            [
                'data/movies.csv', '10', '50', '30000', '{"000006": "1999"}',
                '000009', 3.28427,
                '{"input_fields": ["000006"], "bias": false}'
            ]
        ]
        show_doc(self.test_scenario10, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            linear_create.i_create_a_linear_regression_with_objective_and_params( \
                self, example[5], example[7])
            linear_create.the_linear_regression_is_finished_in_less_than( \
                self, example[3])
            prediction_compare.i_create_a_local_linear(self)
            prediction_create.i_create_a_linear_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_linear_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #5
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction using a public model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model public
                And I wait until the model is ready less than <time_3> secs
                And I check the model status using the model's public url
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.make_the_model_public(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.model_from_public_url(self)
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #6
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction with a user's project connection:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And the source is in the project
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file_with_project_conn(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            assert world.source['project'] == world.project_id
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            assert world.dataset['project'] == world.project_id
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            assert world.model['project'] == world.project_id
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            assert world.prediction['project'] == world.project_id
コード例 #7
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction from a source in a remote location

                Given I create a data source using the url "<url>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | url                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | s3://bigml-public/csv/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario2.__doc__
        examples = [[
            's3://bigml-public/csv/iris.csv', '10', '10', '10',
            '{"petal width": 0.5}', '000004', 'Iris-setosa'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_create_using_url(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #8
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction using a public model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model public
                And I wait until the model is ready less than <time_3> secs
                And I check the model status using the model's public url
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.make_the_model_public(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.model_from_public_url(self)
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #9
0
ファイル: test_01_prediction.py プロジェクト: GregTarr/python
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction from a source in a remote location

                Given I create a data source using the url "<url>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | url                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | s3://bigml-public/csv/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['s3://bigml-public/csv/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_create_using_url(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
コード例 #10
0
ファイル: test_43_linear.py プロジェクト: vinnu007/python
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction from linear regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a pca
                And I wait until the linear regression is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    |objective | prediction  |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '2.27312', '{}'],
            ['data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '8.19619', '{"bias": false}']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            linear_create.i_create_a_linear_regression_with_objective_and_params(self, example[5], example[7])
            linear_create.the_linear_regression_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_linear_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])

        print "\nEnd of tests in: %s\n-------------------\n" % __name__
コード例 #11
0
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions for fusions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I create a local fusion
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params| tag | data_input                             | objective | prediction  | params

        """
        examples = [[
            'data/iris_unbalanced.csv', '30', '30', '120', '120',
            '{"tags":["my_fusion_tag"]}', 'my_fusion_tag',
            '{"petal width": 4}', '000004', 'Iris-virginica'
        ],
                    [
                        'data/grades.csv', '30', '30', '120', '120',
                        '{"tags":["my_fusion_tag_reg"]}', 'my_fusion_tag_reg',
                        '{"Midterm": 20}', '000005', 43.65286
                    ]]
        show_doc(self.test_scenario10, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_retrieve_a_list_of_remote_models(
                self, example[6])
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            prediction_compare.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_compare.i_create_a_local_prediction(self, example[7])
            prediction_compare.the_local_prediction_is(self, example[9])
コード例 #12
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris.csv   | 10      | 10     | 10     | {}                   | 000004    | Iris-setosa    | 0.2629     |
                | ../data/grades.csv | 10      | 10     | 10     | {}                   | 000005    | 68.62224       | 27.5358    |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20}      | 000005    | 46.69889      | 37.27594297134128   |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100}     | 000005    | 28.06      | 24.86634   |

        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa',
            '0.2629'
        ],
                    [
                        'data/grades.csv', '10', '10', '10', '{}', '000005',
                        '68.62224', '27.5358'
                    ],
                    [
                        'data/grades.csv', '10', '10', '10', '{"Midterm": 20}',
                        '000005', '46.69889', '37.27594297134128'
                    ],
                    [
                        'data/grades.csv', '10', '10', '10',
                        '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}',
                        '000005', '28.06', '24.86634'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(
                self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #13
0
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions for deepnets:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet with objective "<objective>" and "<params>"
                And I wait until the deepnet is ready less than <time_3> secs
                And I create a local deepnet
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params,


        """
        examples = [[
            'data/iris.csv', '30', '50', '30000', '{"petal width": 4}',
            '000004', 'Iris-virginica', '{}'
        ],
                    [
                        'data/iris.csv', '30', '50', '30000',
                        '{"sepal length": 4.1, "sepal width": 2.4}', '000004',
                        'Iris-setosa', '{}'
                    ],
                    [
                        'data/iris_missing2.csv', '30', '50', '30000', '{}',
                        '000004', 'Iris-setosa', '{}'
                    ],
                    [
                        'data/grades.csv', '30', '50', '30000', '{}', '000005',
                        42.15473, '{}'
                    ],
                    [
                        'data/spam.csv', '30', '50', '30000', '{}', '000000',
                        'ham', '{}'
                    ]]
        show_doc(self.test_scenario1, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_deepnet_with_objective_and_params(
                self, example[5], example[7])
            model_create.the_deepnet_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_deepnet(self)
            prediction_create.i_create_a_deepnet_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_deepnet_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #14
0
    def test_scenario11(self):
        """
            Scenario: Successfully comparing remote and local predictions
                      with raw date input for linear regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a linear regression
                And I wait until the linear regression is ready
                less than <time_3> secs
                And I create a local linear regression
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                |data|time_1|time_2|time_3|data_input|objective|prediction

        """
        examples = [
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
             '000002',  -0.01284],
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}',
             '000002', -0.09459],
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "1932-01-30T19:24:11.440",  "cat-0":"cat2"}',
             '000002', -0.02259],
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}',
             '000002', -0.06754],
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
             '000002', 0.05204],
            ['data/dates2.csv', '20', '20', '25',
             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
             '000002', 0.05878]]
        show_doc(self.test_scenario11, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[2])
            linear_create.i_create_a_linear_regression(self)
            linear_create.the_linear_regression_is_finished_in_less_than(self,
                                                                         example[3])
            prediction_compare.i_create_a_local_linear(self)
            prediction_create.i_create_a_linear_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_linear_prediction(self,
                                                                  example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #15
0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params

            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 46.261364, '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-setosa', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],


        """
        examples = [
            [
                'data/iris_unbalanced.csv', '10', '10', '120',
                '{"petal width": 4}', '000004', 'Iris-virginica',
                '{"boosting": {"iterations": 5}, "number_of_models": 5}'
            ],
            [
                'data/grades.csv', '10', '10', '120', '{"Midterm": 20}',
                '000005', 61.61036,
                '{"boosting": {"iterations": 5}, "number_of_models": 5}'
            ]
        ]
        show_doc(self.test_scenario6, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[7])
            ensemble_create.the_ensemble_is_finished_in_less_than(
                self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_ensemble_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #16
0
    def test_scenario2(self):
        """
            Scenario 2: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I update the fusion name to "<fusion_name>"
                When I wait until the fusion is ready less than <time_5> secs
                And I create a prediction for "<data_input>"
                Then the fusion name is "<fusion_name>"
                And the prediction for "<objective>" is "<prediction>"
                And I create an evaluation for the fusion with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | fusion_name | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | my new fusion name | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '20', '20', 'my new fusion name',
             '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag',
             '{"petal width": 1.75, "petal length": 2.45}', "000004",
             "Iris-setosa", 'average_phi', '1.0']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_models(self, example[7])
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            model_create.i_update_fusion_name(self, example[5])
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            model_create.i_check_fusion_name(self, example[5])
            prediction_create.i_create_a_fusion_prediction(self, example[8])
            prediction_create.the_prediction_is(self, example[9], example[10])
            evaluation_create.i_create_an_evaluation_fusion(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(self, example[3])
            evaluation_create.the_measured_measure_is_value(self, example[11], example[12])
コード例 #17
0
    def test_scenario5(self):
        """
            Scenario: Successfully comparing predictions in operating kind for models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>" in "<operating_kind>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>" in "<operating_kind>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | prediction  | operating_point


        """
        examples = [[
            'data/iris.csv', '10', '50', '50',
            '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor',
            "probability", "000004"
        ],
                    [
                        'data/iris.csv', '10', '50', '50',
                        '{"petal length": 2.46, "sepal length": 5}',
                        'Iris-versicolor', "confidence", "000004"
                    ],
                    [
                        'data/iris.csv', '10', '50', '50',
                        '{"petal length": 2}', 'Iris-setosa', "probability",
                        "000004"
                    ],
                    [
                        'data/iris.csv', '10', '50', '50',
                        '{"petal length": 2}', 'Iris-setosa', "confidence",
                        "000004"
                    ]]
        show_doc(self.test_scenario5, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction_op_kind(
                self, example[4], example[6])
            prediction_create.the_prediction_is(self, example[7], example[5])
            prediction_compare.i_create_a_local_prediction_op_kind(
                self, example[4], example[6])
            prediction_compare.the_local_prediction_is(self, example[5])
コード例 #18
0
    def test_scenario2(self):
        """
            Scenario 2: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I update the fusion name to "<fusion_name>"
                When I wait until the fusion is ready less than <time_5> secs
                And I create a prediction for "<data_input>"
                Then the fusion name is "<fusion_name>"
                And the prediction for "<objective>" is "<prediction>"
                And I create an evaluation for the fusion with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | fusion_name | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | my new fusion name | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '20', '20', 'my new fusion name',
             '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag',
             '{"petal width": 1.75, "petal length": 2.45}', "000004",
             "Iris-setosa", 'average_phi', '1.0']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[6])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_models(self, example[7])
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            model_create.i_update_fusion_name(self, example[5])
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            model_create.i_check_fusion_name(self, example[5])
            prediction_create.i_create_a_fusion_prediction(self, example[8])
            prediction_create.the_prediction_is(self, example[9], example[10])
            evaluation_create.i_create_an_evaluation_fusion(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(self, example[3])
            evaluation_create.the_measured_measure_is_value(self, example[11], example[12])
コード例 #19
0
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5}                   | 000004    | Iris-setosa |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 6, "petal width": 2}  | 000004    | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 4, "petal width": 1.5}| 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004    | Iris-versicolor |

        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '10', '{"petal width": 0.5}',
            '000004', 'Iris-setosa'
        ],
                    [
                        'data/iris.csv', '10', '10', '10',
                        '{"petal length": 6, "petal width": 2}', '000004',
                        'Iris-virginica'
                    ],
                    [
                        'data/iris.csv', '10', '10', '10',
                        '{"petal length": 4, "petal width": 1.5}', '000004',
                        'Iris-versicolor'
                    ],
                    [
                        'data/iris_sp_chars.csv', '10', '10', '10',
                        '{"pétal.length": 4, "pétal&width\u0000": 1.5}',
                        '000004', 'Iris-versicolor'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #20
0
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | objective | prediction  |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"}             | 000000    | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}        | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"}          | 000000    | spam   |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"}            | 000000    | spam    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}       | 000000    | spam     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"}       | 000000    | ham     |
                | ../data/movies.csv | 20      | 20     | 30     | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064
                | ../data/text_missing.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{}       | 000003 | paperwork     |


        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'],
            ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.93064'],
            ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, example[5])
            prediction_create.the_prediction_is(self, example[6], example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[7])
コード例 #21
0
    def test_scenario6(self):
        """
            Scenario 6: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I retrieve a list of remote logistic regression tagged with "<tag>"
                And I create a fusion from a list of models and weights
                And I wait until the fusion is ready less than <time_4> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the fusion probability for the prediction is "<probability>"
                And I create a local fusion prediction for "<data_input>"
                Then the local fusion prediction is "<prediction>"
                And the local fusion probability for the prediction is "<probability>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '20', '20',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": true}',
             'my_fusion_6_tag',
             '{"petal width": 1.75, "petal length": 2.45}',
             "000004",
             "Iris-setosa",
             '0.4727',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }', '[1, 2]']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[5])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[11])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(self, example[6])
            model_create.i_create_a_fusion_with_weights(self, example[12])
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            compare_pred.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_create.the_fusion_probability_is(self, example[10])
            compare_pred.i_create_a_local_prediction(self, example[7])
            compare_pred.the_local_prediction_is(self, example[9])
            compare_pred.the_local_probability_is(self, example[10])
コード例 #22
0
    def test_scenario6(self):
        """
            Scenario 6: Successfully creating a fusion:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a logistic regression with "<params>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I retrieve a list of remote logistic regression tagged with "<tag>"
                And I create a fusion from a list of models and weights
                And I wait until the fusion is ready less than <time_4> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the fusion probability for the prediction is "<probability>"
                And I create a local fusion prediction for "<data_input>"
                Then the local fusion prediction is "<prediction>"
                And the local fusion probability for the prediction is "<probability>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | data_input | objective | prediction
                | ../data/iris.csv | 10      | 10     | 20     | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa"
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '20', '20',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": true}',
             'my_fusion_6_tag',
             '{"petal width": 1.75, "petal length": 2.45}',
             "000004",
             "Iris-setosa",
             '0.4727',
             '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }', '[1, 2]']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[5])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[11])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(self, example[6])
            model_create.i_create_a_fusion_with_weights(self, example[12])
            model_create.the_fusion_is_finished_in_less_than(self, example[3])
            compare_pred.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_create.the_fusion_probability_is(self, example[10])
            compare_pred.i_create_a_local_prediction(self, example[7])
            compare_pred.the_local_prediction_is(self, example[9])
            compare_pred.the_local_probability_is(self, example[10])
コード例 #23
0
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions for ensembles with proportional missing strategy in a supervised model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an esemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence | params
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'],


        """
        examples = [[
            'data/iris.csv', '10', '10', '50', '{}', '000004',
            'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}
        ],
                    [
                        'data/iris.csv', '10', '10', '50', '{}', '000004',
                        'Iris-versicolor', '0.27261',
                        '{"number_of_models": 5"}', {
                            "operating_kind": "confidence"
                        }
                    ]]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[8])
            ensemble_create.the_ensemble_is_finished_in_less_than(
                self, example[3])
            ensemble_create.create_local_supervised_ensemble(self)
            prediction_create.i_create_an_ensemble_proportional_prediction(
                self, example[4], example[9])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_create.create_local_ensemble_proportional_prediction_with_confidence(
                self, example[4], example[9])
            prediction_compare.the_local_ensemble_prediction_is(
                self, example[6])
            prediction_compare.the_local_prediction_confidence_is(
                self, example[7])
コード例 #24
0
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | objective | prediction  |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile call"}             | 000000    | ham    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}        | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "Mobile calls"}          | 000000    | spam   |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "Mobile call"}            | 000000    | spam    |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Message": "A normal message"}       | 000000    | ham     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}       | 000000    | spam     |
                | ../data/spam.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "Ok"}       | 000000    | ham     |
                | ../data/movies.csv | 20      | 20     | 30     | {"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}} |{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}'| 000009| 3.93064
                | ../data/text_missing.csv | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, {"000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} |{}       | 000003 | paperwork     |


        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'],
            ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.93064'],
            ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, example[5])
            prediction_create.the_prediction_is(self, example[6], example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[7])
コード例 #25
0
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a balanced model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"
                And I create local probabilities for "<data_input>"
                Then the local probabilities are "<probabilities>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |

        """
        examples = [
            [
                'data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004',
                'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'
            ],
            [
                'data/iris_unbalanced.csv', '10', '10', '10',
                '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}',
                '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]'
            ]
        ]
        show_doc(self.test_scenario10, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_balanced_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(
                self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.the_local_prediction_confidence_is(
                self, example[7])
            prediction_compare.i_create_local_probabilities(self, example[4])
            prediction_compare.the_local_probabilities_are(self, example[8])
コード例 #26
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction from an ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                When I create an ensemble prediction for "<data_input>"
                And I wait until the prediction is ready less than <time_4> secs
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data               | time_1  | time_2 | time_3 | time_4 | number_of_models | tlp   |  data_input    | objective | prediction  |
                | ../data/iris.csv   | 10      | 10     | 50     | 20     | 5                | 1     | {"petal width": 0.5} | 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv   | 10      | 10     | 50     | 20     | 5                | 1     | {"pétal&width\u0000": 0.5} | 000004    | Iris-versicolor |
                | ../data/grades.csv | 10      | 10     | 150     | 20     | 10               | 1     | {"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93} | 000005    | 88.205575 |
                | ../data/grades.csv | 10      | 10     | 150     | 20     | 10               | 1     | {"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89} | 000005    | 84.29401 |
        """
        print self.test_scenario1.__doc__
        examples = [
            [
                'data/iris.csv', '30', '30', '50', '20', '5', '1',
                '{"petal width": 0.5}', '000004', 'Iris-versicolor'
            ],
            [
                'data/iris_sp_chars.csv', '30', '30', '50', '20', '5', '1',
                '{"pétal&width\u0000": 0.5}', '000004', 'Iris-versicolor'
            ],
            [
                'data/grades.csv', '30', '30', '150', '20', '10', '1',
                '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93}',
                '000005', '84.556'
            ],
            [
                'data/grades.csv', '30', '30', '150', '20', '10', '1',
                '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89}',
                '000005', '73.13558'
            ]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            ensemble_create.i_create_an_ensemble(self, example[5], example[6])
            ensemble_create.the_ensemble_is_finished_in_less_than(
                self, example[3])
            prediction_create.i_create_an_ensemble_prediction(self, example[7])
            prediction_create.the_prediction_is_finished_in_less_than(
                self, example[4])
            prediction_create.the_prediction_is(self, example[8], example[9])
コード例 #27
0
    def test_scenario13(self):
        """
            Scenario: Successfully comparing predictions for fusions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I create a local fusion
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params| tag | data_input                             | objective | prediction  | params

        """
        examples = [[
            'data/grades.csv', '30', '30', '120', '120', 'my_fusion_tag_lreg',
            '{"000000": 10, "000001": 10, "000002": 10, "000003": 10, "000004": 10}',
            '000005', 21.01712
        ]]
        show_doc(self.test_scenario13, examples)

        for example in examples:
            print "\nTesting with:\n", example
            tag = "%s_%s" % (example[5], PY3)
            tag_args = '{"tags":["%s"]}' % tag
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            linear_create.i_create_a_linear_regression_with_params(
                self, tag_args)
            linear_create.the_linear_regression_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_retrieve_a_list_of_remote_linear_regressions(
                self, tag)
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            prediction_compare.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[6])
            prediction_create.the_prediction_is(self, example[7], example[8])
            prediction_compare.i_create_a_local_prediction(self, example[6])
            prediction_compare.the_local_prediction_is(self, example[8])
コード例 #28
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris.csv   | 10      | 10     | 10     | {}                   | 000004    | Iris-setosa    | 0.2629     |
                | ../data/grades.csv | 10      | 10     | 10     | {}                   | 000005    | 68.62224       | 27.5358    |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20}      | 000005    | 46.69889      | 37.27594297134128   |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100}     | 000005    | 28.06      | 24.86634   |

        """
        print self.test_scenario3.__doc__
        examples = [
            ["data/iris.csv", "10", "10", "10", "{}", "000004", "Iris-setosa", "0.2629"],
            ["data/grades.csv", "10", "10", "10", "{}", "000005", "68.62224", "27.5358"],
            ["data/grades.csv", "10", "10", "10", '{"Midterm": 20}', "000005", "46.69889", "37.27594297134128"],
            [
                "data/grades.csv",
                "10",
                "10",
                "10",
                '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}',
                "000005",
                "28.06",
                "24.86634",
            ],
        ]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #29
0
    def test_scenario12(self):
        """
            Scenario: Successfully comparing predictions for fusions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I create a local fusion
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params| tag | data_input                             | objective | prediction  | params

        """
        tag = "my_fusion_tag_12_%s" % PY3
        tag_reg = "my_fusion_tag_12_reg_%s" % PY3
        examples = [
            ['data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag, tag, '{"petal width": 4}', '000004', 'Iris-virginica'],
            ['data/grades.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag_reg, tag_reg, '{"Midterm": 20}', '000005', 44.37625]]
        show_doc(self.test_scenario12, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_retrieve_a_list_of_remote_models(self, example[6])
            model_create.i_create_a_fusion_with_weights(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            prediction_compare.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction(self, example[7])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_compare.i_create_a_local_prediction(self, example[7])
            prediction_compare.the_local_prediction_is(self, example[9])
コード例 #30
0
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions in operating points for fusions:
            Scenario: Successfully comparing predictions for fusions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I create a local fusion
                When I create a prediction for "<data_input>" in "<operating_point>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local fusion prediction for "<data_input>" in "<operating_point>"
                Then the local ensemble prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params| tag | data_input                             | objective | prediction  | params | operating_point


        """
        examples = [
            ['data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["my_fusion_tag_11"]}', 'my_fusion_tag_11', '{"petal width": 4}', '000004', 'Iris-virginica',  {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}],
           ['data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["my_fusion_tag_11_b"]}', 'my_fusion_tag_11_b', '{"petal width": 4}', '000004', 'Iris-virginica',  {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}]]
        show_doc(self.test_scenario11, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_retrieve_a_list_of_remote_models(self, example[6])
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            prediction_compare.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction_op(self, example[7], example[10])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_compare.i_create_a_local_prediction_op(self, example[7], example[10])
            prediction_compare.the_local_prediction_is(self, example[9])
コード例 #31
0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with missing splits
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1}             | 000004    | Iris-setosa    | 0.8064     |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1, "petal length": 4}             | 000004    | Iris-versicolor    | 0.7847     |

        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris_missing2.csv', '10', '10', '10', '{"petal width": 1}',
            '000004', 'Iris-setosa', '0.8064'
        ],
                    [
                        'data/iris_missing2.csv', '10', '10', '10',
                        '{"petal width": 1, "petal length": 4}', '000004',
                        'Iris-versicolor', '0.7847'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(
                self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.i_create_a_proportional_local_prediction(
                self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(
                self, example[7])
コード例 #32
0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with missing splits
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1}             | 000004    | Iris-setosa    | 0.8064     |
                | ../data/iris_missing2.csv   | 10      | 10     | 10     | {"petal width": 1, "petal length": 4}             | 000004    | Iris-versicolor    | 0.7847     |

        """
        print self.test_scenario6.__doc__
        examples = [
            ["data/iris_missing2.csv", "10", "10", "10", '{"petal width": 1}', "000004", "Iris-setosa", "0.8064"],
            [
                "data/iris_missing2.csv",
                "10",
                "10",
                "10",
                '{"petal width": 1, "petal length": 4}',
                "000004",
                "Iris-versicolor",
                "0.7847",
            ],
        ]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
コード例 #33
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |

        """
        examples = [
            [
                'data/iris_missing.csv', '30',
                '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}',
                '30', '{"sepal length": "foo", "petal length": 3}', '000004',
                'Iris-versicolor'
            ],
            [
                'data/iris_missing.csv', '30',
                '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}',
                '30',
                '{"sepal length": "foo", "petal length": 5, "petal width": 1.5}',
                '000004', 'Iris-virginica'
            ]
        ]

        show_doc(self.test_scenario3, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[2])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[3])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #34
0
    def test_scenario8(self):
        """
            Scenario: Successfully comparing predictions with text options and proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:

        """
        examples = [
            [
                'data/text_missing.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}',
                '{}', "000003", 'swap'
            ],
            [
                'data/text_missing.csv', '20', '20', '30',
                '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}',
                '{"category1": "a"}', "000003", 'paperwork'
            ]
        ]
        show_doc(self.test_scenario8, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(
                self, example[5])
            prediction_create.the_prediction_is(self, example[6], example[7])
            prediction_compare.i_create_a_proportional_local_prediction(
                self, example[5])
            prediction_compare.the_local_prediction_is(self, example[7])
コード例 #35
0
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression with objective "<objective>"
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a local supervised model
                When I create a prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_kind>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params | operating_point,


        """
        examples = [[
            'data/iris.csv', '10', '50', '30000', '{"petal length": 5}',
            '000004', 'Iris-versicolor', '{}', "probability"
        ],
                    [
                        'data/iris.csv', '10', '50', '30000',
                        '{"petal length": 2}', '000004', 'Iris-setosa', '{}',
                        "probability"
                    ]]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_supervised_model(
                self, model_type="logistic_regression")
            prediction_create.i_create_a_logistic_prediction_with_op_kind(
                self, example[4], example[8])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_logistic_prediction_op_kind(
                self, example[4], example[8])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #36
0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions for deepnets with operating kind:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet with objective "<objective>" and "<params>"
                And I wait until the deepnet is ready less than <time_3> secs
                And I create a local deepnet
                When I create a prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_kind>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params | operating_point,


        """
        examples = [[
            'data/iris.csv', '10', '50', '60', '{"petal length": 2.46}',
            '000004', 'Iris-setosa', '{}', "probability"
        ],
                    [
                        'data/iris.csv', '10', '50', '60',
                        '{"petal length": 2}', '000004', 'Iris-setosa', '{}',
                        "probability"
                    ]]
        show_doc(self.test_scenario6, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_deepnet_with_objective_and_params(
                self, example[5], example[7])
            model_create.the_deepnet_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_deepnet(self)
            prediction_create.i_create_a_deepnet_prediction_op_kind(
                self, example[4], example[8])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_deepnet_prediction_op_kind(
                self, example[4], example[8])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #37
0
    def test_scenario7(self):
        """
            Scenario: Successfully comparing predictions for ensembles with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an esemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence | params
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'],


        """
        examples = [
            ['data/iris.csv', '30', '30', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}],
            ['data/iris.csv', '30', '30', '50', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', {"operating_kind": "confidence"}],
            ['data/grades.csv', '30', '30', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}', {}],
            ['data/grades.csv', '30', '30', '50', '{"Midterm": 20}', '000005', '54.82214', '25.89672', '{"number_of_models": 5}', {"operating_kind": "confidence"}],
            ['data/grades.csv', '30', '30', '50', '{"Midterm": 20}', '000005', '45.4573', '29.58403', '{"number_of_models": 5}', {}],
            ['data/grades.csv', '30', '30', '50', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '42.814', '31.51804', '{"number_of_models": 5}', {}]]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[8])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_proportional_prediction(self, example[4], example[9])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_create.create_local_ensemble_proportional_prediction_with_confidence(self, example[4], example[9])
            prediction_compare.the_local_ensemble_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
コード例 #38
0
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params

            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 46.261364, '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-setosa', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],


        """
        examples = [
            ['data/iris_unbalanced.csv', '30', '30', '120', '{"petal width": 4}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/grades.csv', '30', '30', '120', '{"Midterm": 20}', '000005', 61.61036, '{"boosting": {"iterations": 5}, "number_of_models": 5}']]
        show_doc(self.test_scenario6, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[7])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_ensemble_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #39
0
    def test_scenario13(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |

        """
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"],
            ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"]]
        show_doc(self.test_scenario13, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            args = '{"tags": ["%s"]}' % example[8]
            model_create.i_create_a_model_with(self, data=args)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_export_model(self, False, example[7]) # no pmml
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            model_create.i_export_tags_model(self, example[7], example[8])
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #40
0
    def test_scenario7(self):
        """
            Scenario: Successfully comparing predictions for ensembles with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an esemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence | params

        """
        examples = [
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'],
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-versicolor', '0.3174', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '50', '{"Midterm": 20}', '000005', '45.4573', '29.58403', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '50', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '42.814', '31.51804', '{"number_of_models": 5}']]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[8])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_create.create_local_ensemble_proportional_prediction_with_confidence(self, example[4])
            prediction_compare.the_local_ensemble_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
コード例 #41
0
    def test_scenario13(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |

        """
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"],
            ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"]]
        show_doc(self.test_scenario13, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            args = '{"tags": ["%s"]}' % example[8]
            model_create.i_create_a_model_with(self, data=args)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_export_model(self, False, example[7]) # no pmml
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            model_create.i_export_tags_model(self, example[7], example[8])
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
コード例 #42
0
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a balanced model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And the local prediction's confidence is "<confidence>"
                And I create local probabilities for "<data_input>"
                Then the local probabilities are "<probabilities>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | probabilities |

        """
        examples = [
            ['data/iris_unbalanced.csv', '10', '10', '10',
             '{}',
             '000004', 'Iris-setosa', '0.25284',
             '[0.33333, 0.33333, 0.33333]'],
            ['data/iris_unbalanced.csv', '10', '10', '10',
             '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}',
             '000004', 'Iris-setosa', '0.7575',
             '[1.0, 0.0, 0.0]']]
        show_doc(self.test_scenario10, examples)
        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, confidence, probabilities) = example

            # Source -> dataset -> balanced model, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_balanced_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)

            # Predictions with the proportional missing strategy
            prediction_create.i_create_a_proportional_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_proportional_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)

            # Confidence checks, then the local probabilities
            prediction_create.the_confidence_is(self, confidence)
            prediction_compare.the_local_prediction_confidence_is(self, confidence)
            prediction_compare.i_create_local_probabilities(self, data_input)
            prediction_compare.the_local_probabilities_are(self, probabilities)
コード例 #43
0
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions for linear regression:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a linear regression with objective "<objective>" and "<params>"
                And I wait until the linear regression is ready less than <time_3> secs
                And I create a local linear regression
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params |

        """
        examples = [
            ['data/grades.csv', '10', '50', '30000',
             '{"000000": 1, "000001": 1, "000002": 1}',
             '000005', 29.63024,
             '{"input_fields": ["000000", "000001", "000002"]}'],
            ['data/iris.csv', '10', '50', '30000',
             '{"000000": 1, "000001": 1, "000004": "Iris-virginica"}',
             '000003', 1.21187,
             '{"input_fields": ["000000", "000001", "000004"]}'],
            ['data/movies.csv', '10', '50', '30000',
             '{"000007": "Action"}',
             '000009', 4.33333,
             '{"input_fields": ["000007"]}'],
            ['data/movies.csv', '10', '50', '30000',
             '{"000006": "1999"}',
             '000009', 0,
             '{"input_fields": ["000006"], "bias": false}']]
        show_doc(self.test_scenario10, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, params) = example

            # Source -> dataset -> linear regression, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            linear_create.i_create_a_linear_regression_with_objective_and_params(
                self, objective, params)
            linear_create.the_linear_regression_is_finished_in_less_than(
                self, time_3)
            prediction_compare.i_create_a_local_linear(self)

            # Both predictions are checked against the same expected value
            prediction_create.i_create_a_linear_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_linear_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #44
0
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5}                   | 000004    | Iris-setosa |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 6, "petal width": 2}  | 000004    | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal length": 4, "petal width": 1.5}| 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5}| 000004    | Iris-versicolor |

        """
        print(self.test_scenario1.__doc__)
        examples = [
            ['data/iris.csv', '10', '10', '10',
             '{"petal width": 0.5}',
             '000004', 'Iris-setosa'],
            ['data/iris.csv', '10', '10', '10',
             '{"petal length": 6, "petal width": 2}',
             '000004', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '10',
             '{"petal length": 4, "petal width": 1.5}',
             '000004', 'Iris-versicolor'],
            ['data/iris_sp_chars.csv', '10', '10', '10',
             '{"pétal.length": 4, "pétal&width\u0000": 1.5}',
             '000004', 'Iris-versicolor']]
        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3,
             data_input, objective, prediction) = example

            # Source -> dataset -> model, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)

            # Both predictions are checked against the same expected value
            prediction_create.i_create_a_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #45
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence |

        """
        examples = [
            ['data/iris.csv', '10', '10', '10',
             '{}',
             '000004', 'Iris-setosa', '0.2629'],
            ['data/grades.csv', '10', '10', '10',
             '{}',
             '000005', '68.62224', '27.5358'],
            ['data/grades.csv', '10', '10', '10',
             '{"Midterm": 20}',
             '000005', '40.46667', '54.89713'],
            ['data/grades.csv', '10', '10', '10',
             '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}',
             '000005', '28.06', '25.65806']]
        show_doc(self.test_scenario3, examples)
        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, confidence) = example

            # Source -> dataset -> model, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)

            # Prediction value and confidence
            prediction_create.i_create_a_proportional_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_create.the_confidence_is(self, confidence)

            # Local prediction value and confidence
            prediction_compare.i_create_a_proportional_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
            prediction_compare.the_local_prediction_confidence_is(self, confidence)
コード例 #46
0
    def test_scenario7(self):
        """
            Scenario: Successfully comparing predictions in operating kind for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>" in "<operating_kind>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local ensemble prediction for "<data_input>" in "<operating_kind>"
                Then the local ensemble prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | prediction | operating_kind | objective |

        """
        examples = [
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2.46}', 'Iris-versicolor',
             "probability", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2}', 'Iris-setosa',
             "probability", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2.46}', 'Iris-versicolor',
             "confidence", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2}', 'Iris-setosa',
             "confidence", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 2.46}', 'Iris-versicolor',
             "votes", "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal length": 1}', 'Iris-setosa',
             "votes", "000004"]]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Note the column order: the objective is the LAST item of the row
            (data, time_1, time_2, time_3, data_input,
             prediction, operating_kind, objective) = example

            # Source -> dataset -> ensemble, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            ensemble_create.i_create_an_ensemble(self)
            ensemble_create.the_ensemble_is_finished_in_less_than(self, time_3)
            ensemble_create.create_local_ensemble(self)

            # Both predictions use the same operating kind
            prediction_create.i_create_an_ensemble_prediction_op_kind(
                self, data_input, operating_kind)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_ensemble_prediction_op_kind(
                self, data_input, operating_kind)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #47
0
    def test_scenario1(self):
        """
            Scenario: Successfully comparing predictions for deepnets:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet with objective "<objective>" and "<params>"
                And I wait until the deepnet is ready less than <time_3> secs
                And I create a local deepnet
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params |

        """
        examples = [
            ['data/iris.csv', '30', '50', '30000',
             '{"petal width": 4}',
             '000004', 'Iris-virginica', '{}'],
            ['data/iris.csv', '30', '50', '30000',
             '{"sepal length": 4.1, "sepal width": 2.4}',
             '000004', 'Iris-setosa', '{}'],
            ['data/iris_missing2.csv', '30', '50', '30000',
             '{}',
             '000004', 'Iris-setosa', '{}'],
            ['data/grades.csv', '30', '50', '30000',
             '{}',
             '000005', 42.15473, '{}'],
            ['data/spam.csv', '30', '50', '30000',
             '{}',
             '000000', 'ham', '{}']]
        show_doc(self.test_scenario1, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, params) = example

            # Source -> dataset -> deepnet, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_deepnet_with_objective_and_params(
                self, objective, params)
            model_create.the_deepnet_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_deepnet(self)

            # Both predictions are checked against the same expected value
            prediction_create.i_create_a_deepnet_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_deepnet_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #48
0
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions in operating points for models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>" in "<operating_point>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>" in "<operating_point>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point | objective |

        """
        examples = [
            ['data/iris.csv', '10', '50', '50',
             '{"petal width": 4}', 'Iris-setosa',
             {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal width": 4}', 'Iris-versicolor',
             {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa',
             {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor',
             {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"},
             "000004"]]
        show_doc(self.test_scenario2, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Note the column order: the objective is the LAST item of the row
            (data, time_1, time_2, time_3, data_input,
             prediction, operating_point, objective) = example

            # Source -> dataset -> model, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)

            # Both predictions use the same operating point
            prediction_create.i_create_a_prediction_op(
                self, data_input, operating_point)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_prediction_op(
                self, data_input, operating_point)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #49
0
    def test_scenario4(self):
        """
            Scenario: Successfully comparing predictions in operating points for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>" in "<operating_point>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local ensemble prediction for "<data_input>" in "<operating_point>"
                Then the local ensemble prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point | objective |

        """
        examples = [
            ['data/iris.csv', '10', '50', '50',
             '{"petal width": 4}', 'Iris-setosa',
             {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"petal width": 4}', 'Iris-virginica',
             {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa',
             {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"},
             "000004"],
            ['data/iris.csv', '10', '50', '50',
             '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor',
             {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"},
             "000004"]]
        show_doc(self.test_scenario4, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Note the column order: the objective is the LAST item of the row
            (data, time_1, time_2, time_3, data_input,
             prediction, operating_point, objective) = example

            # Source -> dataset -> ensemble, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            ensemble_create.i_create_an_ensemble(self)
            ensemble_create.the_ensemble_is_finished_in_less_than(self, time_3)
            ensemble_create.create_local_ensemble(self)

            # Both predictions use the same operating point
            prediction_create.i_create_an_ensemble_prediction_op(
                self, data_input, operating_point)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_ensemble_prediction_op(
                self, data_input, operating_point)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #50
0
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions with text options and proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | options | data_input | objective | prediction |
                | data/text_missing.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} | {} | 000003 | swap |
                | data/text_missing.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}} | {"category1": "a"} | 000003 | paperwork |

        """
        print(self.test_scenario11.__doc__)
        # Both rows apply the same source update; spell the options out once
        text_options = '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}'
        examples = [
            ['data/text_missing.csv', '20', '20', '30', text_options,
             '{}', "000003", 'swap'],
            ['data/text_missing.csv', '20', '20', '30', text_options,
             '{"category1": "a"}', "000003", 'paperwork']]
        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, options,
             data_input, objective, prediction) = example

            # Source (updated with the text options) -> dataset -> model
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            source_create.i_update_source_with(self, options)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_model(self)

            # Predictions with the proportional missing strategy
            prediction_create.i_create_a_proportional_prediction(self, data_input)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_proportional_local_prediction(self, data_input)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #51
0
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression
                And I wait until the logistic regression is ready less than <time_3> secs
                And I create a local supervised model
                When I create a prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating kind "<operating_kind>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_kind |

        """
        examples = [
            ['data/iris.csv', '10', '50', '30000',
             '{"petal length": 5}',
             '000004', 'Iris-versicolor', '{}', "probability"],
            ['data/iris.csv', '10', '50', '30000',
             '{"petal length": 2}',
             '000004', 'Iris-setosa', '{}', "probability"]]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # The params column is present in each row but no step below
            # consumes it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, _params, operating_kind) = example

            # Source -> dataset -> logistic regression, wrapped locally as a
            # supervised model
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_supervised_model(
                self, model_type="logistic_regression")

            # Both predictions use the same operating kind
            prediction_create.i_create_a_logistic_prediction_with_op_kind(
                self, data_input, operating_kind)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_logistic_prediction_op_kind(
                self, data_input, operating_kind)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #52
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction from an ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                When I create an ensemble prediction for "<data_input>"
                And I wait until the prediction is ready less than <time_4> secs
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data               | time_1  | time_2 | time_3 | time_4 | number_of_models | tlp   |  data_input    | objective | prediction  |
                | ../data/iris.csv   | 10      | 10     | 50     | 20     | 5                | 1     | {"petal width": 0.5} | 000004    | Iris-versicolor |
                | ../data/iris_sp_chars.csv   | 10      | 10     | 50     | 20     | 5                | 1     | {"pétal&width\u0000": 0.5} | 000004    | Iris-versicolor |
                | ../data/grades.csv | 10      | 10     | 150     | 20     | 10               | 1     | {"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93} | 000005    | 88.205575 |
                | ../data/grades.csv | 10      | 10     | 150     | 20     | 10               | 1     | {"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89} | 000005    | 84.29401 |
        """
        print(self.test_scenario1.__doc__)
        examples = [
            ['data/iris.csv', '10', '10', '50', '20', '5', '1',
             '{"petal width": 0.5}',
             '000004', 'Iris-versicolor'],
            ['data/iris_sp_chars.csv', '10', '10', '50', '20', '5', '1',
             '{"pétal&width\u0000": 0.5}',
             '000004', 'Iris-versicolor'],
            ['data/grades.csv', '10', '10', '150', '20', '10', '1',
             '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93}',
             '000005', '88.205575'],
            ['data/grades.csv', '10', '10', '150', '20', '10', '1',
             '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89}',
             '000005', '84.29401']]
        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, time_4, number_of_models, tlp,
             data_input, objective, prediction) = example

            # Source -> dataset -> ensemble
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            ensemble_create.i_create_an_ensemble(self, number_of_models, tlp)
            ensemble_create.the_ensemble_is_finished_in_less_than(self, time_3)

            # Ensemble prediction: wait for it, then check its value
            prediction_create.i_create_an_ensemble_prediction(self, data_input)
            prediction_create.the_prediction_is_finished_in_less_than(self, time_4)
            prediction_create.the_prediction_is(self, objective, prediction)
コード例 #53
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions for deepnets with operating point:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet with objective "<objective>" and "<params>"
                And I wait until the deepnet is ready less than <time_3> secs
                And I create a local deepnet
                When I create a prediction with operating point "<operating_point>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_point>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point |

        """
        examples = [
            ['data/iris.csv', '10', '50', '30000',
             '{"petal width": 4}',
             '000004', 'Iris-versicolor', '{}',
             {"kind": "probability", "threshold": 1, "positive_class": "Iris-virginica"}]]
        show_doc(self.test_scenario3, examples)

        for example in examples:
            print("\nTesting with:\n" + str(example))
            # Name every column of the row once instead of indexing it
            (data, time_1, time_2, time_3, data_input,
             objective, prediction, params, operating_point) = example

            # Source -> dataset -> deepnet, plus its local counterpart
            source_create.i_upload_a_file(self, data)
            source_create.the_source_is_finished(self, time_1)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
            model_create.i_create_a_deepnet_with_objective_and_params(
                self, objective, params)
            model_create.the_deepnet_is_finished_in_less_than(self, time_3)
            prediction_compare.i_create_a_local_deepnet(self)

            # Both predictions use the same operating point
            prediction_create.i_create_a_deepnet_prediction_with_op(
                self, data_input, operating_point)
            prediction_create.the_prediction_is(self, objective, prediction)
            prediction_compare.i_create_a_local_deepnet_prediction_with_op(
                self, data_input, operating_point)
            prediction_compare.the_local_prediction_is(self, prediction)
コード例 #54
0
    def test_scenario11(self):
        """
            Scenario: Successfully comparing predictions in operating points for fusions:
            Scenario: Successfully comparing predictions for fusions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a fusion from a list of models
                And I wait until the fusion is ready less than <time_4> secs
                And I create a local fusion
                When I create a prediction for "<data_input>" in "<operating_point>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local fusion prediction for "<data_input>" in "<operating_point>"
                Then the local ensemble prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params| tag | data_input                             | objective | prediction  | params | operating_point


        """
        examples = [[
            'data/iris_unbalanced.csv', '30', '30', '120', '120',
            '{"tags":["my_fusion_tag_11"]}', 'my_fusion_tag_11',
            '{"petal width": 4}', '000004', 'Iris-virginica', {
                "kind": "probability",
                "threshold": 0.1,
                "positive_class": "Iris-setosa"
            }
        ],
                    [
                        'data/iris_unbalanced.csv', '30', '30', '120', '120',
                        '{"tags":["my_fusion_tag_11_b"]}',
                        'my_fusion_tag_11_b', '{"petal width": 4}', '000004',
                        'Iris-virginica', {
                            "kind": "probability",
                            "threshold": 0.9,
                            "positive_class": "Iris-setosa"
                        }
                    ]]
        show_doc(self.test_scenario11, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_retrieve_a_list_of_remote_models(
                self, example[6])
            model_create.i_create_a_fusion(self)
            model_create.the_fusion_is_finished_in_less_than(self, example[4])
            prediction_compare.i_create_a_local_fusion(self)
            prediction_create.i_create_a_fusion_prediction_op(
                self, example[7], example[10])
            prediction_create.the_prediction_is(self, example[8], example[9])
            prediction_compare.i_create_a_local_prediction_op(
                self, example[7], example[10])
            prediction_compare.the_local_prediction_is(self, example[9])
コード例 #55
0
    def test_scenario12(self):
        """
            Scenario: Successfully comparing remote and local predictions
                      with raw date input for deepnet:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet
                And I wait until the deepnet is ready
                less than <time_3> secs
                And I create a local deepnet
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                |data|time_1|time_2|time_3|data_input|objective|prediction






            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
             '000002', 0.04082],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
             '000002', 0.02919],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
             '000002', 0.28517],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1932-01-30T19:24:11.440",  "cat-0":"cat2"}',
             '000002', 0.16183],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}',
             '000002', 0.0199]

        """
        examples = [
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
             '000002', 0.04082],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
             '000002', 0.02919],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}',
             '000002', 0.0199],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
             '000002', 0.28517],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
             '000002', -0.05673],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "1932-01-30T19:24:11.440",  "cat-0":"cat2"}',
             '000002', 0.16183],
            ['data/dates2.csv', '20', '45', '60',
             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}',
             '000002', 0.0199]
]
        show_doc(self.test_scenario12, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_no_suggest_deepnet(self)
            model_create.the_deepnet_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_deepnet(self)
            prediction_create.i_create_a_deepnet_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_deepnet_prediction(self,
                                                                   example[4])
            prediction_compare.the_local_prediction_is(self, example[6])