def test_scenario04(self):
        """
        Scenario: Successfully building test predictions from dataset
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML logistic regression resources using dataset to test "<test>" and log predictions in "<output>"
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario3/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario04.__doc__
        examples = [[
            'scenario1_lr',
            '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}',
            'data/test_iris.csv', 'scenario3_lr/predictions.csv',
            'check_files/predictions_iris_lr.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            lr_pred.i_create_lr_resources_from_dataset(self,
                                                       None,
                                                       test=example[2],
                                                       output=example[3])
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #2
0
    def test_scenario24(self):
        """
            Scenario: Successfully  test predictions remotely from boosted ensemble
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using boosted ensemble in <iterations> iterations to remotely test "<test>" and log predictions in "<output>"
                And I check that the ensemble has been created
                And I check that the predictions are ready
                And I check that the batch prediction is ready
                And I check that the bath predictions datset is ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | iterations | test                    | output                        |predictions_file                      |

        """
        examples = [[
            'scenario1',
            '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
            '10', 'data/test_iris.csv', 'scenario24/predictions.csv',
            'check_files/predictions_iris_boost.csv'
        ]]
        show_doc(self.test_scenario24, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it( \
                self, example[0], example[1])
            test_pred.i_create_resources_remotely_from_boosted_ensemble( \
                self, iterations=example[2], test=example[3],
                output=example[4])
            test_pred.i_check_create_ensemble(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources using source with objective "<objective>" and model fields "<model_fields>" to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | objective | model_fields | test                    | output                        |predictions_file           |
                | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}   | class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P |../data/test_multilabel.csv   | ./scenario_mlm_2/predictions.csv   | ./check_files/predictions_ml_comma.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_mlm_1', '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}', 'class', '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P', 'data/test_multilabel.csv', 'scenario_mlm_2/predictions.csv', 'check_files/predictions_ml_comma.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_source_with_objective(self, multi_label='multi-label ', objective=example[2], model_fields=example[3], test=example[4], output=example[5])
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[6])
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | ../data/diabetes.csv   | ./scenario_c_2/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_cluster.i_create_cluster_resources_from_source(self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario26(self):
        """
            Scenario: Successfully building test predictions from ensemble
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                Given I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
                And I create BigML resources using local ensemble of <number_of_models> models in "<scenario2>" to test "<test>" and log predictions in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | number_of_models | test                    | output                        |predictions_file                      |


        """
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}',
             '10', 'scenario5', 'data/test_iris.csv', 'scenario26/predictions_p.csv', 'check_files/predictions_iris_e_op_prob.csv', 'data/operating_point_prob.json'],
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}',
             '10', 'scenario5', 'data/test_iris.csv', 'scenario26/predictions_c.csv', 'check_files/predictions_iris_e_op_conf.csv', 'data/operating_point_conf.json']]
        show_doc(self.test_scenario26, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3])
            test_pred.i_create_resources_from_local_ensemble_with_op( \
                self, number_of_models=example[4], directory=example[5],
                test=example[6], output=example[7], operating_point=example[9])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[8])
    def test_scenario5(self):
        """
            Scenario: Successfully building test predictions from models retrieved by tag
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources with labels "<labels>" and objective "<objective>" using models tagged as "<tag>" to test "<test>" and log predictions in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |labels      | objective | tag       | test                  | output                      |predictions_file                    |
                | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}    | Adult,Student | class | my_multilabelm_1 | ../data/test_multilabel.csv | ./scenario_mlm_5/predictions.csv | ./check_files/predictions_ml_labels.csv |
        """
        print self.test_scenario5.__doc__
        examples = [[
            'scenario_mlm_1',
            '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}',
            'Adult,Student', 'class', 'my_multilabelm_1',
            'data/test_multilabel.csv', 'scenario_mlm_5/predictions.csv',
            'check_files/predictions_ml_labels.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            ml_pred.i_predict_ml_from_model_tag_with_labels_with_objective(
                self,
                labels=example[2],
                objective=example[3],
                tag=example[4],
                test=example[5],
                output=example[6])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[7])
Exemple #7
0
    def test_scenario09(self):
        """
        Scenario: Successfully combining test predictions from existing directories
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
            Given I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>"
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | directory1  | directory2  | output                      |predictions_file         |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"}   | ./scenario1 | ./scenario5 | ./scenario8/predictions.csv | ./check_files/predictions_iris.csv |


        """
        # The docstring above is echoed at runtime, so its text is kept as is.
        # print(...) with a single argument is valid under both the Python 2
        # print statement and the Python 3 print function, with equal output.
        print(self.test_scenario09.__doc__)
        examples = [[
            'scenario1',
            '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
            'scenario5',
            '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}',
            'scenario1', 'scenario5', 'scenario8/predictions.csv',
            'check_files/predictions_iris.csv'
        ]]
        for example in examples:
            print("\nTesting with:\n%s" % (example,))
            # Columns follow the Examples header of the docstring.
            (scenario, kwargs, scenario2, kwargs2,
             directory1, directory2, output, predictions_file) = example
            # Both source scenarios are requested before their output
            # directories are combined.
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, scenario, kwargs)
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, scenario2, kwargs2)
            test_pred.i_find_predictions_files(
                self,
                directory1=directory1,
                directory2=directory2,
                output=output)
            test_pred.i_check_predictions(self, predictions_file)
    def test_scenario14(self):
        """
            Scenario: Successfully building test predictions from start with user-given separator:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with "<separator>" as test field separator
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | separator | output                        |predictions_file           |

        """
        examples = [
            ['data/iris.csv', 'data/test_iris.tsv', '"\t"', 'scenario14/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.test_scenario14, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_separator(self, data=example[0], test=example[1], output=example[3], separator=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source using datasets with max categories
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources with <max_categories> as categories limit and <objective> as objective field using source to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the max_categories datasets have been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |max_categories| objective | test                    | output                        |predictions_file           |
                | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"}   |1| species | ../data/test_iris.csv   | ./scenario_mc_2/predictions.csv   | ./check_files/predictions_mc.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_mc_1', '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', '1', 'species', 'data/test_iris.csv', 'scenario_mc_2/predictions.csv', 'check_files/predictions_mc.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            max_cat.i_create_all_mc_resources_from_source(self, max_categories=example[2], objective=example[3], test=example[4], output=example[5])
            test_pred.i_check_create_dataset(self, suffix=None)
            max_cat.i_check_create_max_categories_datasets(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[6])
Exemple #10
0
    def setup_scenario06(self):
        """
        Scenario: Successfully building test predictions from ensemble
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML resources using ensemble of <number_of_models> models to test "<test>" and log predictions in "<output>"
            And I check that the ensemble has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | number_of_models | test                    | output                        |predictions_file                      |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | 10               | ../data/test_iris.csv   | ./scenario5/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.setup_scenario06.__doc__
        examples = [[
            'scenario1',
            '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
            '10', 'data/test_iris.csv', 'scenario5/predictions.csv',
            'check_files/predictions_iris.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_ensemble(
                self,
                number_of_models=example[2],
                test=example[3],
                output=example[4])
            test_pred.i_check_create_ensemble(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
    def setup_scenario1(self):
        """
            Scenario: Successfully building test predictions from training data using datasets with max categories
                Given I create BigML resources from "<data>" with <max_categories> as categories limit and <objective> as objective field to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the max_categories datasets have been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data |max_categories | objective | test                        | output                          |predictions_file           |
                |../data/iris.csv |1| species |../data/test_iris.csv | ./scenario_mc_1/predictions.csv | ./check_files/predictions_mc.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['data/iris.csv', '1', 'species', 'data/test_iris.csv', 'scenario_mc_1/predictions.csv', 'check_files/predictions_mc.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            max_cat.i_create_all_mc_resources(self, example[0], max_categories=example[1], objective=example[2], test=example[3], output=example[4])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            max_cat.i_check_create_max_categories_datasets(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
Exemple #12
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #13
0
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
        """
        examples = [[
            'data/grades.csv', 'data/test_grades.csv',
            'scenario1_r/predictions.csv', 'check_files/predictions_grades.csv'
        ],
                    [
                        'data/iris.csv', 'data/test_iris.csv',
                        'scenario1/predictions.csv',
                        'check_files/predictions_iris.csv'
                    ]]
        show_doc(self.setup_scenario02, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources(self, example[0], example[1],
                                             example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv', 'check_files/predictions_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #15
0
    def test_scenario11(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                         |predictions_file                        | objective | fields   |
                | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario11/predictions.csv   | ./check_files/predictions_iris_b.csv   | 0         | "petal length","petal width" |

        """
        print self.test_scenario11.__doc__
        examples = [[
            'scenario1',
            '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
            'data/test_iris.csv', 'scenario11/predictions.csv',
            'check_files/predictions_iris_b.csv', '0',
            '"petal length","petal width"'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_objective_model(
                self,
                objective=example[5],
                fields=example[6],
                test=example[2],
                output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #16
0
    def test_scenario07(self):
        """
        Scenario: Successfully building test predictions from models file
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
            And I create BigML resources using models in file "<models_file>" to test "<test>" and log predictions in "<output>"
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | models_file        | test                  | output                      |predictions_file                    |

        """
        print self.test_scenario07.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}',
             'scenario5/models', 'data/test_iris.csv', 'scenario6/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.test_scenario07, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3])
            test_pred.i_create_resources_from_models_file(self, multi_label=None, models_file=example[4], test=example[5], output=example[6])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[7])
Exemple #17
0
    def test_scenario14(self):
        """
            Scenario: Successfully building test predictions from start with user-given separator:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with "<separator>" as test field separator
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | separator | output                        |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris.tsv   | "\t"        |./scenario14/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario14.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.tsv', '"\t"',
            'scenario14/predictions.csv', 'check_files/predictions_iris.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_separator(
                self,
                data=example[0],
                test=example[1],
                output=example[3],
                separator=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #18
0
    def test_scenario21(self):
        """
            Scenario 1: Successfully building test predictions from ensemble
                    And I create BigML resources from "<data>" using ensemble of <number_of_models> models to test "<test>" and log predictions in "<output>"
                    And I check that the source has been created
                    And I check that the dataset has been created
                    And I check that the ensemble has been created
                    And I check that the models in the ensembles have been created
                    And I check that the predictions are ready
                    Then the local prediction file is like "<predictions_file>"

                    Examples:
                    |data               | number_of_models | test                      | output                         | predictions_file                         |
        """
        examples = [
            ['data/grades.csv', '5', 'data/test_grades.csv', 'scenario21/predictions.csv', 'check_files/predictions_grades_e.csv']]
        show_doc(self.test_scenario21, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_resources_in_prod_from_ensemble( \
                self, data=example[0], number_of_models=example[1],
                test=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_ensemble(self)
            test_pred.i_check_create_models_in_ensembles(self,
                                                         in_ensemble=True)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #19
0
    def test_scenario15(self):
        """
            Scenario: Successfully building test predictions from start with different prediction file format:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with prediction options "<options>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |options     |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris.csv   |./scenario15/predictions.csv   |--prediction-header --prediction-fields 'petal length,petal width' --prediction-info full | ./check_files/predictions_iris_h.csv   |

        """
        print self.test_scenario15.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.csv',
            'scenario15/predictions.csv',
            '--prediction-header --prediction-fields \'petal length,petal width\' --prediction-info full',
            'check_files/predictions_iris_h.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_options(self,
                                                          data=example[0],
                                                          test=example[1],
                                                          output=example[2],
                                                          options=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #21
0
    def test_scenario17(self):
        """
            Scenario: Successfully building test predictions from local model
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using local model in "<scenario>" to test "<test>" and log predictions in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario17/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario17.__doc__
        examples = [[
            'scenario1',
            '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
            'data/test_iris.csv', 'scenario17/predictions.csv',
            'check_files/predictions_iris.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_local_model(self,
                                                          directory=example[0],
                                                          test=example[2],
                                                          output=example[3])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML logistic regression resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1_lr/predictions.csv', 'check_files/predictions_iris_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            lr_pred.i_create_all_lr_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #23
0
    def test_scenario19(self):
        """
            Scenario: Successfully building test predictions from start using median:
                Given I create BigML resources uploading train "<data>" file using the median to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                       |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario19/predictions.csv | ./check_files/predictions_grades_median.csv |
        """
        print self.test_scenario19.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv',
            'scenario19/predictions.csv',
            'check_files/predictions_grades_median.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_median(self,
                                                         data=example[0],
                                                         test=example[1],
                                                         output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario8(self):
        """
        Scenario: Successfully building test predictions from model with operating point
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML remote batch predictions one by one using model to test "<test>" and log predictions in "<output>"
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  |
            operating_point | test                    | output                        |predictions_file           |

        """
        examples = [
            ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario_r8/predictions.csv', 'check_files/predictions_iris.csv']]

        print self.test_scenario8.__doc__
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it( \
                self, example[0], example[1])
            test_batch_pred.i_create_resources_from_model_remote_no_batch( \
                self, test=example[2], output=example[3])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario2(self):
        """
            Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with proportional missing strategy and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the source has been created from the test file
            And I check that the dataset has been created from the test file
            And I check that the batch prediction has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_2/predictions.csv   | ./check_files/predictions_iris_nulls.csv
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris_nulls.csv', 'scenario_mis_2/predictions.csv', 'check_files/predictions_iris_nulls.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_proportional(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #27
0
    def test_scenario5(self):
        """
            Scenario: Successfully building test predictions from models retrieved by tag
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources using models tagged as "<tag>" to test "<test>" and log predictions in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | tag       | test                  | output                      |predictions_file                    |
                | scenario_ml_6| {"tag": "my_multilabel_5", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_6/predictions.csv", "test": "../data/test_multilabel.csv"}    | my_multilabel_5 | ../data/test_multilabel.csv | ./scenario_ml_5/predictions.csv | ./check_files/predictions_ml_comma.csv |

        """
        print self.test_scenario5.__doc__
        examples = [[
            'scenario_ml_6',
            '{"tag": "my_multilabel_5", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_6/predictions.csv", "test": "data/test_multilabel.csv"}',
            'my_multilabel_5', 'data/test_multilabel.csv',
            'scenario_ml_5/predictions.csv',
            'check_files/predictions_ml_comma.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            ml_pred.i_predict_ml_from_model_tag(self,
                                                tag=example[2],
                                                test=example[3],
                                                output=example[4])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML logistic regression resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.csv',
            'scenario1_lr/predictions.csv',
            'check_files/predictions_iris_lr.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            lr_pred.i_create_all_lr_resources(self, example[0], example[1],
                                              example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #29
0
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator | test                        | output                          |predictions_file           |
                |my_multilabel_1|:|7| ../data/multilabel.csv |,| ../data/test_multilabel.csv | ./scenario_ml_1/predictions.csv | ./check_files/predictions_ml.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'my_multilabel_1', ':', '7', 'data/multilabel.csv', ',',
            'data/test_multilabel.csv', 'scenario_ml_1/predictions.csv',
            'check_files/predictions_ml.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_ml_resources(self,
                                              tag=example[0],
                                              label_separator=example[1],
                                              number_of_labels=example[2],
                                              data=example[3],
                                              training_separator=example[4],
                                              test=example[5],
                                              output=example[6])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[7])
    def test_scenario5(self):
        """
            Scenario 5: Successfully building test predictions from dataset and prediction format info
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using a model to test the previous test dataset remotely with prediction headers and fields "<fields>" and log predictions in "<output>"
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | fields | output                        |predictions_file           |
                | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"}   | sepal length,sepal width | ./scenario_r4/predictions.csv   | ./check_files/predictions_iris_format.csv   |

        """

        print self.test_scenario5.__doc__
        examples = [
            ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'sepal length,sepal width', 'scenario_r4/predictions.csv', 'check_files/predictions_iris_format.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_model_batch(self, fields=example[2], output=example[3])
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #31
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources using source to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_ml_1| {"tag": "my_multilabel_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_1/predictions.csv", "test": "../data/test_multilabel.csv"}   | ../data/test_multilabel.csv   | ./scenario_ml_2/predictions.csv   | ./check_files/predictions_ml_comma.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_ml_1',
            '{"tag": "my_multilabel_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_1/predictions.csv", "test": "data/test_multilabel.csv"}',
            'data/test_multilabel.csv', 'scenario_ml_2/predictions.csv',
            'check_files/predictions_ml_comma.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_source(self,
                                                     multi_label='multi-label',
                                                     test=example[2],
                                                     output=example[3])
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #32
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with proportional missing strategy:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with proportional missing strategy and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_1/predictions.csv | ./check_files/predictions_iris_nulls.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris_nulls.csv',
            'scenario_mis_1/predictions.csv',
            'check_files/predictions_iris_nulls.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_proportional(self,
                                                          data=example[0],
                                                          test=example[1],
                                                          output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #33
0
    def setup_scenario1(self):
        """
            Scenario: Successfully building test predictions from training data using datasets with max categories
                Given I create BigML resources from "<data>" with <max_categories> as categories limit and <objective> as objective field to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the max_categories datasets have been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data |max_categories | objective | test                        | output                          |predictions_file           |
                |../data/iris.csv |1| species |../data/test_iris.csv | ./scenario_mc_1/predictions.csv | ./check_files/predictions_mc.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['data/iris.csv', '1', 'species', 'data/test_iris.csv', 'scenario_mc_1/predictions.csv', 'check_files/predictions_mc.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            max_cat.i_create_all_mc_resources(self, example[0], max_categories=example[1], objective=example[2], test=example[3], output=example[4])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            max_cat.i_check_create_max_categories_datasets(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
Exemple #34
0
    def test_scenario10(self):
        """
            Scenario: Successfully combining test predictions from existing directories
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
                And I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>" with method "<method>"
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | directory1  | directory2  | output                         |predictions_file                    | method                 |

        """
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario1', 'scenario5', 'scenario9/predictions_c.csv', 'check_files/predictions_iris.csv', '"confidence weighted"'],
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario1', 'scenario5', 'scenario9/predictions_p.csv', 'check_files/predictions_iris_p.csv', '"probability weighted"'],
            ['scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}',
             'scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}',
             'scenario1_r', 'scenario1_r', 'scenario10/predictions_c.csv', 'check_files/predictions_grades.csv', '"confidence weighted"'],
            ['scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}',
             'scenario1_r', '{"data": "data/grades.csv", "output": "scenario1_r/predictions.csv", "test": "data/test_grades.csv"}',
             'scenario1_r', 'scenario1_r', 'scenario10/predictions_p.csv', 'check_files/predictions_grades_p.csv', '"probability weighted"']]
        show_doc(self.test_scenario10, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3])
            test_pred.i_find_predictions_files_with_method(self, directory1=example[4], directory2=example[5], output=example[6], method=example[8])
            test_pred.i_check_predictions(self, example[7])
Exemple #35
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source using datasets with max categories
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources with <max_categories> as categories limit and <objective> as objective field using source to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the max_categories datasets have been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |max_categories| objective | test                    | output                        |predictions_file           |
                | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"}   |1| species | ../data/test_iris.csv   | ./scenario_mc_2/predictions.csv   | ./check_files/predictions_mc.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_mc_1', '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}', '1', 'species', 'data/test_iris.csv', 'scenario_mc_2/predictions.csv', 'check_files/predictions_mc.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            max_cat.i_create_all_mc_resources_from_source(self, max_categories=example[2], objective=example[3], test=example[4], output=example[5])
            test_pred.i_check_create_dataset(self, suffix=None)
            max_cat.i_check_create_max_categories_datasets(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[6])
Exemple #36
0
    def test_scenario15(self):
        """
            Scenario: Successfully building test predictions from start with different prediction file format:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with prediction options "<options>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |options     |predictions_file           |

        """
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario15/predictions.csv', '--prediction-header --prediction-fields \'petal length,petal width\' --prediction-info full', 'check_files/predictions_iris_h.csv']]
        show_doc(self.test_scenario15, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_options(self, data=example[0], test=example[1], output=example[2], options=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #37
0
    def test_scenario8(self):
        """
        Scenario: Successfully building test predictions from model with operating point
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML remote batch predictions one by one using model to test "<test>" and log predictions in "<output>"
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  |
            operating_point | test                    | output                        |predictions_file           |

        """
        examples = [
            ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario_r8/predictions.csv', 'check_files/predictions_iris.csv']]

        print self.test_scenario8.__doc__
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it( \
                self, example[0], example[1])
            test_batch_pred.i_create_resources_from_model_remote_no_batch( \
                self, test=example[2], output=example[3])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #38
0
    def test_scenario24(self):
        """
            Scenario: Successfully  test predictions remotely from boosted ensemble
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using boosted ensemble in <iterations> iterations to remotely test "<test>" and log predictions in "<output>"
                And I check that the ensemble has been created
                And I check that the predictions are ready
                And I check that the batch prediction is ready
                And I check that the bath predictions datset is ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | iterations | test                    | output                        |predictions_file                      |

        """
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', '10', 'data/test_iris.csv', 'scenario24/predictions.csv', 'check_files/predictions_iris_boost.csv']]
        show_doc(self.test_scenario24, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it( \
                self, example[0], example[1])
            test_pred.i_create_resources_remotely_from_boosted_ensemble( \
                self, iterations=example[2], test=example[3],
                output=example[4])
            test_pred.i_check_create_ensemble(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
Exemple #39
0
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv', 'check_files/predictions_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #40
0
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
        """
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario1_r/predictions.csv', 'check_files/predictions_grades.csv'],
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.setup_scenario02, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #41
0
    def test_scenario4(self):
        """
            Scenario 4: Successfully building test predictions from dataset
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset to test the previous test dataset remotely and log predictions in "<output>"
                And I check that the model has been created
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario_r3/predictions.csv   | ./check_files/predictions_iris.csv   |

        """

        print self.test_scenario4.__doc__
        examples = [[
            'scenario_r1',
            '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}',
            'scenario_r3/predictions.csv', 'check_files/predictions_iris.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_batch(self,
                                                            output=example[2])
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with missing-splits model:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_1/predictions.csv | ./check_files/predictions_iris_missing.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_missing.csv', 'data/test_iris_missing.csv', 'scenario_mspl_1/predictions.csv', 'check_files/predictions_iris_missing.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_missing_splits(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #43
0
    def test_scenario5(self):
        """
            Scenario 5: Successfully building test predictions from dataset and prediction format info
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using a model to test the previous test dataset remotely with prediction headers and fields "<fields>" and log predictions in "<output>"
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | fields | output                        |predictions_file           |
                | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"}   | sepal length,sepal width | ./scenario_r4/predictions.csv   | ./check_files/predictions_iris_format.csv   |

        """

        print self.test_scenario5.__doc__
        examples = [[
            'scenario_r1',
            '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}',
            'sepal length,sepal width', 'scenario_r4/predictions.csv',
            'check_files/predictions_iris_format.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_model_batch(self,
                                                          fields=example[2],
                                                          output=example[3])
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario03(self):
        """
        Scenario: Successfully building test predictions from source
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML logistic regression resources using source to test "<test>" and log predictions in "<output>"
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1_lr/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario2/predictions.csv   | ./check_files/predictions_iris.csv   |
        """
        print self.test_scenario03.__doc__
        examples = [
            ['scenario1_lr', '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario2_lr/predictions.csv', 'check_files/predictions_iris_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            lr_pred.i_create_lr_resources_from_source(self, None, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #45
0
    def test_scenario07(self):
        """
        Scenario: Successfully building batch test predictions from model with customized output
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML deepnet resources using model to test "<test>" as a batch prediction with output format "<batch-output>" and log predictions in "<output>"
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                |batch_output    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ../data/batch_output.json  | ./scenario6_dn/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario07.__doc__
        examples = [[
            'scenario1_dn',
            '{"data": "data/iris.csv", "output": "scenario1_dn/predictions.csv", "test": "data/test_iris.csv"}',
            'data/test_iris.csv', 'data/batch_output.json',
            'scenario6_dn/predictions.csv',
            'check_files/predictions_iris_dn_prob.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            dn_pred.i_create_dn_resources_from_model_remote_with_options(
                self,
                test=example[2],
                output=example[4],
                options_file=example[3])
            batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator and "<ml_fields>" as multi-label fields using model_fields "<model_fields>" and objective "<objective>" to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator | ml_fields | model_fields | objective | test                        | output                         |predictions_file           |
                |my_multilabelm_1|:|7| ../data/multilabel_multi.csv |,  | type,class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P | class |../data/test_multilabel.csv | ./scenario_mlm_1/predictions.csv | ./check_files/predictions_ml.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['my_multilabelm_1', ':', '7', 'data/multilabel_multi.csv', ',', 'type,class', '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P', 'class', 'data/test_multilabel.csv', 'scenario_mlm_1/predictions.csv', 'check_files/predictions_ml.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_mlm_resources(self, tag=example[0], label_separator=example[1], number_of_labels=example[2], data=example[3], training_separator=example[4], ml_fields=example[5], model_fields=example[6], objective=example[7], test=example[8], output=example[9])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[10])
Exemple #47
0
    def test_scenario01(self):
        """
        Scenario: Successfully building deepnet test predictions from start with no headers:
            Given I create BigML deepnet resources uploading train "<data>" file with no headers to test "<test>" with no headers and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the deepnet model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris_nh.csv   | ../data/test_iris_nh.csv   | ./scenario1_dn_nh/predictions.csv   | ./check_files/predictions_iris_dn.csv   |


        """
        print self.test_scenario01.__doc__
        examples = [[
            'data/iris_nh.csv', 'data/test_iris_nh.csv',
            'scenario1_dn_nh/predictions.csv',
            'check_files/predictions_iris_dn.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dn_pred.i_create_all_dn_resources_with_no_headers(
                self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dn_pred.i_check_create_dn_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | ../data/diabetes.csv   | ./scenario_c_2/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_c_1',
            '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}',
            'data/diabetes.csv', 'scenario_c_2/centroids.csv',
            'check_files/centroids_diabetes.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_cluster.i_create_cluster_resources_from_source(
                self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Exemple #49
0
    def test_scenario09(self):
        """
        Scenario: Successfully combining test predictions from existing directories
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
            Given I combine BigML predictions files in "<directory1>" and "<directory2>" into "<output>"
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | directory1  | directory2  | output                      |predictions_file         |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"}   | ./scenario1 | ./scenario5 | ./scenario8/predictions.csv | ./check_files/predictions_iris.csv |


        """
        print self.test_scenario09.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "test": "data/test_iris.csv", "output": "scenario5/predictions.csv"}',
             'scenario1', 'scenario5', 'scenario8/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3])
            test_pred.i_find_predictions_files(self, directory1=example[4], directory2=example[5], output=example[6])
            test_pred.i_check_predictions(self, example[7])
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroids from scratch:
                Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test               | output                           |predictions_file           |
                | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv |
                | ../data/diabetes.csv   | ../data/diabetes.csv   | ./scenario_c_1/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'],
            ['data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #51
0
    def test_scenario18(self):
        """
            Scenario: Successfully building test predictions from ensemble
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                Given I have previously executed "<scenario2>" or reproduce it with arguments <kwargs2>
                And I create BigML resources using local ensemble of <number_of_models> models in "<scenario2>" to test "<test>" and log predictions in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |scenario2    | kwargs2                                                  | number_of_models | test                    | output                        |predictions_file                      |
                | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | scenario5| {"number_of_models": 10, "output": "./scenario5/predictions.csv", "test": "../data/test_iris.csv"}       | 10               | ../data/test_iris.csv   | ./scenario18/predictions.csv   | ./check_files/predictions_iris.csv   |



        """
        print self.test_scenario18.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}',
             'scenario5', '{"number_of_models": 10, "output": "scenario5/predictions.csv", "test": "data/test_iris.csv"}',
             '10', 'scenario5', 'data/test_iris.csv', 'scenario18/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[2], example[3])
            test_pred.i_create_resources_from_local_ensemble(self, number_of_models=example[4], directory=example[5], test=example[6], output=example[7])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[8])
    def test_scenario4(self):
        """
            Scenario: Successfully building ensembles test predictions from models file with max categories
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using models in file "<models_file>" to test "<test>" and log predictions with combine method in "<output>"
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  |models_file| test                    | output                        |predictions_file           |
                | scenario_mc_1| {"data": "../data/iris.csv", "max_categories": "1", "objective": "species", "output": "./scenario_mc_1/predictions.csv", "test": "../data/test_iris.csv"}   | scenario_mc_1/models | ../data/test_iris.csv   | ./scenario_mc_4/predictions.csv   | ./check_files/predictions_mc.csv   |
        """
        print self.test_scenario4.__doc__
        examples = [
            [
                "scenario_mc_1",
                '{"data": "data/iris.csv", "max_categories": "1", "objective": "species", "output": "scenario_mc_1/predictions.csv", "test": "data/test_iris.csv"}',
                "scenario_mc_1/models",
                "data/test_iris.csv",
                "scenario_mc_4/predictions.csv",
                "check_files/predictions_mc.csv",
            ]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            max_cat.i_create_all_mc_resources_from_models(
                self, models_file=example[2], test=example[3], output=example[4]
            )
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
Exemple #53
0
    def test_scenario19(self):
        """
            Scenario: Successfully building test predictions from start using median:
                Given I create BigML resources uploading train "<data>" file using the median to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                       |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario19/predictions.csv | ./check_files/predictions_grades_median.csv |
        """
        print self.test_scenario19.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario19/predictions.csv', 'check_files/predictions_grades_median.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_median(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #54
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_2/predictions.csv   | ./check_files/predictions_iris_missing.csv
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris_missing.csv', 'data/test_iris_missing.csv',
            'scenario_mspl_2/predictions.csv',
            'check_files/predictions_iris_missing.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_missing_splits(
                self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #55
0
    def test_scenario01(self):
        """
        Scenario: Successfully building test predictions from start with no headers:
            Given I create BigML resources uploading train "<data>" file with no headers to test "<test>" with no headers and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris_nh.csv   | ../data/test_iris_nh.csv   | ./scenario1_nh/predictions.csv   | ./check_files/predictions_iris.csv   |


        """
        print self.test_scenario01.__doc__
        examples = [
            ['data/iris_nh.csv', 'data/test_iris_nh.csv', 'scenario1_nh/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_no_headers(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario07(self):
        """
        Scenario: Successfully building batch test predictions from model
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML logistic regression resources using model to test "<test>" as a batch prediction and log predictions in "<output>"
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario6_lr/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario07.__doc__
        examples = [[
            'scenario1_lr',
            '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}',
            'data/test_iris.csv', 'scenario6_lr/predictions_p.csv',
            'check_files/predictions_iris_lr_op_prob.csv',
            'data/operating_point_prob.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            lr_pred.i_create_lr_resources_from_model_with_op(
                self,
                test=example[2],
                output=example[3],
                operating_point=example[5])
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario4(self):
        """
            Scenario 4: Successfully building test predictions from dataset
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset to test the previous test dataset remotely and log predictions in "<output>"
                And I check that the model has been created
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario_r3/predictions.csv   | ./check_files/predictions_iris.csv   |

        """

        print self.test_scenario4.__doc__
        examples = [
            ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario_r3/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_batch(self, output=example[2])
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #58
0
    def test_scenario2(self):
        """
            Scenario: Successfully building predictions for data streamed to stdin:
                Given I create BigML resources uploading train "<data>" file to test "<test>" read from stdin with name "<name>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                            |predictions_file           | name |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_st_2/predictions.csv   | ./check_files/predictions_iris.csv   | Source name: áéí |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario_st_2/predictions.csv', 'check_files/predictions_iris.csv', 'Source name: áéí']]
        for example in examples:
            print "\nTesting with:\n", example
            stdin.i_create_all_resources_to_test_from_stdin(self, data=example[0], test=example[1], name=example[4], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Exemple #59
0
    def test_scenario21(self):
        """
            Scenario 1: Successfully building test predictions from ensemble
                    And I create BigML resources from "<data>" using ensemble of <number_of_models> models to test "<test>" and log predictions in "<output>"
                    And I check that the source has been created
                    And I check that the dataset has been created
                    And I check that the ensemble has been created
                    And I check that the models in the ensembles have been created
                    And I check that the predictions are ready
                    Then the local prediction file is like "<predictions_file>"

                    Examples:
                    |data               | number_of_models | test                      | output                         | predictions_file                         |
        """
        examples = [[
            'data/grades.csv', '5', 'data/test_grades.csv',
            'scenario21/predictions.csv',
            'check_files/predictions_grades_e.csv'
        ]]
        show_doc(self.test_scenario21, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_resources_in_prod_from_ensemble( \
                self, data=example[0], number_of_models=example[1],
                test=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_ensemble(self)
            test_pred.i_check_create_models_in_ensembles(self,
                                                         in_ensemble=True)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])