Example no. 1
    def test_scenario2(self):
        """
            Scenario: Successfully building evaluations from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to evaluate and log evaluation in "<output>"
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                |scenario    | kwargs                                                  | output                   | json_evaluation_file    |
                | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"}   |./scenario_e2/evaluation | ./check_files/evaluation_iris.json |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_e1',
            '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}',
            'scenario_e2/evaluation', 'check_files/evaluation_iris.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            evaluation.given_i_create_bigml_resources_using_source_to_evaluate(
                self, output=example[2])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example no. 2
    def test_scenario2(self):
        """
            Scenario: Successfully building predictions for data streamed to stdin:
                Given I create BigML resources uploading train "<data>" file to test "<test>" read from stdin with name "<name>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                            |predictions_file           | name |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_st_2/predictions.csv   | ./check_files/predictions_iris.csv   | Source name: áéí |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.csv',
            'scenario_st_2/predictions.csv',
            'check_files/predictions_iris.csv', 'Source name: áéí'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            stdin.i_create_all_resources_to_test_from_stdin(self,
                                                            data=example[0],
                                                            test=example[1],
                                                            name=example[4],
                                                            output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 3
    def setup_scenario1(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to evaluate and log evaluation in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | output                   | json_evaluation_file    |
                | ../data/iris.csv | ./scenario_e1/evaluation | ./check_files/evaluation_iris.json |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'scenario_e1/evaluation',
            'check_files/evaluation_iris.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_evaluate(self,
                                                         data=example[0],
                                                         output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[2])
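
For orientation, the scenario above boils down to a single bigmler call that uploads the training file and requests an evaluation. A minimal standalone sketch, assuming --evaluate and --output are the flags behind the step helpers (--train and --store appear verbatim in the helpers later in this listing):

import subprocess

# Hedged sketch, not taken from the suite: build and run the evaluation
# command the scenario describes. The --evaluate and --output flag names are
# assumptions based on the step wording.
command = ("bigmler --train data/iris.csv --evaluate"
           " --output scenario_e1/evaluation")
subprocess.check_call(command.split())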
Example no. 4
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/grades.csv | ../data/test_grades.csv | ./scenario1_r/predictions.csv | ./check_files/predictions_grades.csv |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1/predictions.csv   | ./check_files/predictions_iris.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario1_r/predictions.csv', 'check_files/predictions_grades.csv'],
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
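
The same pattern, sketched as the train-and-predict command this scenario exercises; --test and --output are assumed flag names, while --train and --store are confirmed by the helpers shown further down:

import subprocess

# Hedged sketch of the end-to-end call: upload the train file, score the
# test file, and write local predictions. --test/--output are assumptions.
command = ("bigmler --train data/iris.csv --test data/test_iris.csv"
           " --output scenario1/predictions.csv")
subprocess.check_call(command.split())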
Example no. 5
    def test_scenario15(self):
        """
            Scenario: Successfully building test predictions from start with different prediction file format:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with prediction options "<options>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |options     |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris.csv   |./scenario15/predictions.csv   |--prediction-header --prediction-fields 'petal length,petal width' --prediction-info full | ./check_files/predictions_iris_h.csv   |

        """
        print self.test_scenario15.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario15/predictions.csv', '--prediction-header --prediction-fields \'petal length,petal width\' --prediction-info full', 'check_files/predictions_iris_h.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_options(self, data=example[0], test=example[1], output=example[2], options=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
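
The prediction-formatting options quoted in the docstring are passed straight through to the CLI; a sketch of the resulting invocation, with --test and --output again assumed (shlex keeps the quoted field list together):

import shlex
import subprocess

# Hedged sketch: the options string is exactly the one in the Examples row
# above; shlex.split preserves 'petal length,petal width' as one argument.
options = ("--prediction-header"
           " --prediction-fields 'petal length,petal width'"
           " --prediction-info full")
command = ("bigmler --train data/iris.csv --test data/test_iris.csv"
           " --output scenario15/predictions.csv " + options)
subprocess.check_call(shlex.split(command))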
Example no. 6
    def test_scenario14(self):
        """
            Scenario: Successfully building test predictions from start with user-given separator:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with "<separator>" as test field separator
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | separator | output                        |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris.tsv   | "\t"        |./scenario14/predictions.csv   | ./check_files/predictions_iris.csv   |

        """
        print self.test_scenario14.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.tsv', '"\t"', 'scenario14/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_separator(self, data=example[0], test=example[1], output=example[3], separator=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 7
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with proportional missing strategy:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with proportional missing strategy and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_1/predictions.csv | ./check_files/predictions_iris_nulls.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris_nulls.csv',
            'scenario_mis_1/predictions.csv',
            'check_files/predictions_iris_nulls.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_proportional(self,
                                                          data=example[0],
                                                          test=example[1],
                                                          output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 8
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_1', 'data/new_fields.json', u'outlier?', u'petal length,outlier?,species']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            dataset_adv.i_check_dataset_has_field(self, example[3])
Example no. 9
    def test_scenario2(self):
        """
            Scenario: Successfully building predictions for data streamed to stdin:
                Given I create BigML resources uploading train "<data>" file to test "<test>" read from stdin with name "<name>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                            |predictions_file           | name |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_st_2/predictions.csv   | ./check_files/predictions_iris.csv   | Source name: áéí |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario_st_2/predictions.csv', 'check_files/predictions_iris.csv', 'Source name: áéí']]
        for example in examples:
            print "\nTesting with:\n", example
            stdin.i_create_all_resources_to_test_from_stdin(self, data=example[0], test=example[1], name=example[4], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 10
    def test_scenario1(self):
        """
            Scenario: Successfully retraining a balanced model
                Given I create a BigML balanced model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I retrain the model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model has doubled its rows
                And I check that the model is balanced

                Examples:
                |data |output_dir  | output_dir_ret
                |../data/iris.csv | ./scenario_rt_1 |./scenario_rt_1b |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_rt_1', 'scenario_rt_1b']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_retrain_model(self, data=example[0], output_dir=example[2])
            test_pred.i_check_create_source(self)
            execute_steps.i_check_create_execution(self, number_of_executions=2)
            test_pred.i_check_create_model_in_execution(self)
            test_pred.i_check_model_double(self)
            test_pred.i_check_model_is_balanced(self)
Example no. 11
    def test_scenario6(self):
        """
            Scenario 6: Successfully building remote test predictions from scratch to a dataset:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely to a dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                Then I check that the batch predictions dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    | output_dir      |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_r5   |
        """

        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario_r5']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_to_dataset(self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_batch_pred.i_check_create_batch_predictions_dataset(self)
            anomaly_pred.i_check_no_local_CSV(self)
Example no. 12
    def test_scenario03(self):
        """
        Scenario: Successfully building test predictions from source
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML resources using source to test "<test>" and log predictions in "<output>"
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario2/predictions.csv   | ./check_files/predictions_iris.csv   |
        """
        print self.test_scenario03.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario2/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_source(self, None, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 13
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 14
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with missing-splits model:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_1/predictions.csv | ./check_files/predictions_iris_missing.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_missing.csv', 'data/test_iris_missing.csv', 'scenario_mspl_1/predictions.csv', 'check_files/predictions_iris_missing.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_missing_splits(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 15
    def test_scenario3(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to create model and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I evaluate "<test>" with proportional missing strategy
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | test                          | output                      | json_evaluation_file    |
                | ../data/iris.csv | ../data/iris_nulls.csv   | ./scenario_mis_3/evaluation | ./check_files/evaluation_iris_nulls.json |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'data/iris_nulls.csv', 'scenario_mis_3/evaluation', 'check_files/evaluation_iris_nulls.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_model(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            evaluation.i_create_proportional_to_evaluate(self, test=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example no. 16
    def test_scenario2(self):
        """
            Scenario: Successfully building evaluations from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to evaluate and log evaluation in "<output>"
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                |scenario    | kwargs                                                  | output                   | json_evaluation_file    |
                | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"}   |./scenario_e2/evaluation | ./check_files/evaluation_iris.json |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_e1', '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}', 'scenario_e2/evaluation', 'check_files/evaluation_iris.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            evaluation.given_i_create_bigml_resources_using_source_to_evaluate(self, output=example[2])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example no. 17
    def test_scenario19(self):
        """
            Scenario: Successfully building test predictions from start using median:
                Given I create BigML resources uploading train "<data>" file using the median to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                       |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario19/predictions.csv | ./check_files/predictions_grades_median.csv |
        """
        print self.test_scenario19.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario19/predictions.csv', 'check_files/predictions_grades_median.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_median(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 18
    def test_scenario14(self):
        """
            Scenario: Successfully building test predictions from start with user-given separator:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with "<separator>" as test field separator
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | separator | output                        |predictions_file           |

        """
        examples = [
            ['data/iris.csv', 'data/test_iris.tsv', '"\t"', 'scenario14/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.test_scenario14, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_separator(self, data=example[0], test=example[1], output=example[3], separator=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 19
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv', 'check_files/predictions_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 20
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
        """
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario1_r/predictions.csv', 'check_files/predictions_grades.csv'],
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.setup_scenario02, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 21
    def test_scenario2(self):
        """
            Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with proportional missing strategy and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the source has been created from the test file
            And I check that the dataset has been created from the test file
            And I check that the batch prediction has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_2/predictions.csv   | ./check_files/predictions_iris_nulls.csv
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris_nulls.csv', 'scenario_mis_2/predictions.csv', 'check_files/predictions_iris_nulls.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_proportional(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 22
    def test_scenario1(self):
        """
            Scenario: Successfully exporting models with params in the available languages:
                Given I create BigML resources uploading train "<data>" file using "<source_attributes>" and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I export the model as a function in "<language>" to "<output>"
                Then the export file is like "<check_file>"

                Examples:
                | data                 | source_attributes             | output                 | language       | check_file
                | ../data/movies.csv   | data/movies_source_attrs.json | ./scenario_exp_1/model | python         | model_function.py

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_a/model', 'python', 'check_files/export/model_function.py'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_b/model', 'javascript', 'check_files/export/model_function.js'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_c/model', 'r', 'check_files/export/model_function.R'],
            ['data/iris.csv', '', 'scenario_exp_1_d/model', 'tableau', 'check_files/export/model_function.tb'],
            ['data/iris.csv', '', 'scenario_exp_1_e/model', 'mysql', 'check_files/export/model_function.sql'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_f/model', 'python', 'check_files/export/model_function_utf8.py'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_g/model', 'r', 'check_files/export/model_function_utf8.R'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_h/model', 'javascript', 'check_files/export/model_function_utf8.js']]
        for example in examples:
            print "\nTesting with:\n", example
            export.i_create_all_resources_to_model_with_source_attrs( \
                self, data=example[0], source_attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_model(self)
            export.i_export_model(self, language=example[3], output=example[2])
            export.i_check_if_the_output_is_like_expected_file( \
                self, language=example[3], expected_file=example[4])
Example no. 23
    def test_scenario6(self):
        """
            Scenario: Successfully building evaluations from start and test-split:
                Given I create BigML resources uploading train "<data>" file to evaluate with test-split <split> and log evaluation in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the train dataset has been created
                And I check that the test dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation key "<key>" value for the model is greater than <value>

                Examples:
                | data             | output                   | split    | key         | value |
                | ../data/iris.csv | ./scenario_e6/evaluation | 0.2      | average_phi | 0.85  |
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', 'scenario_e6/evaluation', '0.2', 'average_phi', '0.85']]
        for example in examples:
            print "\nTesting with:\n", example
            evaluation.i_create_with_split_to_evaluate(self, data=example[0], split=example[2], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.i_check_evaluation_key(self, key=example[3], value=example[4])
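
A sketch of the split evaluation this scenario drives, holding out 20% of the uploaded data for testing; --test-split is an assumption inferred from the step wording, not taken from the suite:

import subprocess

# Hedged sketch: train on 80% of the data and evaluate on the remaining 20%.
# --test-split, --evaluate and --output are assumed flag names.
command = ("bigmler --train data/iris.csv --evaluate --test-split 0.2"
           " --output scenario_e6/evaluation")
subprocess.check_call(command.split())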
Example no. 24
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json', u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision', '5', '98.21%']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self, min_nodes=example[5], max_nodes=example[6], nodes_step=example[7], k_fold=example[8], metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9], example[11])
Example no. 25
    def test_scenario4(self):
        """
            Scenario 4: Successfully building test predictions from dataset
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset to test the previous test dataset remotely and log predictions in "<output>"
                And I check that the model has been created
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_r1| {"data": "../data/iris.csv", "output": "./scenario_r1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario_r3/predictions.csv   | ./check_files/predictions_iris.csv   |

        """

        print self.test_scenario4.__doc__
        examples = [
            ['scenario_r1', '{"data": "data/iris.csv", "output": "scenario_r1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario_r3/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_batch(self, output=example[2])
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 26
    def test_scenario01(self):
        """
        Scenario: Successfully building test predictions from start with no headers:
            Given I create BigML resources uploading train "<data>" file with no headers to test "<test>" with no headers and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris_nh.csv   | ../data/test_iris_nh.csv   | ./scenario1_nh/predictions.csv   | ./check_files/predictions_iris.csv   |


        """
        print self.test_scenario01.__doc__
        examples = [
            ['data/iris_nh.csv', 'data/test_iris_nh.csv', 'scenario1_nh/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_no_headers(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example no. 27
    def test_scenario15(self):
        """
            Scenario: Successfully building test predictions from start with different prediction file format:
                Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>" with prediction options "<options>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |options     |predictions_file           |

        """
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario15/predictions.csv', '--prediction-header --prediction-fields \'petal length,petal width\' --prediction-info full', 'check_files/predictions_iris_h.csv']]
        show_doc(self.test_scenario15, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_with_options(self, data=example[0], test=example[1], output=example[2], options=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 28
    def test_scenario03(self):
        """
        Scenario: Successfully building test predictions from source
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML resources using source to test "<test>" and log predictions in "<output>"
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario2/predictions.csv   | ./check_files/predictions_iris.csv   |
        """
        print self.test_scenario03.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario2/predictions.csv', 'check_files/predictions_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_source(self, None, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example no. 29
def setup_for_fusion(step, train=None, output_dir=None):
    train = res_filename(train)
    command = ("bigmler --train \"" + train + "\" --store --output-dir " +
               output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_pred.i_check_create_source(step)
    test_pred.i_check_create_dataset(step)
    test_pred.i_check_create_model(step)

    command = ("bigmler deepnet --dataset \"" + world.dataset["resource"] +
               "\" --store --output-dir " + output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_dn.i_check_create_dn_model(step)
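
Outside the test harness the two commands this helper builds could be run directly; a standalone sketch with a hypothetical output directory and a placeholder dataset id (inside the suite the id is read from the world context):

import subprocess

output_dir = "scenario_fs_1"  # hypothetical directory name
# First command from the helper: train a model from the CSV file.
subprocess.check_call(("bigmler --train data/iris.csv --store"
                       " --output-dir " + output_dir).split())
# Second command: build a deepnet on the resulting dataset. The id below is
# a placeholder; the helper takes it from world.dataset["resource"].
dataset_id = "dataset/000000000000000000000000"
subprocess.check_call(("bigmler deepnet --dataset " + dataset_id +
                       " --store --output-dir " + output_dir).split())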
Example no. 30
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [[
            'data/iris.csv', 'scenario_a_10', 'data/new_fields2.json',
            u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision',
            '5', '98.21%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self,
                                                    json_file=example[2],
                                                    model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self,
                                              min_nodes=example[5],
                                              max_nodes=example[6],
                                              nodes_step=example[7],
                                              k_fold=example[8],
                                              metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9],
                                             example[11])
Example no. 31
def setup_for_fusion(step, train=None, output_dir=None):
    train = res_filename(train)
    command = ("bigmler --train \"" +
               train + "\" --store --output-dir " +
               output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_pred.i_check_create_source(step)
    test_pred.i_check_create_dataset(step)
    test_pred.i_check_create_model(step)

    command = ("bigmler deepnet --dataset \"" +
               world.dataset["resource"] + "\" --store --output-dir " +
               output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_dn.i_check_create_dn_model(step)
Example no. 32
    def test_scenario2(self):
        """
            Scenario: Successfully retraining from a model using sampled dataset
                Given I create a BigML balanced model from "<data>" sampling 50% of data and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I retrain the model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model has doubled its rows
                And I check that the model is balanced

                Examples:
                |data |output_dir  | output_dir_ret
                |../data/iris.csv | ./scenario_rt_2 |./scenario_rt_2b |
        """
        print self.test_scenario2.__doc__
        examples = [['data/iris.csv', 'scenario_rt_2', 'scenario_rt_2b'],
                    [
                        'https://static.bigml.com/csv/iris.csv',
                        'scenario_rt_2c', 'scenario_rt_2d'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model_from_sample(
                self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_pred.i_check_create_model(self)
            test_pred.i_retrain_model(self,
                                      data=example[0],
                                      output_dir=example[2])
            if not example[0].startswith("https"):
                test_pred.i_check_create_source(self)
            execute_steps.i_check_create_execution(self,
                                                   number_of_executions=2)
            test_pred.i_check_create_model_in_execution(self)
            test_pred.i_check_model_double(self)
            test_pred.i_check_model_is_balanced(self)
Example 33
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv',
            u'scénario1/predictions.csv',
            'check_files/predictions_iris_2fb.csv', u'spécies',
            u'"pétal width"'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(
                self,
                objective=example[5],
                fields=example[6],
                test=example[2],
                output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
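For orientation, the resource-creation step in this scenario amounts to a single bigmler call over the dataset stored in the world object. The lines below are a hedged sketch built from documented bigmler options, not the step's literal implementation; the accented field names of the example row are simplified here.
# Hedged sketch: predict from the stored dataset with an explicit objective
# field and a restricted list of model fields (accented names simplified).
command = ("bigmler --dataset " + world.dataset["resource"] +
           " --objective species --model-fields 'petal width'"
           " --test data/test_iris2fb.csv --output predictions.csv --store")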
Example 34
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv',
            'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv',
            'check_files/predictions_grades.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(self,
                                                       data=example[0],
                                                       test=example[1],
                                                       fields_map=example[2],
                                                       output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
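The first step here bundles training, a fields map and a remote (batch) prediction into one command. A hedged sketch using documented bigmler options:
# Hedged sketch: train on grades.csv, map test columns to training fields with
# the CSV map, and predict remotely through a batch prediction.
command = ("bigmler --train data/grades.csv --test data/test_grades.csv"
           " --fields-map data/grades_fields_map.csv --remote"
           " --output scenario_r1_r/predictions.csv --store")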
Example 35
    def test_scenario2(self):
        """
            Scenario: Successfully building a field weighted model
                Given I create a BigML field weighted model from "<data>" using field "<field>" as weight and "<objective>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model uses as weight "<field_id>"

                Examples:
                |data |field | output_dir  | field_id | objective |
                |../data/iris_w.csv | weight |./scenario_w_2 | 000005 | 000004 |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_w.csv', 'weight', 'scenario_w_2', '000005', 'species']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_weighted_field_model(self, data=example[0], field=example[1], output_dir=example[2], objective=example[4])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_weighted_model(self, field=example[3])
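As a reference, the weighted-field step corresponds to a call along these lines (documented flags, sketched here rather than copied from the step code):
# Hedged sketch: use the "weight" column as instance weight and "species" as
# the objective, as in the example row above.
command = ("bigmler --train data/iris_w.csv --weight-field weight"
           " --objective species --store --output-dir scenario_w_2")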
Example 36
    def test_scenario1(self):
        """
            Scenario: Successfully building a balanced model
                Given I create a BigML balanced model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model is balanced

                Examples:
                |data |output_dir  |
                |../data/iris.csv | ./scenario_w_1 |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_w_1']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_model_is_balanced(self)
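The balanced-model step essentially adds the --balance flag to a plain training run; a hedged sketch:
# Hedged sketch: train a model with balanced objective classes.
command = ("bigmler --train data/iris.csv --balance"
           " --store --output-dir scenario_w_1")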
Example 37
    def test_scenario3(self):
        """
            Scenario: Successfully building an objective weighted model
                Given I create a BigML objective weighted model from "<data>" using the objective weights in file "<path>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model uses as objective weights "<weights>"

                Examples:
                |data |path | output_dir  | weights |
                |../data/iris.csv | ../data/weights.csv |./scenario_w_3 | [["Iris-setosa",5], ["Iris-versicolor",3]] |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'data/weights.csv', 'scenario_w_3', '[["Iris-setosa",5], ["Iris-versicolor",3]]']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_objective_weighted_model(self, data=example[0], path=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_objective_weighted_model(self, weights=example[3])
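For the objective-weighted case the per-class weights come from a CSV file; a hedged sketch of the equivalent call:
# Hedged sketch: per-class objective weights read from weights.csv
# (Iris-setosa,5 and Iris-versicolor,3, matching the expected weights above).
command = ("bigmler --train data/iris.csv --objective-weights data/weights.csv"
           " --store --output-dir scenario_w_3")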
Example 40
    def test_scenario2(self):
        """
            Scenario: Successfully building a field weighted model
                Given I create a BigML field weighted model from "<data>" using field "<field>" as weight and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model uses as weight "<field_id>"

                Examples:
                |data |field | output_dir  | field_id |
                |../data/iris_w.csv | weight |./scenario_w_2 | 000005 |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_w.csv', 'weight', 'scenario_w_2', '000005']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_weighted_field_model(self, data=example[0], field=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_weighted_model(self, field=example[3])
Example 41
    def test_scenario08(self):
        """
        Scenario: Successfully building test predictions from dataset file
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML resources using dataset in file "<dataset_file>" to test "<test>" and log predictions in "<output>"
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | dataset_file        | test                  | output                      |predictions_file         |

        """
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'scenario1/dataset', 'data/test_iris.csv', 'scenario7/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.test_scenario08, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_file(self, dataset_file=example[2], test=example[3], output=example[4])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[5])
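i_create_resources_from_dataset_file points bigmler at the file where a previous scenario stored its dataset id; a hedged sketch of that call, using the documented --datasets option:
# Hedged sketch: reuse the dataset id stored in scenario1/dataset instead of
# uploading the training data again.
command = ("bigmler --datasets scenario1/dataset --test data/test_iris.csv"
           " --output scenario7/predictions.csv --store")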
Example 42
    def test_scenario11(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                         |predictions_file                        | objective | fields   |

        """
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario11/predictions.csv', 'check_files/predictions_iris_b.csv', '0', '"petal length","petal width"']]
        show_doc(self.test_scenario11, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example 43
    def test_scenario1(self):
        """
            Scenario: Successfully retraining a balanced model
                Given I create a BigML balanced model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I retrain the model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model has doubled its rows
                And I check that the model is balanced

                Examples:
                |data |output_dir  | output_dir_ret |
                |../data/iris.csv | ./scenario_rt_1 |./scenario_rt_1b |
        """
        print self.test_scenario1.__doc__
        examples = [['data/iris.csv', 'scenario_rt_1', 'scenario_rt_1b']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model(self,
                                              data=example[0],
                                              output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_retrain_model(self,
                                      data=example[0],
                                      output_dir=example[2])
            test_pred.i_check_create_source(self)
            execute_steps.i_check_create_execution(self,
                                                   number_of_executions=2)
            test_pred.i_check_create_model_in_execution(self)
            test_pred.i_check_model_double(self)
            test_pred.i_check_model_is_balanced(self)
Example 44
    def test_scenario2(self):
        """
            Scenario: Successfully retraining from a model using sampled dataset
                Given I create a BigML balanced model from "<data>" sampling 50% of data and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I retrain the model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model has doubled its rows
                And I check that the model is balanced

                Examples:
                |data |output_dir  | output_dir_ret |
                |../data/iris.csv | ./scenario_rt_2 |./scenario_rt_2b |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'scenario_rt_2', 'scenario_rt_2b'],
            ['https://static.bigml.com/csv/iris.csv', 'scenario_rt_2c',
             'scenario_rt_2d']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model_from_sample(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_pred.i_check_create_model(self)
            test_pred.i_retrain_model(self, data=example[0], output_dir=example[2])
            if not example[0].startswith("https"):
                test_pred.i_check_create_source(self)
            execute_steps.i_check_create_execution(self, number_of_executions=2)
            test_pred.i_check_create_model_in_execution(self)
            test_pred.i_check_model_double(self)
            test_pred.i_check_model_is_balanced(self)
Example 45
    def test_scenario11(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                         |predictions_file                        | objective | fields   |
                | scenario1| {"data": "../data/iris.csv", "output": "./scenario1/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario11/predictions.csv   | ./check_files/predictions_iris_b.csv   | 0         | "petal length","petal width" |

        """
        print self.test_scenario11.__doc__
        examples = [
            ['scenario1', '{"data": "data/iris.csv", "output": "scenario1/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario11/predictions.csv', 'check_files/predictions_iris_b.csv', '0', '"petal length","petal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example 46
    def test_scenario6(self):
        """
            Scenario: Successfully building evaluations from start and test-split:
                Given I create BigML resources uploading train "<data>" file to evaluate with test-split <split> and log evaluation in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the train dataset has been created
                And I check that the test dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation key "<key>" value for the model is greater than <value>

                Examples:
                | data             | output                   | split    | key         | value |
                | ../data/iris.csv | ./scenario_e6/evaluation | 0.2      | average_phi | 0.85  |
        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris.csv', 'scenario_e6/evaluation', '0.2', 'average_phi',
            '0.85'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            evaluation.i_create_with_split_to_evaluate(self,
                                                       data=example[0],
                                                       split=example[2],
                                                       output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.i_check_evaluation_key(self,
                                              key=example[3],
                                              value=example[4])
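The split evaluation trains on 80% of the uploaded data and evaluates on the held-out 20%; a hedged sketch of the underlying call:
# Hedged sketch: hold out 20% of the dataset for the evaluation.
command = ("bigmler --train data/iris.csv --evaluate --test-split 0.2"
           " --store --output scenario_e6/evaluation")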
Example 47
    def test_scenario1(self):
        """
            Scenario: Successfully exporting models with params in the available languages:
                Given I create BigML resources uploading train "<data>" file using "<source_attributes>" and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I export the model as a function in "<language>" to "<output>"
                Then the export file is like "<check_file>"

                Examples:
                | data                 | source_attributes             | output                 | language       | check_file |
                | ../data/movies.csv   | data/movies_source_attrs.json | ./scenario_exp_1/model | python         | model_function.py |

        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/movies.csv', 'data/movies_source_attrs.json',
            'scenario_exp_1_a/model', 'python',
            'check_files/export/model_function.py'
        ],
                    [
                        'data/movies.csv', 'data/movies_source_attrs.json',
                        'scenario_exp_1_b/model', 'javascript',
                        'check_files/export/model_function.js'
                    ],
                    [
                        'data/movies.csv', 'data/movies_source_attrs.json',
                        'scenario_exp_1_c/model', 'r',
                        'check_files/export/model_function.R'
                    ],
                    [
                        'data/iris.csv', '', 'scenario_exp_1_d/model',
                        'tableau', 'check_files/export/model_function.tb'
                    ],
                    [
                        'data/iris.csv', '', 'scenario_exp_1_e/model', 'mysql',
                        'check_files/export/model_function.sql'
                    ],
                    [
                        'data/libros.csv', 'data/libros_source_attrs.json',
                        'scenario_exp_1_f/model', 'python',
                        'check_files/export/model_function_utf8.py'
                    ],
                    [
                        'data/libros.csv', 'data/libros_source_attrs.json',
                        'scenario_exp_1_g/model', 'r',
                        'check_files/export/model_function_utf8.R'
                    ],
                    [
                        'data/libros.csv', 'data/libros_source_attrs.json',
                        'scenario_exp_1_h/model', 'javascript',
                        'check_files/export/model_function_utf8.js'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            export.i_create_all_resources_to_model_with_source_attrs( \
                self, data=example[0], source_attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_model(self)
            export.i_export_model(self, language=example[3], output=example[2])
            export.i_check_if_the_output_is_like_expected_file( \
                self, language=example[3], expected_file=example[4])
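i_export_model writes the model as a standalone prediction function in the requested language. The sketch below assumes the bigmler export subcommand and its --language flag; it is not read from the step implementation, and world.model is assumed to hold the last created model.
# Hedged sketch (assumed subcommand and flags): export the last model as a
# Python function into the scenario's output directory.
command = ("bigmler export --model " + world.model["resource"] +
           " --language python --output-dir scenario_exp_1_a/model")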