Example 1
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_1', 'data/new_fields.json',
            u'outlier?', u'petal length,outlier?,species'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self,
                                                    json_file=example[2],
                                                    model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            dataset_adv.i_check_dataset_has_field(self, example[3])
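The positional indices (example[0] ... example[4]) are easy to misread against the table above. A minimal, purely illustrative sketch of the same row with named fields; the DatasetExample helper is hypothetical and not part of the test suite:

    from collections import namedtuple

    # Hypothetical names for the positional columns used by test_scenario1
    DatasetExample = namedtuple(
        "DatasetExample",
        ["data", "output_dir", "new_fields", "field", "model_fields"])

    example = DatasetExample("data/iris.csv", "scenario_d_1",
                             "data/new_fields.json", "outlier?",
                             "petal length,outlier?,species")
    # example.new_fields / example.model_fields replace example[2] / example[4]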
Example 2
    def test_scenario6(self):
        """
            Scenario 6: Successfully building remote test predictions from scratch to a dataset:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely to a dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                Then I check that the batch predictions dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    | output_dir      |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_r5   |
        """

        print self.test_scenario6.__doc__
        examples = [['data/iris.csv', 'data/test_iris.csv', 'scenario_r5']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_to_dataset(
                self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_batch_pred.i_check_create_batch_predictions_dataset(self)
            anomaly_pred.i_check_no_local_CSV(self)
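For context, i_create_all_resources_batch_to_dataset wraps a single bigmler call; a rough sketch follows. The exact flags are assumptions inferred from the scenario wording (remote batch prediction stored as a dataset, no local CSV) and are not verified against the bigmler reference:

    import subprocess

    # Assumed bigmler invocation; --to-dataset and --no-csv in particular are guesses
    subprocess.check_call([
        "bigmler", "--train", "data/iris.csv", "--test", "data/test_iris.csv",
        "--remote", "--to-dataset", "--no-csv",
        "--output-dir", "scenario_r5"])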
Example 3
    def test_scenario9(self):
        """
            Scenario: Successfully building random fields analysis from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML random fields analysis with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-random trees have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best random candidates number is "<random_candidates>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | kfold | metric   | random_candidates | metric_value |
                | ../data/iris.csv | ./scenario_a_11/evaluation |2     | precision  | 4               | 96.09%         |
        """
        print self.test_scenario9.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_11/evaluation', '2', 'precision', '4', '96.09%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_random_analysis(self, k_fold=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_random_forest(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_random_candidates(self, example[4], example[3], example[5])
Example 4
    def test_scenario9(self):
        """
            Scenario: Successfully deleting resources in a time range and with a tag:
                Given I create a BigML source from file "<data>" storing results in "<output_dir>"
                And I check that the source has been created
                And I store the source id as lower
                And I create a BigML source from file "<data>" with tag "<tag1>" storing results in "<output_dir>"
                And I check that the source exists
                And I create a BigML dataset from the source with tag "<tag1>" storing results in "<output_dir2>"
                And I check that the dataset exists
                And I delete the resources using --newer-than and --all-tag "<tag1>" storing results in "<output_dir3>"
                Then I check that the source doesn't exist
                And I check that the dataset doesn't exist

                Examples:
                | data               | output_dir       | tag1 | output_dir2       | output_dir3
                | ../data/iris.csv   | ./scenario_del_9 | my_tag1 | ./scenario_del_9_2  | ./scenario_del_9_3
        """
        print self.test_scenario9.__doc__
        examples = [["data/iris.csv", "scenario_del_9", "my_tag1", "scenario_del_9_2", "scenario_del_9_3"]]
        for example in examples:
            print "\nTesting with:\n", example
            test_delete.i_create_source_from_file(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_delete.i_store_source_id_as_bound(self, which="lower")
            test_delete.i_create_source_from_file_with_tag(self, data=example[0], tag=example[2], output_dir=example[3])
            test_delete.i_check_source_exists(self)
            test_delete.i_store_source_id_as_bound(self, which="reference")
            test_delete.i_create_dataset_from_source_with_tag(self, tag=example[2], output_dir=example[4])
            test_delete.i_check_dataset_exists(self)
            test_delete.i_delete_resources_newer_and_tag(self, tag=example[2], output_dir=example[1])
            test_delete.i_check_source_does_not_exist(self, source_id=world.source_reference)
            test_delete.i_check_dataset_does_not_exist(self, dataset_id=world.dataset)
            test_delete.i_check_source_exists_by_id(self, source_id=world.source_lower)
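The deletion step corresponds to bigmler's delete subcommand with the --newer-than and --all-tag options named in the scenario. A hedged sketch, with the source id reduced to a placeholder:

    import subprocess

    source_lower = "source/..."  # placeholder for the id stored above as the lower bound
    subprocess.check_call([
        "bigmler", "delete", "--newer-than", source_lower,
        "--all-tag", "my_tag1", "--output-dir", "scenario_del_9_3"])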
Example 5
    def test_scenario3(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to create model and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I evaluate "<test>" with proportional missing strategy
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | test                          | output                      | json_evaluation_file    |
                | ../data/iris.csv | ../data/iris_nulls.csv   | ./scenario_mis_3/evaluation | ./check_files/evaluation_iris_nulls.json |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'data/iris_nulls.csv', 'scenario_mis_3/evaluation', 'check_files/evaluation_iris_nulls.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_model(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            evaluation.i_create_proportional_to_evaluate(self, test=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example 6
    def test_scenario5(self):
        """
            Scenario: Successfully building nodes threshold analysis from dataset file:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML nodes analysis from dataset file from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value
                | ../data/iris.csv | ./scenario_a_4/evaluation | 3         | 14        | 2         |2     | precision  | 9                | 94.71%
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_4/evaluation', '3', '14', '2', '2', 'precision', '9', '94.71%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_nodes_analysis_from_dataset_file(
                self, min_nodes=example[2], max_nodes=example[3],
                nodes_step=example[4], k_fold=example[5], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_node_threshold(self, example[7], example[6], example[8])
Example 7
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris_2fd.csv', 'sepal length', 'species', 'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall', 'petal width', '100.00%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            dataset.i_filter_field_from_dataset(self, field=example[1], output_dir=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[5], objective=example[2], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_feature_selection(self, example[7], example[6], example[8])
Example 8
    def test_scenario1(self):
        """
            Scenario: Successfully exporting models with params in the available languages:
                Given I create BigML resources uploading train "<data>" file using "<source_attributes>" and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I export the model as a function in "<language>" to "<output>"
                Then the export file is like "<check_file>"

                Examples:
                | data                 | source_attributes             | output                 | language       | check_file
                | ../data/movies.csv   | data/movies_source_attrs.json | ./scenario_exp_1/model | python         | model_function.py

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_a/model', 'python', 'check_files/export/model_function.py'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_b/model', 'javascript', 'check_files/export/model_function.js'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_c/model', 'r', 'check_files/export/model_function.R'],
            ['data/iris.csv', '', 'scenario_exp_1_d/model', 'tableau', 'check_files/export/model_function.tb'],
            ['data/iris.csv', '', 'scenario_exp_1_e/model', 'mysql', 'check_files/export/model_function.sql'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_f/model', 'python', 'check_files/export/model_function_utf8.py'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_g/model', 'r', 'check_files/export/model_function_utf8.R'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_h/model', 'javascript', 'check_files/export/model_function_utf8.js']]
        for example in examples:
            print "\nTesting with:\n", example
            export.i_create_all_resources_to_model_with_source_attrs( \
                self, data=example[0], source_attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_model(self)
            export.i_export_model(self, language=example[3], output=example[2])
            export.i_check_if_the_output_is_like_expected_file( \
                self, language=example[3], expected_file=example[4])
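i_check_if_the_output_is_like_expected_file presumably diffs the exported function against the reference under check_files/export. A small sketch of that kind of assertion; the helper name and the generated-file path are assumptions:

    # Hypothetical content check for an exported model function
    def assert_same_content(generated_path, expected_path):
        with open(generated_path) as generated, open(expected_path) as expected:
            assert generated.read() == expected.read()

    assert_same_content("scenario_exp_1_a/model/model_function.py",  # assumed filename
                        "check_files/export/model_function.py")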
Example 9
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with proportional missing strategy:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with proportional missing strategy and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_1/predictions.csv | ./check_files/predictions_iris_nulls.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris_nulls.csv',
            'scenario_mis_1/predictions.csv',
            'check_files/predictions_iris_nulls.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_proportional(self,
                                                          data=example[0],
                                                          test=example[1],
                                                          output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example 10
    def test_scenario3(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | objective     |output                    | kfold | metric   | selection            | metric_value |
                | ../data/iris_2f.csv | 0             |./scenario_a_5/evaluation | 2     | r_squared| species              | 0.352845     |
                | ../data/iris_2f.csv | 0             |./scenario_a_8/evaluation | 2     | mean_squared_error| species     | 0.475200     |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris_2f.csv', '0', 'scenario_a_5/evaluation', '2', 'r_squared', 'species', '0.352845'],
            ['data/iris_2f.csv', '0', 'scenario_a_8/evaluation', '2', 'mean_squared_error', 'species', '0.475200']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[3], objective=example[1], metric=example[4])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[5], example[4], example[6])
Example 11
    def setup_scenario1(self):
        """
            Scenario: Successfully building test anomaly scores from scratch:
                Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data                 | test               | output                           |predictions_file           |
                | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'data/tiny_kdd.csv', 'data/test_kdd.csv',
            'scenario_an_1/anomaly_scores.csv',
            'check_files/anomaly_scores_kdd.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources(self,
                                                        data=example[0],
                                                        test=example[1],
                                                        output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
Example 12
    def test_scenario7(self):
        """
            Scenario: Successfully building anomalous dataset test predictions from anomaly
                Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                Then I check that the new top anomalies dataset has been created
                And the top anomalies in the anomaly detector are <top_anomalies>
                And the forest size in the anomaly detector is <forest_size>
                And the number of records in the top anomalies dataset is <top_anomalies>

                Examples:
                | data               | options                              | output_dir     | top_anomalies | forest_size |
                | data/tiny_kdd.csv  | --top-anomalies 15 --forest-size 40 | scenario_an_7  | 15            | 40          |

        """
        print self.test_scenario7.__doc__
        examples = [[
            'data/tiny_kdd.csv', '--top-n 15 --forest-size 40 ',
            'scenario_an_7', '15', '40'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_anomaly_resources_with_options(
                self, example[0], example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_top_anomalies(self, example[3])
            test_anomaly.i_check_forest_size(self, example[4])
            test_anomaly.i_check_dataset_lines_number(self, example[3])
Example 13
    def test_scenario01(self):
        """
        Scenario: Successfully building deepnet test predictions from start with no headers:
            Given I create BigML deepnet resources uploading train "<data>" file with no headers to test "<test>" with no headers and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the deepnet model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris_nh.csv   | ../data/test_iris_nh.csv   | ./scenario1_dn_nh/predictions.csv   | ./check_files/predictions_iris_dn.csv   |


        """
        print self.test_scenario01.__doc__
        examples = [[
            'data/iris_nh.csv', 'data/test_iris_nh.csv',
            'scenario1_dn_nh/predictions.csv',
            'check_files/predictions_iris_dn.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dn_pred.i_create_all_dn_resources_with_no_headers(
                self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dn_pred.i_check_create_dn_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example 14
    def test_scenario1(self):
        """
            Scenario: Successfully generating reports in Gazibit:
                Given I create BigML resources and share them uploading train "<data>" file to evaluate and log evaluation and reports in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created and shared
                And I check that the model has been created and shared
                Then I check that the evaluation has been created and shared
                And I check that the Gazibit report has been created
                And I check that the Gazibit shared report has been created

                Examples:
                | data               | output                      |
                | ../data/iris.csv   | ./scenario_rpt_1/evaluation |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_rpt_1/evaluation']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_evaluate_and_report(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset_shared(self)
            test_pred.i_check_create_model_shared(self)
            test_pred.i_check_create_evaluation_shared(self)
            test_pred.i_check_gazibit_reports(self, shared=None)
            test_pred.i_check_gazibit_reports(self, shared='shared ')
Example 15
    def test_scenario5(self):
        """
            Scenario: Successfully building nodes threshold analysis from dataset file:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML nodes analysis from dataset file from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value
                | ../data/iris.csv | ./scenario_a_4/evaluation | 3         | 14        | 2         |2     | precision  | 9                | 94.71%
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_4/evaluation', '3', '14', '2', '2', 'precision', '9', '94.71%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_nodes_analysis_from_dataset_file(
                self, min_nodes=example[2], max_nodes=example[3],
                nodes_step=example[4], k_fold=example[5], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_node_threshold(self, example[7], example[6], example[8])
Example 16
    def test_scenario7(self):
        """
            Scenario: Successfully importing fields summary to a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I import fields attributes in file "<summary_file>" to dataset
                Then the field "<field_id>" has "<attribute>" equal to "<attribute_value>"

                Examples:
                |data |output_dir | summary_file | field_id | attribute | attribute_value
                |../data/iris.csv | ./scenario_d_7 | fields_summary_modified.csv |  000000 | name | sepal_length
        """
        print self.test_scenario7.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_7',
            'data/fields_summary_modified.csv', '000000', 'name',
            'sepal_length'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_import_fields(self, summary=example[2])
            dataset_adv.field_attribute_value(self,
                                              field=example[3],
                                              attribute=example[4],
                                              attribute_value=example[5])
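The final assertion can be reproduced directly with the BigML Python bindings; a sketch assuming the usual dataset resource layout, with fields keyed by id under the resource's 'fields' entry:

    from bigml.api import BigML

    api = BigML()  # credentials read from BIGML_USERNAME / BIGML_API_KEY
    dataset_id = "dataset/..."  # placeholder for the dataset built above
    dataset = api.get_dataset(dataset_id)
    # after importing fields_summary_modified.csv, field 000000 should be renamed
    assert dataset["object"]["fields"]["000000"]["name"] == "sepal_length"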
Example 17
    def test_scenario6(self):
        """
            Scenario: Successfully building feature selection from dataset excluding features:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations excluding "<features>" with separator "<args_separator>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                    | kfold | features              | args_separator | metric   | selection   | metric_value |
                | ../data/iris.csv | ./scenario_a_7/evaluation | 2     | petal length!sepal width | !              | accuracy | petal width | 95.33%      |
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_7/evaluation', '2', 'petal length!sepal width', '!', 'accuracy', 'petal width', '95.33%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_separator_metric_no_fields(self, k_folds=example[2], features=example[3], args_separator=example[4], metric=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[6], example[5], example[7])
Example 18
    def test_scenario2(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch to dataset:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                Then I check that the batch centroids dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    |  output_dir     |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_to_dataset(self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            batch_pred.i_check_create_batch_centroids_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example 19
    def test_scenario7(self):
        """
            Scenario: Successfully building feature selection for a category from dataset:
                Given I create BigML dataset uploading train "<data>" file with attributes "<attributes>" in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>" for category "<category>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | attributes | output                    | kfold | metric   | category | selection   | metric_value
                | ../data/spam.csv    | ../data/spam_attributes.json |./scenario_a_9/evaluation | 2     | recall   | spam     | Message     | 58.69%
        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/spam.csv', 'data/spam_attributes.json', 'scenario_a_9/evaluation', '2', 'recall', 'spam', 'Message', '58.69%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset_with_attributes(self, data=example[0], attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric_category(self, k_folds=example[3], metric=example[4], category=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[6], example[4], example[7])
Example 20
    def test_scenario3(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | objective     |output                    | kfold | metric   | selection            | metric_value |
                | ../data/iris_2f.csv | 0             |./scenario_a_5/evaluation | 2     | r_squared| species              | 0.352845     |
                | ../data/iris_2f.csv | 0             |./scenario_a_8/evaluation | 2     | mean_squared_error| species     | 0.475200     |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris_2f.csv', '0', 'scenario_a_5/evaluation', '2', 'r_squared', 'species', '0.352845'],
            ['data/iris_2f.csv', '0', 'scenario_a_8/evaluation', '2', 'mean_squared_error', 'species', '0.475200']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[3], objective=example[1], metric=example[4])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[5], example[4], example[6])
Example 21
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json', u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision', '5', '98.21%']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self, min_nodes=example[5], max_nodes=example[6], nodes_step=example[7], k_fold=example[8], metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9], example[11])
Example 22
    def test_scenario7(self):
        """
            Scenario: Successfully building feature selection for a category from dataset:
                Given I create BigML dataset uploading train "<data>" file with attributes "<attributes>" in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>" for category "<category>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | attributes | output                    | kfold | metric   | category | selection   | metric_value
                | ../data/spam.csv    | ../data/spam_attributes.json |./scenario_a_9/evaluation | 2     | recall   | spam     | Message     | 61.24%
        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/spam.csv', 'data/spam_attributes.json', 'scenario_a_9/evaluation', '2', 'recall', 'spam', 'Message', '61.24%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset_with_attributes(self, data=example[0], attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric_category(self, k_folds=example[3], metric=example[4], category=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[6], example[4], example[7])
Example 23
    def test_scenario9(self):
        """
            Scenario: Successfully building random fields analysis from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML random fields analysis with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-random trees have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best random candidates number is "<random_candidates>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | kfold | metric   | random_candidates | metric_value |
                | ../data/iris.csv | ./scenario_a_11/evaluation |2     | precision  | 4               | 96.09%         |
        """
        print self.test_scenario9.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_11/evaluation', '2', 'precision', '4', '96.09%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_random_analysis(self, k_fold=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_random_forest(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_random_candidates(self, example[4], example[3], example[5])
Example 24
    def test_scenario2(self):
        """
            Scenario: Successfully building feature selection from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>
                And I generate a report from the output directory
                And a symlink file is generated in the reports directory

                Examples:
                | data                | output                    | kfold | metric   | selection   | metric_value
                | ../data/iris_2f.csv | ./scenario_a_2/evaluation | 2     | accuracy | petal width | 100.00%
                | ../data/iris_2f.csv | ./scenario_a_3/evaluation | 2     | phi      | petal width | 1
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_2f.csv', 'scenario_a_2/evaluation', '2', 'accuracy', 'petal width', '100.00%'],
            ['data/iris_2f.csv', 'scenario_a_3/evaluation', '2', 'phi', 'petal width', '1']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric(self, k_folds=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[4], example[3], example[5])
            test_pred.i_generate_report(self)
            test_pred.is_symlink(self)
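is_symlink presumably checks that the report step left a symlink in the reports directory, as the scenario states. A hypothetical version of that check; the directory path is an assumption:

    import os

    reports_dir = "reports"  # assumed location of the generated reports
    assert any(os.path.islink(os.path.join(reports_dir, name))
               for name in os.listdir(reports_dir))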
Example 25
    def test_scenario11(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validation with options "<options>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-ensembles have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the predictions file "<predictions_file>" is like "<estimated_file>"

                Examples:
                | data              |output                    | kfold | options   | predictions_file | estimated_file
                | ../data/iris.csv |./scenario_a_14/evaluation | 2     | --exclude-features="species,petal length" --predictions-csv --number-of-models 2| scenario_a_14/kfold2_pred/predictions.csv | check_files/analyze_predictions_iris_e.csv
        """
        print self.test_scenario11.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_14/evaluation', '2', ' --exclude-features="petal length,sepal length" --predictions-csv --number-of-models 2','scenario_a_14/test/kfold2_pred/predictions.csv', 'check_files/analyze_predictions_iris_e.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_options(self, k_folds=example[2], options=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_ensembles(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_predictions_file(self, example[4], example[5])
Example 26
    def test_scenario4(self):
        """
            Scenario: Successfully building a multi-dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from previous source and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I create a multi-dataset from the datasets file and store logs in "<output_dir2>"
                And I check that the multi-dataset has been created
                Then I check that the multi-dataset's origin are the datasets in "<output_dir>"

                Examples:
                |data |output_dir  |output_dir2 |
                |../data/iris.csv | ./scenario_d_4 | ./scenario_d_4a|
        """
        print self.test_scenario4.__doc__
        examples = [['data/iris.csv', 'scenario_d_4', 'scenario_d_4a']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_from_source(self,
                                                     output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_multi_dataset(self, example[2])
            dataset_adv.i_check_create_multi_dataset(self)
            dataset_adv.i_check_multi_dataset_origin(self,
                                                     output_dir=example[1])
Example 27
    def test_scenario1(self):
        """
            Scenario: Successfully building k-fold cross-validation from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML <kfold>-fold cross-validation
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that the <kfold>-fold cross-validation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | output                    | kfold | json_evaluation_file               |
                | ../data/iris.csv | ./scenario_a_1/evaluation | 2     | ./check_files/evaluation_kfold.json |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_1/evaluation', '2', 'check_files/evaluation_kfold.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation(self, k_folds=example[2])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_kfold_cross_validation(self, example[2])
            evaluation.then_the_evaluation_file_is_like(self, example[3])
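then_the_evaluation_file_is_like compares the produced evaluation with check_files/evaluation_kfold.json. A sketch that compares the parsed JSON rather than raw text, so key order and whitespace are ignored; the generated file name is an assumption:

    import json

    # Hypothetical structural comparison of two evaluation JSON files
    def assert_same_evaluation(generated_path, expected_path):
        with open(generated_path) as generated, open(expected_path) as expected:
            assert json.load(generated) == json.load(expected)

    assert_same_evaluation("scenario_a_1/evaluation.json",  # assumed output name
                           "check_files/evaluation_kfold.json")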
Example 28
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator and <number_of_models> models ensembles to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models in the ensembles have been created
                Then I check that the predictions are ready

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator |number_of_models | test                        | output                                   |
                |my_multilabel_1|:|7| ../data/multilabel.csv |,|10| ../data/test_multilabel.csv | ./scenario_mle_1/predictions.csv
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'my_multilabel_1%s' % PY3, ':', '7', 'data/multilabel.csv', ',',
            '10', 'data/test_multilabel.csv', 'scenario_mle_1/predictions.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_ml_resources_and_ensembles(
                self,
                tag=example[0],
                label_separator=example[1],
                number_of_labels=example[2],
                data=example[3],
                training_separator=example[4],
                number_of_models=example[5],
                test=example[6],
                output=example[7])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models_in_ensembles(self,
                                                         in_ensemble=True)
            test_pred.i_check_create_predictions(self)
Example n. 29
    def test_scenario2(self):
        """
            Scenario: Successfully building feature selection from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>
                And I generate a report from the output directory
                And a symlink file is generated in the reports directory

                Examples:
                | data                | output                    | kfold | metric   | selection   | metric_value
                | ../data/iris_2f.csv | ./scenario_a_2/evaluation | 2     | accuracy | petal width | 100.00%
                | ../data/iris_2f.csv | ./scenario_a_3/evaluation | 2     | phi      | petal width | 1
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_2f.csv', 'scenario_a_2/evaluation', '2', 'accuracy', 'petal width', '100.00%'],
            ['data/iris_2f.csv', 'scenario_a_3/evaluation', '2', 'phi', 'petal width', '1']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric(self, k_folds=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[4], example[3], example[5])
            test_pred.i_generate_report(self)
            test_pred.is_symlink(self)
Example n. 30
    def test_scenario2(self):
        """
            Scenario: Successfully associating resources to an existing project:
                Given I create a BigML source uploading train "<data>" file and associate it to a new project named "<project>" storing results in "<output_dir>"
                And I check that the project has been created
                And I check that the source has been created
                And I create a BigML source uploading train "<data>" file and associate it to the last created project id storing results in "<output_dir2>"
                Then the source is associated to the project

                Examples:
                | data             | project         | output_dir     | output_dir2
                | ../data/iris.csv | My new project  | ./scenario_p_2 | ./scenario_p_2_1
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'My new project', 'scenario_p_2', 'scenario_p_2_1'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_project.i_create_source_with_project(self,
                                                      data=example[0],
                                                      project=example[1],
                                                      output_dir=example[2])
            test_project.i_check_create_project(self)
            test_pred.i_check_create_source(self)
            test_project.i_create_source_with_project_id(self,
                                                         data=example[0],
                                                         output_dir=example[3])
            test_project.check_source_in_project(self)
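
For reference, tying a source to a project with the BigML Python bindings is a single creation argument; this is only a hedged sketch of the idea the scenario exercises, not the code behind test_project's step helpers.

from bigml.api import BigML

api = BigML()

# create the project first, then pass its id when uploading the source
project = api.create_project({"name": "My new project"})
api.ok(project)
source = api.create_source("data/iris.csv",
                           {"project": project["resource"]})
api.ok(source)
# the finished source carries the project id, which is what the
# "source is associated to the project" step verifies remotely
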
Example n. 31
    def test_scenario11(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validation with options "<options>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-ensembles have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the predictions file "<predictions_file>" is like "<estimated_file>"

                Examples:
                | data              |output                    | kfold | options   | predictions_file | estimated_file
                | ../data/iris.csv |./scenario_a_14/evaluation | 2     | --exclude-features="species,petal length" --predictions-csv --number-of-models 2| scenario_a_14/kfold2_pred/predictions.csv | check_files/analyze_predictions_iris_e.csv
        """
        print self.test_scenario11.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_14/evaluation', '2', ' --exclude-features="petal length,sepal length" --predictions-csv --number-of-models 2','scenario_a_14/test/kfold2_pred/predictions.csv', 'check_files/analyze_predictions_iris_e.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_options(self, k_folds=example[2], options=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_ensembles(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_predictions_file(self, example[4], example[5])
Example n. 32
    def test_scenario6(self):
        """
            Scenario: Successfully exporting fields summary from a dataset
                Given I create a BigML dataset from "<data>" and a summary file "<summary_file>" for its fields and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then the expected file "<expected_file>" is like "<summary_file>"

                Examples:
                |data |output_dir | summary_file | expected_file
                |../data/iris.csv | ./scenario_d_6 | fields_summary.csv | check_files/fields_summary.csv
        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_6', 'fields_summary.csv',
            'check_files/fields_summary.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset_with_summary(self,
                                                      data=example[0],
                                                      summary_file=example[2],
                                                      output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_files_equal(self, example[2], example[3])
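
The fields summary checked above is, in essence, a per-field CSV export. A minimal sketch with the bindings is below; the column choice is an assumption for illustration, since the real summary bigmler writes is richer than just name and type.

import csv
from bigml.api import BigML

api = BigML()

source = api.create_source("data/iris.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)

# dataset["object"]["fields"] maps field ids to their descriptions
with open("fields_summary.csv", "w") as handle:
    writer = csv.writer(handle)
    writer.writerow(["field id", "name", "type"])
    for field_id, field in sorted(dataset["object"]["fields"].items()):
        writer.writerow([field_id, field["name"], field["optype"]])
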
Example n. 33
    def test_scenario5(self):
        """
            Scenario: Successfully building evaluation from model and test file with data map
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using test file "<test>" and a fields map "<fields_map>" to evaluate a model and log evaluation in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                |scenario    | kwargs                                                                   | test             | fields_map | output                   | json_evaluation_file     |
                | scenario_e1| {"data": "../data/iris.csv", "output": "./scenario_e1/predictions.csv"}  | ../data/iris_permuted.csv | ../data/fields_map.csv | ./scenario_e7/evaluation | ./check_files/evaluation_iris2.json |
        """
        print self.test_scenario5.__doc__
        examples = [[
            'scenario_e1',
            '{"data": "data/iris.csv", "output": "scenario_e1/predictions.csv"}',
            'data/iris_permuted.csv', 'data/fields_map.csv',
            'scenario_e7/evaluation', 'check_files/evaluation_iris2.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            evaluation.i_create_all_resources_to_evaluate_with_model_and_map(
                self,
                data=example[2],
                fields_map=example[3],
                output=example[4])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[5])
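
The fields map in this scenario exists because the permuted test file orders its columns differently from the training data. Passing such a map directly through the bindings looks roughly like this; the resource ids and field-id pairs below are placeholders standing in for what data/fields_map.csv provides.

from bigml.api import BigML

api = BigML()

# left-hand ids belong to the model, right-hand ids to the permuted test
# dataset; the real pairs come from data/fields_map.csv (placeholders here)
fields_map = {"000000": "000004", "000001": "000003", "000002": "000002",
              "000003": "000001", "000004": "000000"}

model_id = "model/000000000000000000000000"          # placeholder ids
test_dataset_id = "dataset/111111111111111111111111"
evaluation = api.create_evaluation(model_id, test_dataset_id,
                                   {"fields_map": fields_map})
api.ok(evaluation)
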
Example n. 34
    def test_scenario11(self):
        """
            Scenario: Successfully building association from a sampled dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML association with params "<params>" from dataset in "<output_dir>"
                And I check that the association has been created
                And the association params are "<params_json>"

                Examples:
                |data |output_dir | params | params_json
                |../data/iris.csv | ./scenario_d_11 | "--sample-rate 0.2 --replacement" | {"sample_rate": 0.2, "replacement": true}
        """
        print self.test_scenario11.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_11',
            '--sample-rate 0.2 --replacement',
            '{"sample_rate": 0.2, "replacement": true}'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_association_with_params_from_dataset( \
                self, params=example[2], output_dir=example[1])
            test_pred.i_check_create_association(self)
            dataset_adv.i_check_association_params(self,
                                                   params_json=example[3])
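
Creating the association itself is a single bindings call once the dataset exists; a minimal sketch, where sample_rate and replacement are the same creation arguments the params check looks for and the dataset id is a placeholder.

from bigml.api import BigML

api = BigML()

dataset_id = "dataset/000000000000000000000000"  # logged by the previous steps
association = api.create_association(dataset_id,
                                     {"sample_rate": 0.2, "replacement": True})
api.ok(association)
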
Example n. 35
    def setup_scenario1(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to evaluate and log evaluation in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | output                   | json_evaluation_file    |
                | ../data/iris.csv | ./scenario_e1/evaluation | ./check_files/evaluation_iris.json |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'scenario_e1/evaluation',
            'check_files/evaluation_iris.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_evaluate(self,
                                                         data=example[0],
                                                         output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[2])
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example n. 37
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example n. 38
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroids from scratch:
                Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test               | output                           |predictions_file           |
                | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv |
                | ../data/diabetes.csv   | ../data/diabetes.csv   | ./scenario_c_1/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'],
            ['data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[3])
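
The "local centroids file" in these cluster scenarios is built by scoring each test row client-side. A minimal sketch of that idea with the bindings' local Cluster class, using the iris data so the sample row stays short; the row values are arbitrary and the sketch is only an illustration, not the step helpers' code.

from bigml.api import BigML
from bigml.cluster import Cluster

api = BigML()

source = api.create_source("data/iris.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
cluster = api.create_cluster(dataset)
api.ok(cluster)

# download the cluster once and compute centroids locally, no per-row API calls
local_cluster = Cluster(cluster, api=api)
row = {"sepal length": 5.0, "sepal width": 3.4,
       "petal length": 1.6, "petal width": 0.3, "species": "Iris-setosa"}
print local_cluster.centroid(row)
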
Example n. 39
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with missing-splits model:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_1/predictions.csv | ./check_files/predictions_iris_missing.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_missing.csv', 'data/test_iris_missing.csv', 'scenario_mspl_1/predictions.csv', 'check_files/predictions_iris_missing.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_missing_splits(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def test_scenario2(self):
        """
            Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with proportional missing strategy and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the source has been created from the test file
            And I check that the dataset has been created from the test file
            And I check that the batch prediction has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_2/predictions.csv   | ./check_files/predictions_iris_nulls.csv
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris_nulls.csv', 'scenario_mis_2/predictions.csv', 'check_files/predictions_iris_nulls.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_proportional(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
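
The remote scenario above asks for the proportional strategy when fields are missing in the test rows; the same strategy exists client-side on the local Model. A minimal local sketch (the remote batch-prediction argument itself is handled inside the step helper and is not reproduced here):

from bigml.api import BigML
from bigml.model import Model

api = BigML()

source = api.create_source("data/iris.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
model = api.create_model(dataset)
api.ok(model)

local_model = Model(model, api=api)
# missing_strategy=1 selects the proportional strategy: when a split field is
# missing, the instance is distributed over both branches instead of stopping
row = {"petal width": 0.5}  # most fields intentionally left missing
print local_model.predict(row, missing_strategy=1)
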
Example n. 41
    def test_scenario7(self):
        """
            Scenario: Successfully building anomalous dataset test predictions from anomaly
                Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                Then I check that the new top anomalies dataset has been created
                And the top anomalies in the anomaly detector are <top_anomalies>
                And the forest size in the anomaly detector is <forest_size>
                And the number of records in the top anomalies dataset is <top_anomalies>

                Examples:
                | data               | options                              | output_dir     | top_anomalies | forest_size |
                | data/tiny_kdd.csv" | --top-anomalies 15 --forest-size 40 | scenario_an_7  | 15            | 40          |

        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/tiny_kdd.csv', '--top-n 15 --forest-size 40 ', 'scenario_an_7', '15', '40']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_anomaly_resources_with_options(self, example[0], example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_top_anomalies(self, example[3])
            test_anomaly.i_check_forest_size(self, example[4])
            test_anomaly.i_check_dataset_lines_number(self, example[3])
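
Both options checked in this scenario map onto anomaly detector creation arguments; a minimal sketch of just that piece with the bindings:

from bigml.api import BigML

api = BigML()

source = api.create_source("data/tiny_kdd.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)

# "top_n" and "forest_size" back the --top-n / --forest-size flags used above;
# the finished detector reports the top anomalous rows it found while training
anomaly = api.create_anomaly(dataset, {"top_n": 15, "forest_size": 40})
api.ok(anomaly)
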
Example n. 42
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris_2fd.csv', 'sepal length', 'species', 'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall', 'petal width', '100.00%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            dataset.i_filter_field_from_dataset(self, field=example[1], output_dir=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[5], objective=example[2], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_feature_selection(self, example[7], example[6], example[8])
Example n. 43
    def setup_scenario1(self):
        """
            Scenario: Successfully building test anomaly scores from scratch:
                Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data                 | test               | output                           |predictions_file           |
                | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['data/tiny_kdd.csv', 'data/test_kdd.csv', 'scenario_an_1/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
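
Scoring the test file locally, which is what the anomaly-scores check ultimately reads, can be sketched with the bindings' local Anomaly class; the input row below uses a couple of KDD-style field names purely as placeholders.

from bigml.api import BigML
from bigml.anomaly import Anomaly

api = BigML()

source = api.create_source("data/tiny_kdd.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
anomaly = api.create_anomaly(dataset)
api.ok(anomaly)

# download the detector once and score rows locally
local_anomaly = Anomaly(anomaly, api=api)
row = {"src_bytes": 350, "dst_bytes": 1800}  # placeholder field names/values
print local_anomaly.anomaly_score(row)
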
Example n. 44
    def test_scenario6(self):
        """
            Scenario: Successfully building feature selection from dataset excluding features:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations excluding "<features>" with separator "<args_separator>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                    | kfold | features              | args_separator | metric   | selection   | metric_value |
                | ../data/iris.csv | ./scenario_a_7/evaluation | 2     | petal length!sepal width | !              | accuracy | petal width | 95.33%      |
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_7/evaluation', '2', 'petal length!sepal width', '!', 'accuracy', 'petal width', '95.33%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_separator_metric_no_fields(self, k_folds=example[2], features=example[3], args_separator=example[4], metric=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[6], example[5], example[7])
Example n. 45
    def test_scenario4(self):
        """
            Scenario: Successfully building test anomaly score predictions from training set as a dataset:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the batch anomaly scores prediction has been created
                Then I check that the batch anomaly scores dataset exists
                And no local CSV file is created

                Examples:
                | data             | output_dir      |
                | ../data/iris.csv | ./scenario_ab_4 |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris.csv', 'scenario_ab_4']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_without_test_split(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_batch_anomaly_score_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example n. 46
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json', u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision', '5', '98.21%']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self, min_nodes=example[5], max_nodes=example[6], nodes_step=example[7], k_fold=example[8], metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9], example[11])
Example n. 47
    def test_scenario1(self):
        """
            Scenario: Successfully building test anomaly score predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv', 'check_files/anomaly_scores_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
Example n. 48
    def test_scenario1(self):
        """
            Scenario: Successfully building k-fold cross-validation from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML <kfold>-fold cross-validation
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that the <kfold>-fold cross-validation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | output                    | kfold | json_evaluation_file               |
                | ../data/iris.csv | ./scenario_a_1/evaluation | 2     | ./check_files/evaluation_kfold.json |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_1/evaluation', '2', 'check_files/evaluation_kfold.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation(self, k_folds=example[2])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_kfold_cross_validation(self, example[2])
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example n. 49
    def test_scenario2(self):
        """
            Scenario: Successfully building test anomaly score predictions from test split:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the train dataset has been created
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data             | test_split | output                 |predictions_file           |
                | ../data/iris.csv | 0.2 | ./scenario_ab_2/anomalies.csv | ./check_files/anomaly_scores_iris.csv |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '0.2', 'scenario_ab_2/anomalies.csv', 'check_files/anomaly_scores_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_test_split(self, data=example[0], test_split=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
Example n. 50
    def test_scenario6(self):
        """
            Scenario: Successfully deleting a source in a time range:
                Given I create a BigML source from file "<data>" storing results in "<output_dir>"
                And I check that the source has been created
                And I store the source id as lower
                And I create a BigML source from file "<data>" storing results in "<output_dir2>"
                And I check that the source exists
                And I store the source id as reference
                And I create a BigML source from file "<data>" storing results in "<output_dir3>"
                And I check that the source has been created
                And I store the source id as upper
                And I delete the source using --older-than and --newer-than storing results in "<output_dir>"
                Then I check that the reference source doesn't exist

                Examples:
                | data               | output_dir | output_dir2 | output_dir3
                | ../data/iris.csv   | ./scenario_del_6 | ./scenario_del_6_2 | ./scenario_del_6_3
        """
        print self.test_scenario6.__doc__
        examples = [["data/iris.csv", "scenario_del_6", "scenario_del_6_2", "scenario_del_6_3"]]
        for example in examples:
            print "\nTesting with:\n", example
            test_delete.i_create_source_from_file(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_delete.i_store_source_id_as_bound(self, which="lower")
            test_delete.i_create_source_from_file(self, data=example[0], output_dir=example[2])
            test_delete.i_check_source_exists(self)
            test_delete.i_store_source_id_as_bound(self, which="reference")
            test_delete.i_create_source_from_file(self, data=example[0], output_dir=example[3])
            test_pred.i_check_create_source(self)
            test_delete.i_store_source_id_as_bound(self, which="upper")
            test_delete.i_delete_source_older_newer(self, output_dir=example[3])
            test_delete.i_check_source_does_not_exist(self, source_id=world.source_reference)
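
The --older-than/--newer-than pair boils down to filtering resources by creation time and deleting whatever falls strictly inside the window. A hedged sketch with the bindings: the created__gt/created__lt filters follow BigML's query-string filtering convention, and the two timestamps are placeholders for the values taken from the lower and upper bound sources.

from bigml.api import BigML

api = BigML()

# creation timestamps of the lower and upper bound sources, e.g. read from
# source["object"]["created"]; placeholder values here
lower = "2015-03-01T00:00:00.000000"
upper = "2015-03-01T00:05:00.000000"

# list every source created strictly between both bounds and delete it
listing = api.list_sources("created__gt=%s;created__lt=%s" % (lower, upper))
for source in listing["objects"]:
    api.delete_source(source["resource"])
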
Example n. 51
    def test_scenario2(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch to dataset:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                Then I check that the batch centroids dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    |  output_dir     |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 |

        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_to_dataset(
                self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            batch_pred.i_check_create_batch_centroids_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example n. 52
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                        |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_2/predictions.csv   | ./check_files/predictions_iris_missing.csv
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris_missing.csv', 'data/test_iris_missing.csv',
            'scenario_mspl_2/predictions.csv',
            'check_files/predictions_iris_missing.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_missing_splits(
                self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML logistic regression resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1_lr/predictions.csv', 'check_files/predictions_iris_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            lr_pred.i_create_all_lr_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
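
The logistic-regression variant of the prediction workflow swaps the model for a logistic regression resource and its local counterpart; a minimal sketch, assuming iris-style field names for the input row:

from bigml.api import BigML
from bigml.logistic import LogisticRegression

api = BigML()

source = api.create_source("data/iris.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
logistic = api.create_logistic_regression(dataset)
api.ok(logistic)

# predictions are computed client-side, much like the CSV the test checks
local_logistic = LogisticRegression(logistic, api=api)
row = {"sepal length": 5.0, "sepal width": 3.4,
       "petal length": 1.6, "petal width": 0.3}
print local_logistic.predict(row)
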
Example n. 54
    def test_scenario5(self):
        """
            Scenario: Successfully building a filtered dataset from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML filtered dataset with filter "<filter_exp>" from previous dataset and store logs in "<output_dir>"
                And I check that the dataset has been created
                And the number of records in the dataset is <filtered_records>

                Examples:
                |data |output_dir | filtered_records | filter_exp
                |../data/iris.csv | ./scenario_d_5 | 50 | (= (f "000004") "Iris-setosa")
        """
        print self.test_scenario5.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_5', '50',
            '(= (f "000004") "Iris-setosa")'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_filtered_dataset_from_dataset(
                self, filter_exp=example[3], output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_dataset_lines_number(self, example[2])
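
The filter in this scenario is a Flatline s-expression; with the bindings, the filtered dataset is a single call where the expression is passed as lisp_filter. A minimal sketch:

from bigml.api import BigML

api = BigML()

source = api.create_source("data/iris.csv")
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)

# keep only rows where field 000004 (species) equals Iris-setosa
setosa_only = api.create_dataset(
    dataset, {"lisp_filter": '(= (f "000004") "Iris-setosa")'})
api.ok(setosa_only)
print setosa_only["object"]["rows"]  # 50 rows for the iris data
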
Example n. 55
    def test_scenario6(self):
        """
            Scenario: Successfully extending the multi-label source file:
                Given I create a BigML multi-label source with "<label_separator>" label separator and <number_of_labels> labels from train "<data>" file with "<training_separator>" field separator and "<ml_fields>" as multi-label fields and objective "<objective>" and output in "<output_dir>"
                And I check that the source has been created
                Then I check the extended file "<local_file>" has been created
                And the headers of the local extended file are "<headers>"
                And the first row of the local extended file is "<first_row>"

                Examples:
                |label_separator |number_of_labels | data                   |training_separator | ml_fields | objective | output_dir                        |local_file         | headers | first_row |
                |:|7| ../data/multilabel_multi.csv |,  | type,class | class | ./scenario_mlm_6 | ./scenario_mlm_6/extended_multilabel_multi.csv |color,year,price,first_name,last_name,sex,class,type,class - Adult,class - Child,class - Pensioner,class - Retired,class - Student,class - Teenager,class - Worker,type - A,type - C,type - P,type - R,type - S,type - T,type - W | Blue,1992,"1208,6988040134",John,Higgins,Male,Worker:Adult,W:A:C:S:T:R:P,1,0,0,0,0,0,1,1,1,1,1,1,1,1
                |:|7| ../data/multilabel_multi2.csv |,  | Colors,Movies,Hobbies | Hobbies | ./scenario_mlm_7 | ./scenario_mlm_7/extended_multilabel_multi2.csv |Registration Date,Age Range,Gender,Height,Weight,Points,Colors,Movies,Hobbies,Colors - Black,Colors - Blue,Colors - Green,Colors - Grey,Colors - Orange,Colors - Pink,Colors - Purple,Colors - Red,Colors - White,Colors - Yellow,Movies - Action,Movies - Adventure,Movies - Comedy,Movies - Crime,Movies - Erotica,Movies - Fantasy,Movies - Horror,Movies - Mystery,Movies - Philosophical,Movies - Political,Movies - Romance,Movies - Satire,Movies - Thriller,Hobbies - Barbacue,Hobbies - Books,Hobbies - Chat,Hobbies - Cooking,Hobbies - Dance,Hobbies - Disco,Hobbies - Dolls,Hobbies - Family,Hobbies - Films,Hobbies - Fishing,Hobbies - Friends,Hobbies - Jogging,Hobbies - Music,Hobbies - Soccer,Hobbies - Toys,Hobbies - Travel,Hobbies - Videogames,Hobbies - Walking |2011-02-06,19-30,Female,140,47,11,White:Red,Comedy:Romance,Friends:Music,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
        """
        print self.test_scenario6.__doc__
        examples = [
            [':', '7', 'data/multilabel_multi.csv', ',', 'type,class', 'class', 'scenario_mlm_6', 'scenario_mlm_6/extended_multilabel_multi.csv', 'color,year,price,first_name,last_name,sex,class,type,class - Adult,class - Child,class - Pensioner,class - Retired,class - Student,class - Teenager,class - Worker,type - A,type - C,type - P,type - R,type - S,type - T,type - W', 'Blue,1992,"1208,6988040134",John,Higgins,Male,Worker:Adult,W:A:C:S:T:R:P,1,0,0,0,0,0,1,1,1,1,1,1,1,1'],
            [':', '7', 'data/multilabel_multi2.csv', ',', 'Colors,Movies,Hobbies', 'Hobbies', 'scenario_mlm_7', 'scenario_mlm_7/extended_multilabel_multi2.csv', 'Registration Date,Age Range,Gender,Height,Weight,Points,Colors,Movies,Hobbies,Colors - Black,Colors - Blue,Colors - Green,Colors - Grey,Colors - Orange,Colors - Pink,Colors - Purple,Colors - Red,Colors - White,Colors - Yellow,Movies - Action,Movies - Adventure,Movies - Comedy,Movies - Crime,Movies - Erotica,Movies - Fantasy,Movies - Horror,Movies - Mystery,Movies - Philosophical,Movies - Political,Movies - Romance,Movies - Satire,Movies - Thriller,Hobbies - Barbacue,Hobbies - Books,Hobbies - Chat,Hobbies - Cooking,Hobbies - Dance,Hobbies - Disco,Hobbies - Dolls,Hobbies - Family,Hobbies - Films,Hobbies - Fishing,Hobbies - Friends,Hobbies - Jogging,Hobbies - Music,Hobbies - Soccer,Hobbies - Toys,Hobbies - Travel,Hobbies - Videogames,Hobbies - Walking', '2011-02-06,19-30,Female,140,47,11,White:Red,Comedy:Romance,Friends:Music,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0']
]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_ml_source(self, label_separator=example[0], number_of_labels=example[1], data=example[2], training_separator=example[3], multi_label_fields=example[4], objective=example[5], output_dir=example[6])
            test_pred.i_check_create_source(self)
            ml_pred.i_check_local_file(self, path=example[7])
            ml_pred.i_check_headers_file(self, headers=example[8])
            ml_pred.i_check_first_row_file(self, first_row=example[9])
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML logistic regression resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.csv',
            'scenario1_lr/predictions.csv',
            'check_files/predictions_iris_lr.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            lr_pred.i_create_all_lr_resources(self, example[0], example[1],
                                              example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example n. 57
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator and "<ml_fields>" as multi-label fields using model_fields "<model_fields>" and objective "<objective>" to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator | ml_fields | model_fields | objective | test                        | output                         |predictions_file           |
                |my_multilabelm_1|:|7| ../data/multilabel_multi.csv |,  | type,class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P | class |../data/test_multilabel.csv | ./scenario_mlm_1/predictions.csv | ./check_files/predictions_ml.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'my_multilabelm_1', ':', '7', 'data/multilabel_multi.csv', ',',
            'type,class',
            '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P',
            'class', 'data/test_multilabel.csv',
            'scenario_mlm_1/predictions.csv', 'check_files/predictions_ml.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_mlm_resources(
                self, tag=example[0], label_separator=example[1],
                number_of_labels=example[2], data=example[3],
                training_separator=example[4], ml_fields=example[5],
                model_fields=example[6], objective=example[7],
                test=example[8], output=example[9])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[10])
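
A hedged sketch of the multi-label command this scenario exercises is shown below; the flag names follow the bigmler documentation as best recalled and the values come from the example row above, so treat the call as an assumption rather than the step's literal command.

import subprocess

# Assumed bigmler invocation for the multi-label scenario above.
subprocess.check_call([
    "bigmler",
    "--multi-label",
    "--multi-label-fields", "type,class",
    "--label-separator", ":",
    "--training-separator", ",",
    "--train", "data/multilabel_multi.csv",
    "--objective", "class",
    "--model-fields",
    "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P",
    "--test", "data/test_multilabel.csv",
    "--output", "scenario_mlm_1/predictions.csv",
    "--tag", "my_multilabelm_1",
])
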
Example n. 58
0
    def test_scenario2(self):
        """
            Scenario: Successfully building predictions for data streamed to stdin:
                Given I create BigML resources uploading train "<data>" file to test "<test>" read from stdin with name "<name>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                            |predictions_file           | name |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_st_2/predictions.csv   | ./check_files/predictions_iris.csv   | Source name: áéí |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'data/test_iris.csv',
            'scenario_st_2/predictions.csv',
            'check_files/predictions_iris.csv', 'Source name: áéí'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            stdin.i_create_all_resources_to_test_from_stdin(
                self, data=example[0], test=example[1], name=example[4],
                output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
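
This scenario streams the training data to bigmler's standard input instead of passing a file path. The sketch below pipes the CSV through stdin with subprocess; the bare --train flag (meaning "read the training data from stdin") is an assumption about the CLI's behaviour, not something confirmed from the step code.

import subprocess

# Hedged sketch: feed data/iris.csv to bigmler through stdin, keeping the
# remaining arguments as in the scenario above.
with open("data/iris.csv", "rb") as train_file:
    subprocess.check_call([
        "bigmler",
        "--train",
        "--test", "data/test_iris.csv",
        "--name", "Source name: áéí",
        "--output", "scenario_st_2/predictions.csv",
    ], stdin=train_file)
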
Example n. 59
0
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv',
            'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv',
            'check_files/predictions_grades.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(
                self, data=example[0], test=example[1],
                fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
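
Scenario 1 adds a fields map so the remote batch prediction can match training and test field structures that do not line up by position. A hedged sketch of the equivalent command follows; --fields-map and --remote are quoted from the bigmler documentation as best recalled and are not taken from the step implementation.

import subprocess

# Assumed bigmler call: remote (batch) predictions with an explicit
# fields map between the training and test datasets.
subprocess.check_call([
    "bigmler",
    "--train", "data/grades.csv",
    "--test", "data/test_grades.csv",
    "--fields-map", "data/grades_fields_map.csv",
    "--remote",
    "--output", "scenario_r1_r/predictions.csv",
])
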