Example #1
0
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        # The scenario text above is echoed verbatim before the steps run,
        # so it is part of the test's output and is left untouched.
        print(self.test_scenario1.__doc__)

        # Row layout: data, output_dir, new_fields, field, model_fields.
        rows = [
            ['data/iris.csv', 'scenario_d_1', 'data/new_fields.json',
             u'outlier?', u'petal length,outlier?,species'],
        ]
        for row in rows:
            data, output_dir, new_fields, field, model_fields = row
            # One pre-joined argument: the header ends in a newline, so the
            # row follows it directly, exactly as the print statement did.
            print("\nTesting with:\n" + str(row))

            # Build the source and the initial dataset.
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)

            # Derive the new dataset (and a model) from the JSON specs.
            dataset_adv.i_create_dataset_new_fields(
                self, json_file=new_fields, model_fields=model_fields)
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)

            # Final assertion: the generated field is present.
            dataset_adv.i_check_dataset_has_field(self, field)
Example #2
0
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields2.json| outlier? |outlier?,species| 3         | 14        | 2         |2     | precision  | 5                | 98.21%         |
        """
        # The docstring is printed as part of the test log, so its Examples
        # table must describe the row that is actually executed below
        # (new_fields2.json, "outlier?,species", threshold 5, 98.21%).
        print(self.test_scenario8.__doc__)

        # Row layout: data, output_dir, new_fields, field, model_fields,
        # min_nodes, max_nodes, nodes_step, kfold, metric, node_threshold,
        # metric_value.
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json',
             u'outlier?', u'outlier?,species', '3', '14', '2', '2',
             'precision', '5', '98.21%'],
        ]
        for example in examples:
            # "field" is listed in the table but no step of this scenario
            # consumes it, hence the underscore-prefixed name.
            (data, output_dir, new_fields, _field, model_fields,
             min_nodes, max_nodes, nodes_step, kfold, metric,
             node_threshold, metric_value) = example
            # Single pre-joined argument keeps the Python 2 output identical
            # to the former two-item print statement.
            print("\nTesting with:\n" + str(example))

            # Source and base dataset.
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)

            # New dataset generated from the JSON specs, plus its model.
            dataset_adv.i_create_dataset_new_fields(
                self, json_file=new_fields, model_fields=model_fields)
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)

            # Nodes analysis with k-fold cross-validation.
            test_pred.i_create_nodes_analysis(
                self, min_nodes=min_nodes, max_nodes=max_nodes,
                nodes_step=nodes_step, k_fold=kfold, metric=metric)
            test_pred.i_check_create_kfold_datasets(self, kfold)
            test_pred.i_check_create_kfold_models(self, kfold)
            test_pred.i_check_create_all_kfold_cross_validations(self, kfold)

            # Expected best threshold and the metric value reported for it.
            test_pred.i_check_node_threshold(
                self, node_threshold, metric, metric_value)
Example #3
0
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        # The scenario text is echoed verbatim as part of the test output,
        # so it is left untouched.
        print(self.test_scenario4.__doc__)

        # Row layout: data, field, objective, output, output_dir, kfold,
        # metric, selection, metric_value.
        rows = [
            ['data/iris_2fd.csv', 'sepal length', 'species',
             'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall',
             'petal width', '100.00%'],
        ]
        for row in rows:
            (data, field, objective, output, output_dir,
             kfold, metric, selection, metric_value) = row
            # One pre-joined argument: the header ends in a newline, so the
            # row follows it directly, exactly as the print statement did.
            print("\nTesting with:\n" + str(row))

            # Upload the training file and build the base dataset.
            test_pred.i_create_dataset(self, data=data, output=output)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)

            # Drop the given field, producing a new dataset.
            dataset.i_filter_field_from_dataset(
                self, field=field, output_dir=output_dir)
            test_pred.i_check_create_new_dataset(self)

            # Feature selection through k-fold cross-validation.
            test_pred.i_create_kfold_cross_validation_objective(
                self, k_folds=kfold, objective=objective, metric=metric)
            test_pred.i_check_create_kfold_datasets(self, kfold)
            test_pred.i_check_create_kfold_models(self, kfold)
            test_pred.i_check_create_all_kfold_cross_validations(self, kfold)

            # Expected winning selection and its metric value.
            test_pred.i_check_feature_selection(
                self, selection, metric, metric_value)
Example #4
0
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields2.json| outlier? |outlier?,species| 3         | 14        | 2         |2     | precision  | 5                | 98.21%         |
        """
        # This docstring is printed into the test log; its Examples table is
        # aligned with the values the loop below really runs with
        # (new_fields2.json, "outlier?,species", threshold 5, 98.21%).
        print(self.test_scenario8.__doc__)

        # Positional meaning of each entry in a row, in order.
        columns = ('data', 'output_dir', 'new_fields', 'field',
                   'model_fields', 'min_nodes', 'max_nodes', 'nodes_step',
                   'kfold', 'metric', 'node_threshold', 'metric_value')
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json',
             u'outlier?', u'outlier?,species', '3', '14', '2', '2',
             'precision', '5', '98.21%'],
        ]
        for example in examples:
            # Single pre-joined argument keeps the Python 2 output identical
            # to the former two-item print statement.
            print("\nTesting with:\n" + str(example))
            # Name the values once instead of indexing the row repeatedly;
            # the "field" entry is not used by any step of this scenario.
            args = dict(zip(columns, example))

            # Source and base dataset.
            dataset_adv.i_create_dataset(
                self, data=args['data'], output_dir=args['output_dir'])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)

            # New dataset generated from the JSON specs, plus its model.
            dataset_adv.i_create_dataset_new_fields(
                self, json_file=args['new_fields'],
                model_fields=args['model_fields'])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)

            # Nodes analysis with k-fold cross-validation.
            test_pred.i_create_nodes_analysis(
                self, min_nodes=args['min_nodes'],
                max_nodes=args['max_nodes'],
                nodes_step=args['nodes_step'],
                k_fold=args['kfold'], metric=args['metric'])
            test_pred.i_check_create_kfold_datasets(self, args['kfold'])
            test_pred.i_check_create_kfold_models(self, args['kfold'])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, args['kfold'])

            # Expected best threshold and the metric value reported for it.
            test_pred.i_check_node_threshold(
                self, args['node_threshold'], args['metric'],
                args['metric_value'])
Example #5
0
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        # Echo the scenario description; it is runtime output and therefore
        # reproduced unchanged.
        print(self.test_scenario4.__doc__)

        # Positional meaning of each entry in a row, in order.
        columns = ('data', 'field', 'objective', 'output', 'output_dir',
                   'kfold', 'metric', 'selection', 'metric_value')
        cases = [
            ['data/iris_2fd.csv', 'sepal length', 'species',
             'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall',
             'petal width', '100.00%'],
        ]
        for case in cases:
            # The header ends with a newline, so the row is appended with no
            # separator, matching what the print statement emitted.
            print("\nTesting with:\n" + str(case))
            args = dict(zip(columns, case))

            # Upload the training file and build the base dataset.
            test_pred.i_create_dataset(
                self, data=args['data'], output=args['output'])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)

            # Drop the given field, producing a new dataset.
            dataset.i_filter_field_from_dataset(
                self, field=args['field'], output_dir=args['output_dir'])
            test_pred.i_check_create_new_dataset(self)

            # Feature selection through k-fold cross-validation.
            test_pred.i_create_kfold_cross_validation_objective(
                self, k_folds=args['kfold'], objective=args['objective'],
                metric=args['metric'])
            test_pred.i_check_create_kfold_datasets(self, args['kfold'])
            test_pred.i_check_create_kfold_models(self, args['kfold'])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, args['kfold'])

            # Expected winning selection and its metric value.
            test_pred.i_check_feature_selection(
                self, args['selection'], args['metric'],
                args['metric_value'])
Example #6
0
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        # Echo the scenario description; it is runtime output and therefore
        # reproduced unchanged.
        print(self.test_scenario1.__doc__)

        # Each case lists: data, output_dir, new_fields, field, model_fields.
        cases = [
            ['data/iris.csv', 'scenario_d_1', 'data/new_fields.json',
             u'outlier?', u'petal length,outlier?,species'],
        ]
        for case in cases:
            data, output_dir, new_fields, field, model_fields = case
            # The header ends with a newline, so the row is appended with no
            # separator, matching what the print statement emitted.
            print("\nTesting with:\n" + str(case))

            # Source and base dataset.
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)

            # New dataset generated from the JSON specs, plus its model.
            dataset_adv.i_create_dataset_new_fields(
                self, json_file=new_fields, model_fields=model_fields)
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)

            # The new dataset must expose the generated field.
            dataset_adv.i_check_dataset_has_field(self, field)