Example #1
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | ../data/diabetes.csv   | ./scenario_c_2/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_cluster.i_create_cluster_resources_from_source(self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario2(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch to dataset:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                Then I check that the batch centroids dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    |  output_dir     |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_to_dataset(self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            batch_pred.i_check_create_batch_centroids_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example #3
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroids from scratch:
                Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test               | output                           |predictions_file           |
                | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv |
                | ../data/diabetes.csv   | ../data/diabetes.csv   | ./scenario_c_1/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'],
            ['data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[3])
Example #4
0
    def test_scenario8(self):
        """
            Scenario: Successfully building a cluster from a sampled dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML cluster with params "<params>" from dataset in "<output_dir>"
                And I check that the cluster has been created
                And the cluster params are "<params_json>"

                Examples:
                |data |output_dir | params | params_json
                |../data/iris.csv | ./scenario_d_8 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
        """
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.test_scenario8.__doc__)
        # Row layout: data, output_dir, params, params_json.
        # NOTE(review): the docstring table spells the JSON key "sample-rate"
        # while the value used here is "sample_rate"; the docstring is printed
        # at runtime, so it is left untouched -- confirm which is intended.
        examples = [
            ['data/iris.csv',
             'scenario_d_8',
             '--sample-rate 0.2 --replacement',
             '{"sample_rate": 0.2, "replacement": true}'],
        ]
        for example in examples:
            # Pre-format the message so the text is identical on 2.x and 3.x.
            print("\nTesting with:\n%s" % (example,))
            data, output_dir, params, params_json = example
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            # The cluster step logs into the same output_dir as the dataset.
            dataset_adv.i_create_cluster_with_params_from_dataset(
                self, cluster_params=params, output_dir=output_dir)
            test_pred.i_check_create_cluster(self)
            dataset_adv.i_check_cluster_params(self, params_json=params_json)
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
    def test_scenario9(self):
        """
            Scenario: Successfully building test predictions from dataset with summary fields
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML cluster using dataset and summary fields "<summary_fields>" and log resources in "<output_dir>"
                And I check that the cluster has been created
                Then the cluster has summary fields "<summary_fields>"

                Examples:
                |scenario    | kwargs                                                  | output-dir                        |summary_fields           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | scenario_c_9/   | diabetes,age

        """
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.test_scenario9.__doc__)
        # Row layout: scenario, kwargs (JSON text), output_dir,
        # summary_fields.
        # NOTE(review): the docstring table lists summary fields as
        # "diabetes,age" while the data below passes "000008,000007" --
        # presumably the matching field ids; confirm.
        examples = [
            ['scenario_c_1',
             '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}',
             'scenario_c_9',
             '000008,000007'],
        ]
        for example in examples:
            # Pre-format the message so the text is identical on 2.x and 3.x.
            print("\nTesting with:\n%s" % (example,))
            scenario, kwargs, output_dir, summary_fields = example
            # Given: reuse the earlier scenario's resources or rebuild them.
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, scenario, kwargs)
            test_cluster.i_create_cluster_from_dataset_with_summary_fields(
                self, summary_fields=summary_fields, output_dir=output_dir)
            test_pred.i_check_create_cluster(self)
            # The same summary_fields value is used to create and to verify.
            test_cluster.i_check_cluster_has_summary_fields(
                self, summary_fields)
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | ../data/diabetes.csv   | ./scenario_c_2/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_c_1',
            '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}',
            'data/diabetes.csv', 'scenario_c_2/centroids.csv',
            'check_files/centroids_diabetes.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_cluster.i_create_cluster_resources_from_source(
                self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example #8
0
    def test_scenario2(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch to dataset:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                Then I check that the batch centroids dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    |  output_dir     |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 |

        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_to_dataset(
                self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            batch_pred.i_check_create_batch_centroids_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example #9
0
    def test_scenario8(self):
        """
            Scenario: Successfully building a cluster from a sampled dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML cluster with params "<params>" from dataset in "<output_dir>"
                And I check that the cluster has been created
                And the cluster params are "<params_json>"

                Examples:
                |data |output_dir | params | params_json
                |../data/iris.csv | ./scenario_d_8 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
        """
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.test_scenario8.__doc__)
        # Row layout: data, output_dir, params, params_json.
        # NOTE(review): the docstring table spells the JSON key "sample-rate"
        # while the value used here is "sample_rate"; the docstring is printed
        # at runtime, so it is left untouched -- confirm which is intended.
        examples = [
            ['data/iris.csv',
             'scenario_d_8',
             '--sample-rate 0.2 --replacement',
             '{"sample_rate": 0.2, "replacement": true}'],
        ]
        for example in examples:
            # Pre-format the message so the text is identical on 2.x and 3.x.
            print("\nTesting with:\n%s" % (example,))
            data, output_dir, params, params_json = example
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            # The cluster step logs into the same output_dir as the dataset.
            dataset_adv.i_create_cluster_with_params_from_dataset(
                self, cluster_params=params, output_dir=output_dir)
            test_pred.i_check_create_cluster(self)
            dataset_adv.i_check_cluster_params(self, params_json=params_json)
Example #10
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example #11
0
    def test_scenario9(self):
        """
            Scenario: Successfully building test predictions from dataset with summary fields
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML cluster using dataset and summary fields "<summary_fields>" and log resources in "<output_dir>"
                And I check that the cluster has been created
                Then the cluster has summary fields "<summary_fields>"

                Examples:
                |scenario    | kwargs                                                  | output-dir                        |summary_fields           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | scenario_c_9/   | diabetes,age

        """
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.test_scenario9.__doc__)
        # Row layout: scenario, kwargs (JSON text), output_dir,
        # summary_fields.
        # NOTE(review): the docstring table lists summary fields as
        # "diabetes,age" while the data below passes "000008,000007" --
        # presumably the matching field ids; confirm.
        examples = [
            ['scenario_c_1',
             '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}',
             'scenario_c_9',
             '000008,000007'],
        ]
        for example in examples:
            # Pre-format the message so the text is identical on 2.x and 3.x.
            print("\nTesting with:\n%s" % (example,))
            scenario, kwargs, output_dir, summary_fields = example
            # Given: reuse the earlier scenario's resources or rebuild them.
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, scenario, kwargs)
            test_cluster.i_create_cluster_from_dataset_with_summary_fields(
                self, summary_fields=summary_fields, output_dir=output_dir)
            test_pred.i_check_create_cluster(self)
            # The same summary_fields value is used to create and to verify.
            test_cluster.i_check_cluster_has_summary_fields(
                self, summary_fields)
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroids from scratch:
                Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test               | output                           |predictions_file           |
                | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv |
                | ../data/diabetes.csv   | ../data/diabetes.csv   | ./scenario_c_1/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', 'data/grades.csv',
            'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'
        ],
                    [
                        'data/diabetes.csv', 'data/diabetes.csv',
                        'scenario_c_1/centroids.csv',
                        'check_files/centroids_diabetes.csv'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources(self,
                                                        data=example[0],
                                                        test=example[1],
                                                        output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[3])