コード例 #1
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test anomaly score predictions from test split:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the train dataset has been created
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data             | test_split | output                 |predictions_file           |
                | ../data/iris.csv | 0.2 | ./scenario_ab_2/anomalies.csv | ./check_files/anomaly_scores_iris.csv |

        """
        # Echo the scenario text (this method's docstring) to the run log.
        print(self.test_scenario2.__doc__)
        # One row per example: data, test_split, output, predictions_file.
        examples = [[
            'data/iris.csv',
            '0.2',
            'scenario_ab_2/anomalies.csv',
            'check_files/anomaly_scores_iris.csv',
        ]]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, test_split, output, predictions_file = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_all_anomaly_resources_with_test_split(
                self, data=data, test_split=test_split, output=output)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            # The trailing blank in each suffix is part of the value passed on.
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, predictions_file)
コード例 #2
0
    def test_scenario4(self):
        """
            Scenario: Successfully building test anomaly score predictions from training set as a dataset:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the batch anomaly scores prediction has been created
                Then I check that the batch anomaly scores dataset exists
                And no local CSV file is created

                Examples:
                | data             | output_dir      |
                | ../data/iris.csv | ./scenario_ab_4 |
        """
        # Echo THIS scenario's text to the run log. The previous code printed
        # test_scenario3.__doc__, which logs the wrong scenario (or raises
        # AttributeError when no test_scenario3 method exists).
        print(self.test_scenario4.__doc__)
        # One row per example: data, output_dir.
        examples = [
            ['data/iris.csv', 'scenario_ab_4'],
        ]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, output_dir = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_all_anomaly_resources_without_test_split(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_batch_anomaly_score_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
コード例 #3
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test anomaly score predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv', 'check_files/anomaly_scores_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
コード例 #4
0
ファイル: test_23_anomaly.py プロジェクト: bigmlcom/bigmler
    def setup_scenario1(self):
        """
            Scenario: Successfully building test anomaly scores from scratch:
                Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data                 | test               | output                           |predictions_file           |
                | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['data/tiny_kdd.csv', 'data/test_kdd.csv', 'scenario_an_1/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
コード例 #5
0
ファイル: test_23_anomaly.py プロジェクト: bigmlcom/bigmler
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"}   | ../data/test_kdd.csv   | ./scenario_an_2/anomaly_scores.csv   | ./check_files/anomaly_scores_kdd.csv   |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_anomaly.i_create_anomaly_resources_from_source(self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
コード例 #6
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"}   | ../data/test_kdd.csv   | ./scenario_an_2/anomaly_scores.csv   | ./check_files/anomaly_scores_kdd.csv   |

        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_an_1',
            '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}',
            'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv',
            'check_files/anomaly_scores_kdd.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_anomaly.i_create_anomaly_resources_from_source(
                self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
コード例 #7
0
ファイル: test_23_anomaly.py プロジェクト: bigmlcom/bigmler
    def test_scenario7(self):
        """
            Scenario: Successfully building anomalous dataset test predictions from anomaly
                Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                Then I check that the new top anomalies dataset has been created
                And the top anomalies in the anomaly detector are <top_anomalies>
                And the forest size in the anomaly detector is <forest_size>
                And the number of records in the top anomalies dataset is <top_anomalies>

                Examples:
                | data               | options                              | output_dir     | top_anomalies | forest_size |
                | data/tiny_kdd.csv  | --top-n 15 --forest-size 40          | scenario_an_7  | 15            | 40          |

        """
        # Echo the scenario text (this method's docstring) to the run log.
        # The Examples row above was corrected to match the options that are
        # actually executed below (--top-n) and to drop a stray quote.
        print(self.test_scenario7.__doc__)
        # One row per example: data, options, output_dir, top_anomalies,
        # forest_size. The trailing blank in the options string is kept as-is.
        examples = [[
            'data/tiny_kdd.csv',
            '--top-n 15 --forest-size 40 ',
            'scenario_an_7',
            '15',
            '40',
        ]]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, options, output_dir, top_anomalies, forest_size = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_anomaly_resources_with_options(
                self, data, options, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_top_anomalies(self, top_anomalies)
            test_anomaly.i_check_forest_size(self, forest_size)
            # The expected line count reuses the top_anomalies value.
            test_anomaly.i_check_dataset_lines_number(self, top_anomalies)
コード例 #8
0
    def setup_scenario1(self):
        """
            Scenario: Successfully building test anomaly scores from scratch:
                Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data                 | test               | output                           |predictions_file           |
                | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'data/tiny_kdd.csv', 'data/test_kdd.csv',
            'scenario_an_1/anomaly_scores.csv',
            'check_files/anomaly_scores_kdd.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources(self,
                                                        data=example[0],
                                                        test=example[1],
                                                        output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
コード例 #9
0
    def test_scenario7(self):
        """
            Scenario: Successfully building anomalous dataset test predictions from anomaly
                Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                Then I check that the new top anomalies dataset has been created
                And the top anomalies in the anomaly detector are <top_anomalies>
                And the forest size in the anomaly detector is <forest_size>
                And the number of records in the top anomalies dataset is <top_anomalies>

                Examples:
                | data               | options                              | output_dir     | top_anomalies | forest_size |
                | data/tiny_kdd.csv  | --top-n 15 --forest-size 40          | scenario_an_7  | 15            | 40          |

        """
        # Echo the scenario text (this method's docstring) to the run log.
        # The Examples row above was corrected to match the options that are
        # actually executed below (--top-n) and to drop a stray quote.
        print(self.test_scenario7.__doc__)
        # Columns of each row, in order: data, options, output_dir,
        # top_anomalies, forest_size. The trailing blank in the options
        # string is kept as-is.
        top_anomalies_row = [
            'data/tiny_kdd.csv',
            '--top-n 15 --forest-size 40 ',
            'scenario_an_7',
            '15',
            '40',
        ]
        examples = [top_anomalies_row]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, options, output_dir, top_anomalies, forest_size = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_anomaly_resources_with_options(
                self, data, options, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_top_anomalies(self, top_anomalies)
            test_anomaly.i_check_forest_size(self, forest_size)
            # The expected line count reuses the top_anomalies value.
            test_anomaly.i_check_dataset_lines_number(self, top_anomalies)
コード例 #10
0
    def test_scenario4(self):
        """
            Scenario: Successfully building test anomaly score predictions from training set as a dataset:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the batch anomaly scores prediction has been created
                Then I check that the batch anomaly scores dataset exists
                And no local CSV file is created

                Examples:
                | data             | output_dir      |
                | ../data/iris.csv | ./scenario_ab_4 |
        """
        # Echo THIS scenario's text to the run log. The previous code printed
        # test_scenario3.__doc__, which logs the wrong scenario (or raises
        # AttributeError when no test_scenario3 method exists).
        print(self.test_scenario4.__doc__)
        # Columns of each row, in order: data, output_dir.
        iris_row = ['data/iris.csv', 'scenario_ab_4']
        examples = [iris_row]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, output_dir = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_all_anomaly_resources_without_test_split(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_batch_anomaly_score_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
コード例 #11
0
    def test_scenario2(self):
        """
            Scenario: Successfully building test anomaly score predictions from test split:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the train dataset has been created
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data             | test_split | output                 |predictions_file           |
                | ../data/iris.csv | 0.2 | ./scenario_ab_2/anomalies.csv | ./check_files/anomaly_scores_iris.csv |

        """
        # Echo the scenario text (this method's docstring) to the run log.
        print(self.test_scenario2.__doc__)
        # Columns of each row, in order: data, test_split, output,
        # predictions_file.
        iris_split_row = [
            'data/iris.csv',
            '0.2',
            'scenario_ab_2/anomalies.csv',
            'check_files/anomaly_scores_iris.csv',
        ]
        examples = [iris_split_row]
        for example in examples:
            # A single pre-formatted string prints the same text as the
            # two-item Python 2 print statement.
            print("\nTesting with:\n%s" % (example,))
            data, test_split, output, predictions_file = example
            # The calls below follow the scenario steps in the order listed
            # in the docstring.
            test_anomaly.i_create_all_anomaly_resources_with_test_split(
                self, data=data, test_split=test_split, output=output)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            # The trailing blank in each suffix is part of the value passed on.
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, predictions_file)
コード例 #12
0
    def test_scenario1(self):
        """
            Scenario: Successfully building test anomaly score predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv', 'check_files/anomaly_scores_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])