def test_scenario2(self): """ Scenario: Successfully building test anomaly score predictions from test split: Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the train dataset has been created And I check that the dataset has been created from the test file And I check that the batch anomaly scores prediction has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: | data | test_split | output |predictions_file | | ../data/iris.csv | 0.2 | ./scenario_ab_2/anomalies.csv | ./check_files/anomaly_scores_iris.csv | """ print self.test_scenario2.__doc__ examples = [ ['data/iris.csv', '0.2', 'scenario_ab_2/anomalies.csv', 'check_files/anomaly_scores_iris.csv']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources_with_test_split(self, data=example[0], test_split=example[1], output=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_pred.i_check_create_dataset(self, suffix='train ') test_pred.i_check_create_dataset(self, suffix='test ') test_batch.i_check_create_batch_anomaly_scores(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[3])
def test_scenario4(self): """ Scenario: Successfully building test anomaly score predictions from training set as a dataset: Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the batch anomaly scores prediction has been created Then I check that the batch anomaly scores dataset exists And no local CSV file is created Examples: | data | output_dir | | ../data/iris.csv | ./scenario_ab_4 | """ print self.test_scenario3.__doc__ examples = [ ['data/iris.csv', 'scenario_ab_4']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources_without_test_split(self, data=example[0], output_dir=example[1]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_batch.i_check_create_batch_anomaly_scores(self) test_anomaly.i_check_create_batch_anomaly_score_dataset(self) test_anomaly.i_check_no_local_CSV(self)
def test_scenario1(self): """ Scenario: Successfully building test anomaly score predictions from scratch: Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the source has been created from the test file And I check that the dataset has been created from the test file And I check that the batch anomaly scores prediction has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: | data | test | fields_map | output |predictions_file | | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv | """ print self.test_scenario1.__doc__ examples = [ ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv', 'check_files/anomaly_scores_grades.csv']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_pred.i_check_create_test_source(self) test_pred.i_check_create_test_dataset(self) test_batch.i_check_create_batch_anomaly_scores(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[4])
def setup_scenario1(self): """ Scenario: Successfully building test anomaly scores from scratch: Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: | data | test | output |predictions_file | | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.setup_scenario1.__doc__ examples = [ ['data/tiny_kdd.csv', 'data/test_kdd.csv', 'scenario_an_1/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources(self, data=example[0], test=example[1], output=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[3])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"} | ../data/test_kdd.csv | ./scenario_an_2/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_anomaly.i_create_anomaly_resources_from_source(self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"} | ../data/test_kdd.csv | ./scenario_an_2/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_anomaly.i_create_anomaly_resources_from_source( self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[4])
def test_scenario7(self): """ Scenario: Successfully building anomalous dataset test predictions from anomaly Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created Then I check that the new top anomalies dataset has been created And the top anomalies in the anomaly detector are <top_anomalies> And the forest size in the anomaly detector is <forest_size> And the number of records in the top anomalies dataset is <top_anomalies> Examples: | data | options | output_dir | top_anomalies | forest_size | | data/tiny_kdd.csv" | --top-anomalies 15 --forest-size 40 | scenario_an_7 | 15 | 40 | """ print self.test_scenario7.__doc__ examples = [ ['data/tiny_kdd.csv', '--top-n 15 --forest-size 40 ', 'scenario_an_7', '15', '40']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_anomaly_resources_with_options(self, example[0], example[1], output_dir=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_pred.i_check_create_dataset(self, suffix='gen ') test_anomaly.i_check_top_anomalies(self, example[3]) test_anomaly.i_check_forest_size(self, example[4]) test_anomaly.i_check_dataset_lines_number(self, example[3])
def setup_scenario1(self): """ Scenario: Successfully building test anomaly scores from scratch: Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the anomaly scores are ready Then the local anomaly scores file is like "<predictions_file>" Examples: | data | test | output |predictions_file | | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv | """ print self.setup_scenario1.__doc__ examples = [[ 'data/tiny_kdd.csv', 'data/test_kdd.csv', 'scenario_an_1/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv' ]] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources(self, data=example[0], test=example[1], output=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_anomaly.i_check_create_anomaly_scores(self) test_anomaly.i_check_anomaly_scores(self, example[3])
def test_scenario7(self): """ Scenario: Successfully building anomalous dataset test predictions from anomaly Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created Then I check that the new top anomalies dataset has been created And the top anomalies in the anomaly detector are <top_anomalies> And the forest size in the anomaly detector is <forest_size> And the number of records in the top anomalies dataset is <top_anomalies> Examples: | data | options | output_dir | top_anomalies | forest_size | | data/tiny_kdd.csv" | --top-anomalies 15 --forest-size 40 | scenario_an_7 | 15 | 40 | """ print self.test_scenario7.__doc__ examples = [[ 'data/tiny_kdd.csv', '--top-n 15 --forest-size 40 ', 'scenario_an_7', '15', '40' ]] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_anomaly_resources_with_options( self, example[0], example[1], output_dir=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_pred.i_check_create_dataset(self, suffix='gen ') test_anomaly.i_check_top_anomalies(self, example[3]) test_anomaly.i_check_forest_size(self, example[4]) test_anomaly.i_check_dataset_lines_number(self, example[3])
def test_scenario4(self): """ Scenario: Successfully building test anomaly score predictions from training set as a dataset: Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the anomaly detector has been created And I check that the batch anomaly scores prediction has been created Then I check that the batch anomaly scores dataset exists And no local CSV file is created Examples: | data | output_dir | | ../data/iris.csv | ./scenario_ab_4 | """ print self.test_scenario3.__doc__ examples = [['data/iris.csv', 'scenario_ab_4']] for example in examples: print "\nTesting with:\n", example test_anomaly.i_create_all_anomaly_resources_without_test_split( self, data=example[0], output_dir=example[1]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self) test_anomaly.i_check_create_anomaly(self) test_batch.i_check_create_batch_anomaly_scores(self) test_anomaly.i_check_create_batch_anomaly_score_dataset(self) test_anomaly.i_check_no_local_CSV(self)