def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | ../data/diabetes.csv | ./scenario_c_2/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario2.__doc__ examples = [ ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv']] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1]) test_cluster.i_create_cluster_resources_from_source(self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building remote test centroid predictions from scratch to dataset: Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the cluster has been created And I check that the source has been created from the test file And I check that the dataset has been created from the test file And I check that the batch centroid prediction has been created Then I check that the batch centroids dataset exists And no local CSV file is created Examples: | data | test | output_dir | | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 | """ print self.test_scenario2.__doc__ examples = [ ['data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2']] for example in examples: print "\nTesting with:\n", example test_cluster.i_create_all_cluster_resources_to_dataset(self, data=example[0], test=example[1], output_dir=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_pred.i_check_create_test_source(self) test_pred.i_check_create_test_dataset(self) batch_pred.i_check_create_batch_centroid(self) batch_pred.i_check_create_batch_centroids_dataset(self) test_anomaly.i_check_no_local_CSV(self)
def test_scenario1(self): """ Scenario: Successfully building test centroids from scratch: Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: | data | test | output |predictions_file | | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv | | ../data/diabetes.csv | ../data/diabetes.csv | ./scenario_c_1/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario1.__doc__ examples = [ ['data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'], ['data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv']] for example in examples: print "\nTesting with:\n", example test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[3])
def test_scenario8(self):
    """
        Scenario: Successfully building a cluster from a sampled dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML cluster with params "<params>" from dataset in "<output_dir>"
            And I check that the cluster has been created
            And the cluster params are "<params_json>"

            Examples:
            |data |output_dir | params | params_json
            |../data/iris.csv | ./scenario_d_8 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
    """
    # The scenario text above is echoed so it shows up in the test log.
    print(self.test_scenario8.__doc__)
    # Row layout: data, output_dir, params, params_json.
    # NOTE(review): the docstring table spells the JSON key "sample-rate",
    # while the JSON actually checked below uses "sample_rate" -- confirm
    # which spelling the table should show.
    examples = [
        ['data/iris.csv',
         'scenario_d_8',
         '--sample-rate 0.2 --replacement',
         '{"sample_rate": 0.2, "replacement": true}']]
    for example in examples:
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("\nTesting with:\n%s" % (example,))
        data, output_dir, params, params_json = example
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        # The same output_dir is reused so the cluster is built from the
        # dataset logged by the step above.
        dataset_adv.i_create_cluster_with_params_from_dataset(
            self, cluster_params=params, output_dir=output_dir)
        test_pred.i_check_create_cluster(self)
        dataset_adv.i_check_cluster_params(self, params_json=params_json)
def test_scenario1(self): """ Scenario: Successfully building test centroid predictions from scratch: Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the cluster has been created And I check that the source has been created from the test file And I check that the dataset has been created from the test file And I check that the batch centroid prediction has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: | data | test | fields_map | output |predictions_file | | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv | """ print self.test_scenario1.__doc__ examples = [ ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']] for example in examples: print "\nTesting with:\n", example test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_pred.i_check_create_test_source(self) test_pred.i_check_create_test_dataset(self) batch_pred.i_check_create_batch_centroid(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[4])
def test_scenario9(self):
    """
        Scenario: Successfully building test predictions from dataset with summary fields
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML cluster using dataset and summary fields "<summary_fields>" and log resources in "<output_dir>"
            And I check that the cluster has been created
            Then the cluster has summary fields "<summary_fields>"

            Examples:
            |scenario | kwargs | output-dir |summary_fields
            | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | scenario_c_9/ | diabetes,age
    """
    # The scenario text above is echoed so it shows up in the test log.
    print(self.test_scenario9.__doc__)
    # Row layout: scenario, kwargs (JSON-formatted string), output_dir,
    # summary_fields.
    # NOTE(review): the docstring table lists the summary fields as
    # "diabetes,age", while the value passed below is '000008,000007' --
    # presumably the matching field ids; confirm.
    examples = [
        ['scenario_c_1',
         '{"data": "data/diabetes.csv", '
         '"output": "scenario_c_1/centroids.csv", '
         '"test": "data/diabetes.csv"}',
         'scenario_c_9',
         '000008,000007']]
    for example in examples:
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("\nTesting with:\n%s" % (example,))
        scenario, kwargs, output_dir, summary_fields = example
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, scenario, kwargs)
        test_cluster.i_create_cluster_from_dataset_with_summary_fields(
            self, summary_fields=summary_fields, output_dir=output_dir)
        test_pred.i_check_create_cluster(self)
        test_cluster.i_check_cluster_has_summary_fields(self, summary_fields)
def test_scenario2(self): """ Scenario: Successfully building test predictions from source Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs> And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>" And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: |scenario | kwargs | test | output |predictions_file | | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | ../data/diabetes.csv | ./scenario_c_2/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario2.__doc__ examples = [[ 'scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv' ]] for example in examples: print "\nTesting with:\n", example test_pred.i_have_previous_scenario_or_reproduce_it( self, example[0], example[1]) test_cluster.i_create_cluster_resources_from_source( self, test=example[2], output=example[3]) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[4])
def test_scenario2(self): """ Scenario: Successfully building remote test centroid predictions from scratch to dataset: Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I check that the cluster has been created And I check that the source has been created from the test file And I check that the dataset has been created from the test file And I check that the batch centroid prediction has been created Then I check that the batch centroids dataset exists And no local CSV file is created Examples: | data | test | output_dir | | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 | """ print self.test_scenario2.__doc__ examples = [[ 'data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2' ]] for example in examples: print "\nTesting with:\n", example test_cluster.i_create_all_cluster_resources_to_dataset( self, data=example[0], test=example[1], output_dir=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_pred.i_check_create_test_source(self) test_pred.i_check_create_test_dataset(self) batch_pred.i_check_create_batch_centroid(self) batch_pred.i_check_create_batch_centroids_dataset(self) test_anomaly.i_check_no_local_CSV(self)
def test_scenario8(self):
    """
        Scenario: Successfully building a cluster from a sampled dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML cluster with params "<params>" from dataset in "<output_dir>"
            And I check that the cluster has been created
            And the cluster params are "<params_json>"

            Examples:
            |data |output_dir | params | params_json
            |../data/iris.csv | ./scenario_d_8 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
    """
    # Echo the scenario description into the test log.
    print(self.test_scenario8.__doc__)
    # Each row holds: data, output_dir, params, params_json.
    # NOTE(review): the docstring table spells the JSON key "sample-rate",
    # while the JSON actually checked below uses "sample_rate" -- confirm
    # which spelling the table should show.
    examples = [
        ['data/iris.csv',
         'scenario_d_8',
         '--sample-rate 0.2 --replacement',
         '{"sample_rate": 0.2, "replacement": true}']]
    for example in examples:
        # Formatting into one string keeps the output identical whether
        # print is the Python 2 statement or the Python 3 function.
        print("\nTesting with:\n%s" % (example,))
        data, output_dir, params, params_json = example
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        # The cluster step is pointed at the same output_dir used for the
        # dataset step above.
        dataset_adv.i_create_cluster_with_params_from_dataset(
            self, cluster_params=params, output_dir=output_dir)
        test_pred.i_check_create_cluster(self)
        dataset_adv.i_check_cluster_params(self, params_json=params_json)
def test_scenario9(self):
    """
        Scenario: Successfully building test predictions from dataset with summary fields
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML cluster using dataset and summary fields "<summary_fields>" and log resources in "<output_dir>"
            And I check that the cluster has been created
            Then the cluster has summary fields "<summary_fields>"

            Examples:
            |scenario | kwargs | output-dir |summary_fields
            | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"} | scenario_c_9/ | diabetes,age
    """
    # Echo the scenario description into the test log.
    print(self.test_scenario9.__doc__)
    # Each row holds: scenario, kwargs (JSON-formatted string), output_dir,
    # summary_fields.
    # NOTE(review): the docstring table lists the summary fields as
    # "diabetes,age", while the value passed below is '000008,000007' --
    # presumably the matching field ids; confirm.
    examples = [
        ['scenario_c_1',
         '{"data": "data/diabetes.csv", '
         '"output": "scenario_c_1/centroids.csv", '
         '"test": "data/diabetes.csv"}',
         'scenario_c_9',
         '000008,000007']]
    for example in examples:
        # Formatting into one string keeps the output identical whether
        # print is the Python 2 statement or the Python 3 function.
        print("\nTesting with:\n%s" % (example,))
        scenario, kwargs, output_dir, summary_fields = example
        test_pred.i_have_previous_scenario_or_reproduce_it(
            self, scenario, kwargs)
        test_cluster.i_create_cluster_from_dataset_with_summary_fields(
            self, summary_fields=summary_fields, output_dir=output_dir)
        test_pred.i_check_create_cluster(self)
        test_cluster.i_check_cluster_has_summary_fields(self, summary_fields)
def test_scenario1(self): """ Scenario: Successfully building test centroids from scratch: Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>" And I check that the source has been created And I check that the dataset has been created And I check that the cluster has been created And I check that the centroids are ready Then the local centroids file is like "<predictions_file>" Examples: | data | test | output |predictions_file | | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv | | ../data/diabetes.csv | ../data/diabetes.csv | ./scenario_c_1/centroids.csv | ./check_files/centroids_diabetes.csv | """ print self.test_scenario1.__doc__ examples = [[ 'data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv' ], [ 'data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv' ]] for example in examples: print "\nTesting with:\n", example test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_check_create_cluster(self) test_cluster.i_check_create_centroids(self) test_pred.i_check_predictions(self, example[3])