def test_scenario2(self):
    """
        Scenario: Successfully updating a dataset with attributes in a JSON file
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I update the dataset using the specs in JSON file "<new_fields>"
            Then I check that property "<property>" for field id "<field_id>" is "<value>" of type "<type>"

            Examples:
            |data |output_dir |new_fields | property | field_id | value | type
            |../data/iris.csv | ./scenario_d_2 |../data/attributes.json| preferred | 000001 | false | boolean
            |../data/iris.csv | ./scenario_d_2_b |../data/attributes_col.json| preferred | 000001 | false | boolean
    """
    # Echo the scenario description before running it.
    print(self.test_scenario2.__doc__)
    # Row layout: data, output_dir, json_file, property, field_id, value, type.
    cases = [
        ['data/iris.csv', 'scenario_d_2', 'data/attributes.json',
         'preferred', '000001', 'false', 'boolean'],
        ['data/iris.csv', 'scenario_d_2_b', 'data/attributes_col.json',
         'preferred', '000001', 'false', 'boolean'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, json_file, prop, field_id, value, value_type = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_update_dataset_new_properties(self, json_file=json_file)
        dataset_adv.i_check_dataset_has_property(
            self, attribute=prop, field_id=field_id, value=value,
            type=value_type)
def test_scenario8(self):
    """
        Scenario: Successfully building a new dataset from an existing one and analyzing it
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
            And I check that the new dataset has been created
            And I check that the model has been created
            And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
            And I check that the <kfold>-datasets have been created
            And I check that the <kfold>-models have been created
            And I check that all the <kfold>-fold cross-validations have been created
            Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

            Examples:
            |data |output_dir |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric | node_threshold | metric_value |
            |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier?
            |petal length,outlier?,species| 3 | 14 | 2 |2 | precision | 9 | 94.71% |
    """
    # Echo the scenario description before running it.
    print(self.test_scenario8.__doc__)
    # NOTE(review): the Examples table above lists new_fields.json,
    # "petal length,outlier?,species", threshold 9 and 94.71%, while the row
    # actually executed below uses different values -- confirm which is current.
    # Row layout: data, output_dir, json_file, field (unused), model_fields,
    # min_nodes, max_nodes, nodes_step, k_fold, metric, node_threshold,
    # metric_value.
    cases = [
        ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json',
         u'outlier?', u'outlier?,species', '3', '14', '2', '2',
         'precision', '5', '98.21%'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        (data, output_dir, json_file, _field, model_fields, min_nodes,
         max_nodes, nodes_step, k_fold, metric, node_threshold,
         metric_value) = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_dataset_new_fields(
            self, json_file=json_file, model_fields=model_fields)
        test_pred.i_check_create_new_dataset(self)
        test_pred.i_check_create_model(self)
        test_pred.i_create_nodes_analysis(
            self, min_nodes=min_nodes, max_nodes=max_nodes,
            nodes_step=nodes_step, k_fold=k_fold, metric=metric)
        test_pred.i_check_create_kfold_datasets(self, k_fold)
        test_pred.i_check_create_kfold_models(self, k_fold)
        test_pred.i_check_create_all_kfold_cross_validations(self, k_fold)
        test_pred.i_check_node_threshold(
            self, node_threshold, metric, metric_value)
def test_scenario2(self):
    """
        Scenario: Successfully building a new sample from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
            And I check that the sample has been created
            And the sample JSON is like the one in "<sample_JSON_file>"

            Examples:
            |data |output_dir |sample_options | sample_JSON
            |../data/iris.csv | ./scenario_smp_4 | --stat-field "petal length"| ./check_files/stat_info.json
    """
    # Echo the scenario description before running it.
    print(self.test_scenario2.__doc__)
    # Row layout: data, output_dir, sample options, expected JSON file.
    cases = [
        ['data/iris.csv', 'scenario_smp_4', '--stat-field "petal length"',
         'check_files/stat_info.json'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, sample_options, expected_json = case
        dataset.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self)
        test_sample.i_create_sample(
            self, options=sample_options, output_dir=output_dir)
        test_sample.i_check_create_sample(self)
        test_sample.i_check_sample_json(self, check_sample_file=expected_json)
def test_scenario1(self):
    """
        Scenario: Successfully building a new sample from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
            And I check that the sample has been created
            And the sample file is like "<sample_CSV>"

            Examples:
            |data |output_dir |sample_options | sample_CSV
            |../data/iris.csv | ./scenario_smp_1 | --occurrence --sample-header --row-index | ./check_files/sample_iris.csv
            |../data/iris.csv | ./scenario_smp_2 | --precision 0 --rows 10 --row-offset 10 --unique | ./check_files/sample_iris2.csv
            |../data/iris.csv | ./scenario_smp_3 | --row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear | ./check_files/sample_iris3.csv
    """
    # Echo the scenario description before running it.
    print(self.test_scenario1.__doc__)
    # Row layout: data, output_dir, sample options, expected CSV file.
    cases = [
        ['data/iris.csv', 'scenario_smp_1',
         '--occurrence --sample-header --row-index',
         'check_files/sample_iris.csv'],
        ['data/iris.csv', 'scenario_smp_2',
         '--precision 0 --rows 10 --row-offset 10 --unique',
         'check_files/sample_iris2.csv'],
        ['data/iris.csv', 'scenario_smp_3',
         '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear',
         'check_files/sample_iris3.csv'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, sample_options, expected_csv = case
        dataset.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self)
        test_sample.i_create_sample(
            self, options=sample_options, output_dir=output_dir)
        test_sample.i_check_create_sample(self)
        test_sample.i_check_sample_file(self, check_sample_file=expected_csv)
def test_scenario2(self):
    """
        Scenario: Successfully building a new sample from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
            And I check that the sample has been created
            And the sample JSON is like the one in "<sample_JSON_file>"

            Examples:
            |data |output_dir |sample_options | sample_JSON
            |../data/iris.csv | ./scenario_smp_4 | --stat-field "petal length"| ./check_files/stat_info.json
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario2.__doc__)
    rows = [
        # data, output_dir, sample options, JSON file to compare against
        ['data/iris.csv',
         'scenario_smp_4',
         '--stat-field "petal length"',
         'check_files/stat_info.json'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, options, json_check = row
        dataset.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self)
        test_sample.i_create_sample(self, options=options, output_dir=logs_dir)
        test_sample.i_check_create_sample(self)
        test_sample.i_check_sample_json(self, check_sample_file=json_check)
def test_scenario1(self): """ Scenario: Successfully building test predictions from dataset specifying objective field and model fields Given I create a BigML dataset from "<data>" and store logs in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>" And I check that the model has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |data | output_dir | test | output |predictions_file | objective | fields | | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv | ./scénario1/predictions.csv | ./check_files/predictions_iris_2fb.csv | spécies | "pétal width" | """ print self.test_scenario1.__doc__ examples = [ ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']] for example in examples: print "\nTesting with:\n", example dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3]) test_pred.i_check_create_model(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario7(self):
    """
        Scenario: Successfully importing fields summary to a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I import fields attributes in file "<summary_file>" to dataset
            Then the field "<field_id>" has "<attribute>" equal to "<attribute_value>"

            Examples:
            |data |output_dir | summary_file | field_id | attribute | attribute_value
            |../data/iris.csv | ./scenario_d_7 | fields_summary_modified.csv | 000000 | name | sepal_length
    """
    # Echo the scenario description before running it.
    print(self.test_scenario7.__doc__)
    # Row layout: data, output_dir, summary file, field id, attribute,
    # expected attribute value.
    cases = [
        ['data/iris.csv', 'scenario_d_7', 'data/fields_summary_modified.csv',
         '000000', 'name', 'sepal_length'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, summary_file, field_id, attribute, expected = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_import_fields(self, summary=summary_file)
        dataset_adv.field_attribute_value(
            self, field=field_id, attribute=attribute,
            attribute_value=expected)
def test_scenario5(self):
    """
        Scenario: Successfully building a filtered dataset from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML filtered dataset with filter "<filter_exp>" from previous dataset and store logs in "<output_dir>"
            And I check that the dataset has been created
            And the number of records in the dataset is <filtered_records>

            Examples:
            |data |output_dir | filtered_records | filter_exp
            |../data/iris.csv | ./scenario_d_5 | 50 | (= (f "000004") "Iris-setosa")
    """
    # Echo the scenario description before running it.
    print(self.test_scenario5.__doc__)
    # Row layout: data, output_dir, expected record count, filter expression.
    cases = [
        ['data/iris.csv', 'scenario_d_5', '50',
         '(= (f "000004") "Iris-setosa")'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, filtered_records, filter_exp = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_filtered_dataset_from_dataset(
            self, filter_exp=filter_exp, output_dir=output_dir)
        # The second dataset check is made with the 'gen ' suffix.
        test_pred.i_check_create_dataset(self, suffix='gen ')
        test_anomaly.i_check_dataset_lines_number(self, filtered_records)
def test_scenario1(self):
    """
        Scenario: Successfully building a new dataset from an existing one
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
            And I check that the new dataset has been created
            And I check that the model has been created
            Then I check that the new dataset has field "<field>"

            Examples:
            |data |output_dir |new_fields | field | model_fields
            |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
    """
    # Echo the scenario description before running it.
    print(self.test_scenario1.__doc__)
    # Row layout: data, output_dir, new-fields JSON, field to look for,
    # model fields.
    cases = [
        ['data/iris.csv', 'scenario_d_1', 'data/new_fields.json',
         u'outlier?', u'petal length,outlier?,species'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, json_file, field, model_fields = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_dataset_new_fields(
            self, json_file=json_file, model_fields=model_fields)
        test_pred.i_check_create_new_dataset(self)
        test_pred.i_check_create_model(self)
        dataset_adv.i_check_dataset_has_field(self, field)
def test_scenario4(self):
    """
        Scenario: Successfully building a multi-dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from previous source and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I create a multi-dataset from the datasets file and store logs in "<output_dir2>"
            And I check that the multi-dataset has been created
            Then I check that the multi-dataset's origin are the datasets in "<output_dir>"

            Examples:
            |data |output_dir |output_dir2 |
            |../data/iris.csv | ./scenario_d_4 | ./scenario_d_4a|
    """
    # Echo the scenario description before running it.
    print(self.test_scenario4.__doc__)
    # Row layout: data, output_dir, output_dir2 (multi-dataset logs).
    cases = [
        ['data/iris.csv', 'scenario_d_4', 'scenario_d_4a'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, multi_output_dir = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        # Second dataset is built from the source created above.
        dataset_adv.i_create_dataset_from_source(self, output_dir=output_dir)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_multi_dataset(self, multi_output_dir)
        dataset_adv.i_check_create_multi_dataset(self)
        dataset_adv.i_check_multi_dataset_origin(self, output_dir=output_dir)
def test_scenario4(self):
    """
        Scenario: Successfully building a multi-dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from previous source and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I create a multi-dataset from the datasets file and store logs in "<output_dir2>"
            And I check that the multi-dataset has been created
            Then I check that the multi-dataset's origin are the datasets in "<output_dir>"

            Examples:
            |data |output_dir |output_dir2 |
            |../data/iris.csv | ./scenario_d_4 | ./scenario_d_4a|
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario4.__doc__)
    rows = [
        # data, logs directory, multi-dataset logs directory
        ['data/iris.csv', 'scenario_d_4', 'scenario_d_4a'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, multi_logs_dir = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_dataset_from_source(self, output_dir=logs_dir)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_multi_dataset(self, multi_logs_dir)
        dataset_adv.i_check_create_multi_dataset(self)
        dataset_adv.i_check_multi_dataset_origin(self, output_dir=logs_dir)
def test_scenario11(self):
    """
        Scenario: Successfully building association from a sampled dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML association with params "<params>" from dataset in "<output_dir>"
            And I check that the association has been created
            And the association params are "<params_json>"

            Examples:
            |data |output_dir | params | params_json
            |../data/iris.csv | ./scenario_d_11 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
    """
    # Echo the scenario description before running it.
    print(self.test_scenario11.__doc__)
    # Row layout: data, output_dir, association params, expected params JSON.
    # NOTE(review): the Examples table spells the key "sample-rate" while the
    # JSON actually checked below uses "sample_rate" -- confirm the table.
    cases = [
        ['data/iris.csv', 'scenario_d_11', '--sample-rate 0.2 --replacement',
         '{"sample_rate": 0.2, "replacement": true}'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, params, params_json = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_association_with_params_from_dataset(
            self, params=params, output_dir=output_dir)
        test_pred.i_check_create_association(self)
        dataset_adv.i_check_association_params(self, params_json=params_json)
def test_scenario1(self):
    """
        Scenario: Successfully building a new dataset from an existing one
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
            And I check that the new dataset has been created
            And I check that the model has been created
            Then I check that the new dataset has field "<field>"

            Examples:
            |data |output_dir |new_fields | field | model_fields
            |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario1.__doc__)
    rows = [
        # data, logs directory, new-fields JSON, expected field, model fields
        ['data/iris.csv',
         'scenario_d_1',
         'data/new_fields.json',
         u'outlier?',
         u'petal length,outlier?,species'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, specs_json, expected_field, model_fields = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_dataset_new_fields(
            self, json_file=specs_json, model_fields=model_fields)
        test_pred.i_check_create_new_dataset(self)
        test_pred.i_check_create_model(self)
        dataset_adv.i_check_dataset_has_field(self, expected_field)
def test_scenario5(self):
    """
        Scenario: Successfully building a filtered dataset from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML filtered dataset with filter "<filter_exp>" from previous dataset and store logs in "<output_dir>"
            And I check that the dataset has been created
            And the number of records in the dataset is <filtered_records>

            Examples:
            |data |output_dir | filtered_records | filter_exp
            |../data/iris.csv | ./scenario_d_5 | 50 | (= (f "000004") "Iris-setosa")
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario5.__doc__)
    rows = [
        # data, logs directory, expected number of records, filter expression
        ['data/iris.csv',
         'scenario_d_5',
         '50',
         '(= (f "000004") "Iris-setosa")'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, expected_records, filter_expression = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_filtered_dataset_from_dataset(
            self, filter_exp=filter_expression, output_dir=logs_dir)
        test_pred.i_check_create_dataset(self, suffix='gen ')
        test_anomaly.i_check_dataset_lines_number(self, expected_records)
def test_scenario11(self):
    """
        Scenario: Successfully building association from a sampled dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML association with params "<params>" from dataset in "<output_dir>"
            And I check that the association has been created
            And the association params are "<params_json>"

            Examples:
            |data |output_dir | params | params_json
            |../data/iris.csv | ./scenario_d_11 | "--sample-rate 0.2 --replacement" | {"sample-rate": 0.2, "replacement": true}
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario11.__doc__)
    # NOTE(review): the Examples table writes "sample-rate" but the JSON
    # compared below has "sample_rate" -- confirm which spelling is intended.
    rows = [
        # data, logs directory, association options, expected params JSON
        ['data/iris.csv',
         'scenario_d_11',
         '--sample-rate 0.2 --replacement',
         '{"sample_rate": 0.2, "replacement": true}'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, association_options, expected_json = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_association_with_params_from_dataset(
            self, params=association_options, output_dir=logs_dir)
        test_pred.i_check_create_association(self)
        dataset_adv.i_check_association_params(self, params_json=expected_json)
def test_scenario1(self):
    """
        Scenario: Successfully building a new sample from a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
            And I check that the sample has been created
            And the sample file is like "<sample_CSV>"

            Examples:
            |data |output_dir |sample_options | sample_CSV
            |../data/iris.csv | ./scenario_smp_1 | --occurrence --sample-header --row-index | ./check_files/sample_iris.csv
            |../data/iris.csv | ./scenario_smp_2 | --precision 0 --rows 10 --row-offset 10 --unique | ./check_files/sample_iris2.csv
            |../data/iris.csv | ./scenario_smp_3 | --row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear | ./check_files/sample_iris3.csv
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario1.__doc__)
    rows = [
        # data, logs directory, sample options, CSV file to compare against
        ['data/iris.csv', 'scenario_smp_1',
         '--occurrence --sample-header --row-index',
         'check_files/sample_iris.csv'],
        ['data/iris.csv', 'scenario_smp_2',
         '--precision 0 --rows 10 --row-offset 10 --unique',
         'check_files/sample_iris2.csv'],
        ['data/iris.csv', 'scenario_smp_3',
         '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear',
         'check_files/sample_iris3.csv'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, options, csv_check = row
        dataset.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self)
        test_sample.i_create_sample(self, options=options, output_dir=logs_dir)
        test_sample.i_check_create_sample(self)
        test_sample.i_check_sample_file(self, check_sample_file=csv_check)
def test_scenario13(self):
    """
        Scenario: Successfully building dataset using sql transformations
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new dataset joining both datasets and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I check that datasets have been joined

            Examples:
            |data |output_dir |
            |../data/iris.csv | ./scenario_d_13 |
    """
    # Fixed: this used to print test_scenario12's docstring, so the banner
    # described the juxtaposition scenario instead of this one.
    print(self.test_scenario13.__doc__)
    # Row layout: data, output_dir, join SQL, value handed to i_check_joined
    # (presumably the expected size of the joined dataset -- confirm against
    # that step). The Examples table above only lists the first two columns.
    cases = [
        ['data/iris.csv', 'scenario_d_13',
         # The backslashes are literal characters in the SQL text; they are
         # written as "\\" so the value no longer depends on the
         # unrecognised escape "\`".
         "select A.*,B.* from A join B "
         "on A.\\`000000\\` = \\`B.000000\\`",
         900],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, sql, join_check = case
        # Build the two datasets that take part in the join. The original
        # also collected world.dataset into a list that was never read.
        for _ in range(2):
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_join(self, output_dir=output_dir, sql=sql)
        test_pred.i_check_create_dataset(self, suffix="gen ")
        dataset_adv.i_check_joined(self, join_check)
def test_scenario3(self):
    """
        Scenario: Successfully exporting a dataset to a CSV file
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I export the dataset to the CSV file "<csv_file>"
            Then file "<csv_file>" is like file "<data>"

            Examples:
            |data |output_dir |csv_file |
            |../data/iris.csv | ./scenario_d_3 |dataset.csv
    """
    # Echo the scenario description before running it.
    print(self.test_scenario3.__doc__)
    # Row layout: data, output_dir, exported CSV file name.
    cases = [
        ['data/iris.csv', 'scenario_d_3', 'dataset.csv'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir, csv_file = case
        dataset_adv.i_create_dataset(self, data=data, output_dir=output_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_export_the_dataset(self, csv_file)
        # The export is compared with the input data file.
        dataset_adv.i_files_equal(self, csv_file, data)
def test_scenario13(self):
    """
        Scenario: Successfully building dataset using sql transformations
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new dataset joining both datasets and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I check that datasets have been joined

            Examples:
            |data |output_dir |
            |../data/iris.csv | ./scenario_d_13 |
    """
    # Fixed: the banner previously came from test_scenario12's docstring
    # (copy-paste slip), which describes a different scenario.
    print(self.test_scenario13.__doc__)
    # The join statement; backslashes are literal characters of the SQL text
    # and are spelled "\\" rather than via the unrecognised escape "\`".
    join_sql = ("select A.*,B.* from A join B "
                "on A.\\`000000\\` = \\`B.000000\\`")
    rows = [
        # data, logs directory, join SQL, argument for i_check_joined
        # (looks like an expected size of the joined dataset -- TODO confirm)
        ['data/iris.csv', 'scenario_d_13', join_sql, 900],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, sql, join_check = row
        # Two datasets are created from the same file before joining them.
        # The write-only list of world.dataset values was dropped: nothing
        # in this scenario ever read it.
        for _ in range(2):
            dataset_adv.i_create_dataset(
                self, data=csv_path, output_dir=logs_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_create_join(self, output_dir=logs_dir, sql=sql)
        test_pred.i_check_create_dataset(self, suffix="gen ")
        dataset_adv.i_check_joined(self, join_check)
def test_scenario1(self): """ Scenario: Successfully building test predictions from dataset specifying objective field and model fields Given I create a BigML dataset from "<data>" and store logs in "<output_dir>" And I check that the source has been created And I check that the dataset has been created And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>" And I check that the model has been created And I check that the predictions are ready Then the local prediction file is like "<predictions_file>" Examples: |data | output_dir | test | output |predictions_file | objective | fields | | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv | ./scénario1/predictions.csv | ./check_files/predictions_iris_2fb.csv | spécies | "pétal width" | """ print self.test_scenario1.__doc__ examples = [[ 'data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"' ]] for example in examples: print "\nTesting with:\n", example dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1]) test_pred.i_check_create_source(self) test_pred.i_check_create_dataset(self, suffix=None) test_pred.i_create_resources_from_dataset_objective_model( self, objective=example[5], fields=example[6], test=example[2], output=example[3]) test_pred.i_check_create_model(self) test_pred.i_check_create_predictions(self) test_pred.i_check_predictions(self, example[4])
def test_scenario12(self):
    """
        Scenario: Successfully building dataset juxtaposing datasets
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new dataset juxtaposing both datasets and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I check that datasets have been juxtaposed

            Examples:
            |data |output_dir |
            |../data/iris.csv | ./scenario_d_12 |
    """
    # Echo the scenario description before running it.
    print(self.test_scenario12.__doc__)
    # Row layout: data, output_dir.
    cases = [
        ['data/iris.csv', 'scenario_d_12'],
    ]
    for case in cases:
        # One pre-formatted argument: same output as the two-item print
        # statement, and valid on both Python 2 and 3.
        print("\nTesting with:\n%s" % (case,))
        data, output_dir = case
        # Create two datasets from the same file, remembering world.dataset
        # after each one so the final step can check the juxtaposition.
        datasets = []
        for _ in range(2):
            dataset_adv.i_create_dataset(
                self, data=data, output_dir=output_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
        dataset_adv.i_create_juxtaposed(self, output_dir=output_dir)
        test_pred.i_check_create_dataset(self, suffix="gen ")
        dataset_adv.i_check_juxtaposed(self, datasets)
def test_scenario2(self):
    """
        Scenario: Successfully updating a dataset with attributes in a JSON file
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I update the dataset using the specs in JSON file "<new_fields>"
            Then I check that property "<property>" for field id "<field_id>" is "<value>" of type "<type>"

            Examples:
            |data |output_dir |new_fields | property | field_id | value | type
            |../data/iris.csv | ./scenario_d_2 |../data/attributes.json| preferred | 000001 | false | boolean
            |../data/iris.csv | ./scenario_d_2_b |../data/attributes_col.json| preferred | 000001 | false | boolean
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario2.__doc__)
    rows = [
        # data, logs directory, attributes JSON, property, field id,
        # expected value, value type
        ['data/iris.csv', 'scenario_d_2', 'data/attributes.json',
         'preferred', '000001', 'false', 'boolean'],
        ['data/iris.csv', 'scenario_d_2_b', 'data/attributes_col.json',
         'preferred', '000001', 'false', 'boolean'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        (csv_path, logs_dir, attributes_json, property_name, field_id,
         expected_value, expected_type) = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_update_dataset_new_properties(
            self, json_file=attributes_json)
        dataset_adv.i_check_dataset_has_property(
            self, attribute=property_name, field_id=field_id,
            value=expected_value, type=expected_type)
def test_scenario7(self):
    """
        Scenario: Successfully importing fields summary to a dataset
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I import fields attributes in file "<summary_file>" to dataset
            Then the field "<field_id>" has "<attribute>" equal to "<attribute_value>"

            Examples:
            |data |output_dir | summary_file | field_id | attribute | attribute_value
            |../data/iris.csv | ./scenario_d_7 | fields_summary_modified.csv | 000000 | name | sepal_length
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario7.__doc__)
    rows = [
        # data, logs directory, fields summary CSV, field id, attribute name,
        # expected attribute value
        ['data/iris.csv',
         'scenario_d_7',
         'data/fields_summary_modified.csv',
         '000000',
         'name',
         'sepal_length'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        (csv_path, logs_dir, summary_csv, field_id, attribute_name,
         expected_value) = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_import_fields(self, summary=summary_csv)
        dataset_adv.field_attribute_value(
            self, field=field_id, attribute=attribute_name,
            attribute_value=expected_value)
def test_scenario3(self):
    """
        Scenario: Successfully exporting a dataset to a CSV file
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I export the dataset to the CSV file "<csv_file>"
            Then file "<csv_file>" is like file "<data>"

            Examples:
            |data |output_dir |csv_file |
            |../data/iris.csv | ./scenario_d_3 |dataset.csv
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario3.__doc__)
    rows = [
        # data, logs directory, name of the exported CSV
        ['data/iris.csv', 'scenario_d_3', 'dataset.csv'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir, exported_csv = row
        dataset_adv.i_create_dataset(self, data=csv_path, output_dir=logs_dir)
        test_pred.i_check_create_source(self)
        test_pred.i_check_create_dataset(self, suffix=None)
        dataset_adv.i_export_the_dataset(self, exported_csv)
        dataset_adv.i_files_equal(self, exported_csv, csv_path)
def test_scenario12(self):
    """
        Scenario: Successfully building dataset juxtaposing datasets
            Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I create a new dataset juxtaposing both datasets and store logs in "<output_dir>"
            And I check that the dataset has been created
            And I check that datasets have been juxtaposed

            Examples:
            |data |output_dir |
            |../data/iris.csv | ./scenario_d_12 |
    """
    # Print the scenario text, then run every example row through the steps.
    print(self.test_scenario12.__doc__)
    rows = [
        # data, logs directory
        ['data/iris.csv', 'scenario_d_12'],
    ]
    for row in rows:
        # Formatting the row into one string keeps the printed text identical
        # to the two-item print statement on Python 2.
        print("\nTesting with:\n%s" % (row,))
        csv_path, logs_dir = row
        created = []
        # Same creation sequence twice; world.dataset is recorded after each
        # pass and the pair is handed to the juxtaposition check.
        for _ in range(2):
            dataset_adv.i_create_dataset(
                self, data=csv_path, output_dir=logs_dir)
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            created.append(world.dataset)
        dataset_adv.i_create_juxtaposed(self, output_dir=logs_dir)
        test_pred.i_check_create_dataset(self, suffix="gen ")
        dataset_adv.i_check_juxtaposed(self, created)