Beispiel #1
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a model and exporting it:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I export the <"pmml"> model to file "<expected_file>"
                Then I check the model is stored in "<expected_file>" file in <"pmml">

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file         | pmml
                | data/iris.csv          | 10      | 10     | 10     | tmp/model/iris.json   | false
                | data/iris_sp_chars.csv | 10      | 10     | 10     | tmp/model/iris_sp_chars.pmml   | true

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '30', '30', '30', 'tmp/model/iris.json', False],
            ['data/iris_sp_chars.csv', '30', '30', '30', 'tmp/model/iris_sp_chars.pmml', True]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_export_model(self, example[5], example[4])
            model_create.i_check_model_stored(self, example[4], example[5])
    def test_scenario2(self):
        """
            Scenario: Successfully comparing centroids with configuration options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a cluster with options "<options>"
                And I wait until the cluster is ready less than <time_3> secs
                And I create a local cluster
                When I create a centroid for "<data_input>"
                Then the centroid is "<centroid>" with distance "<distance>"
                And I create a local centroid for "<data_input>"
                Then the local centroid is "<centroid>" with distance "<distance>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                            | centroid  | distance | full_data_input
        """
        examples = [
            ['data/iris.csv', '30', '30', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}'],
            ['data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}']]
        show_doc(self.test_scenario2, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            cluster_create.i_create_a_cluster_with_options(self, example[4])
            cluster_create.the_cluster_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_cluster(self)
            prediction_create.i_create_a_centroid(self, example[8])
            prediction_create.the_centroid_is_with_distance(self, example[6], example[7])
            prediction_compare.i_create_a_local_centroid(self, example[5])
            prediction_compare.the_local_centroid_is(self, example[6], example[7])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction with a user's project connection:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And the source is in the project
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file_with_project_conn(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            assert world.source['project'] == world.project_id
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            assert world.dataset['project'] == world.project_id
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            assert world.model['project'] == world.project_id
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            assert world.prediction['project'] == world.project_id
Beispiel #4
0
    def test_scenario1(self):
        """

            Scenario: Successfully exporting a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I download the dataset file to "<local_file>"
                Then file "<local_file>" is like file "<data>"

                Examples:
                | data             | time_1  | time_2 | local_file |
                | ../data/iris.csv | 30      | 30     | ./tmp/exported_iris.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '30', '30', 'tmp/exported_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            dataset_create.i_export_a_dataset(self, example[3])
            dataset_create.files_equal(self, example[3], example[0])
    def test_scenario3(self):
        """
            Scenario: Successfully comparing scores from anomaly detectors:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an anomaly detector
                And I wait until the anomaly detector is ready less than <time_3> secs
                And I create a local anomaly detector
                When I create an anomaly score for "<data_input>"
                Then the anomaly score is "<score>"
                And I create a local anomaly score for "<data_input>"
                Then the local anomaly score is "<score>"

                Examples:
                | data                 | time_1  | time_2 | time_3 | data_input                            | score  |

        """
        examples = [
            ['data/tiny_kdd.csv', '30', '30', '30', '{"000020": 255.0, "000004": 183.0, "000016": 4.0, "000024": 0.04, "000025": 0.01, "000026": 0.0, "000019": 0.25, "000017": 4.0, "000018": 0.25, "00001e": 0.0, "000005": 8654.0, "000009": "0", "000023": 0.01, "00001f": 123.0}', '0.69802']]
        show_doc(self.test_scenario3, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            anomaly_create.i_create_an_anomaly(self)
            anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_anomaly(self)
            prediction_create.i_create_an_anomaly_score(self, example[4])
            prediction_create.the_anomaly_score_is(self, example[5])
            prediction_compare.i_create_a_local_anomaly_score(self, example[4])
            prediction_compare.the_local_anomaly_score_is(self, example[5])
Beispiel #6
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction from a source in a remote location

                Given I create a data source using the url "<url>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | url                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | s3://bigml-public/csv/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['s3://bigml-public/csv/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_create_using_url(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
Beispiel #7
0
    def test_scenario1(self):
        """
            Scenario: Successfully creating a model from a dataset list:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I store the dataset id in a list
                And I create a dataset
                And I wait until the dataset is ready less than <time_3> secs
                And I store the dataset id in a list
                Then I create a model from a dataset list
                And I wait until the model is ready less than <time_4> secs
                And I check the model stems from the original dataset list

                Examples:
                | data                | time_1  | time_2 | time_3 |  time_4 |
                | ../data/iris.csv | 10      | 10     | 10     |  10
        """
        print self.test_scenario1.__doc__
        examples = [["data/iris.csv", "10", "10", "10", "10"]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            multimodel_create.i_store_dataset_id(self)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[3])
            multimodel_create.i_store_dataset_id(self)
            model_create.i_create_a_model_from_dataset_list(self)
            model_create.the_model_is_finished_in_less_than(self, example[4])
            multimodel_create.i_check_model_datasets_and_datasets_ids(self)
Beispiel #8
0
    def test_scenario2(self):
        """
            Scenario 2: Successfully creating Topic Model from a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create topic model from a dataset
                And I wait until the topic model is ready less than <time_3> secs
                And I update the topic model name to "<topic_model_name>"
                When I wait until the topic_model is ready less than <time_4> secs
                Then the topic model name is "<topic_model_name>"

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | topic_model_name | params
                | ../data/spam.csv | 100      | 100     | 200     | 500 | my new topic model name | '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}'
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/spam.csv', '100', '100', '10000', '500', 'my new topic model name', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[6])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(self, example[3])
            topic_create.i_update_topic_model_name(self, example[5])
            topic_create.the_topic_model_is_finished_in_less_than(self, example[4])
            topic_create.i_check_topic_model_name(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a split dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a dataset extracting a <rate> sample
                And I wait until the dataset is ready less than <time_3> secs
                When I compare the datasets' instances
                Then the proportion of instances between datasets is <rate>

                Examples:
                | data                | time_1  | time_2 | time_3 | rate |
                | ../data/iris.csv | 10      | 10     | 10     | 0.8 |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '0.8']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[2])
            dataset_create.i_create_a_split_dataset(self, example[4])
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[3])
            dataset_create.i_compare_datasets_instances(self)
            dataset_create.proportion_datasets_instances(self, example[4])
    def test_scenario2(self):
        """
            Scenario: Successfully creating a batch prediction for an ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                When I create a batch prediction for the dataset with the ensemble
                And I wait until the batch prediction is ready less than <time_4> secs
                And I download the created predictions file to "<local_file>"
                Then the batch prediction file is like "<predictions_file>"

                Examples:
                | data             | time_1  | time_2 | number_of_models | tlp | time_3 | time_4 | local_file | predictions_file       |
                | ../data/iris.csv | 30      | 30     | 5                | 1   | 80     | 50     | ./tmp/batch_predictions.csv | ./data/batch_predictions_e.csv |


        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '30', '30', '5', '1', '80', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_e.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble(self, example[3], example[4])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[5])
            batch_pred_create.i_create_a_batch_prediction_ensemble(self)
            batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[6])
            batch_pred_create.i_download_predictions_file(self, example[7])
            batch_pred_create.i_check_predictions(self, example[8])
Beispiel #11
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a model from a dataset list and predicting with it using median:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local multi model
                When I create a local multimodel batch prediction using median for <input_data>
                Then the local prediction is <prediction>

                Examples:
                | data                | time_1  | time_2 | time_3 |  input_data | prediction
                | ../data/grades.csv | 10      | 10     | 10     |  {'Tutorial': 99.47, 'Midterm': 53.12, 'TakeHome': 87.96} | 50
        """
        print self.test_scenario2.__doc__
        examples = [
            ["data/grades.csv", "10", "10", "10", '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', 50]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            world.list_of_models = [world.model]
            compare_pred.i_create_a_local_multi_model(self)
            compare_pred.i_create_a_local_mm_median_batch_prediction(self, example[4])
            compare_pred.the_local_prediction_is(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating an evaluation:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create an evaluation for the model with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | measure       | value  |
                | ../data/iris.csv | 30      | 30     | 30     | 30     | average_phi   | 1      |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '50', '50', '50', '50', 'average_phi', '1']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            evaluation_create.i_create_an_evaluation(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4])
            evaluation_create.the_measured_measure_is_value(self, example[5], example[6])
Beispiel #13
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating local association object:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an association from a dataset
                And I wait until the association is ready less than <time_3> secs
                And I create a local association
                When I get the rules for <"item_list">
                Then the first rule is "<JSON_rule>"

                Examples:
                | data             | time_1  | time_2 | time_3 | item_list                              | JSON_rule  |
                | ../data/tiny_mushrooms.csv | 10      | 20     | 50     | ["Edible"]                   | {'p_value': 2.08358e-17, 'confidence': 1, 'lift': 1.12613, 'lhs': [14], 'leverage': 0.07885, 'lhs_cover': [0.704, 176], 'rhs_cover': [0.888, 222], 'rhs': [1], 'support': [0.704, 176], 'rule_id': u'000038'}

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], {'p_value': 5.26971e-31, 'confidence': 1, 'rhs_cover': [0.488, 122], 'leverage': 0.24986, 'rhs': [19], 'rule_id': u'000002', 'lift': 2.04918, 'lhs': [0, 21, 16, 7], 'lhs_cover': [0.488, 122], 'support': [0.488, 122]}]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            association_create.i_create_an_association_from_dataset(self)
            association_create.the_association_is_finished_in_less_than(self, example[3])
            association_create.i_create_a_local_association(self)
            association_create.i_get_rules_for_item_list(self, example[4])
            association_create.the_first_rule_is(self, example[5])
    def test_scenario5(self):
        """
            Scenario: Successfully comparing association sets:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the association is ready less than <time_3> secs
                And I create a local association
                When I create an association set for "<data_input>"
                Then the association set is like the contents of "<association_set_file>"
                And I create a local association set for "<data_input>"
                Then the local association set is like the contents of "<association_set_file>"

        """
        examples = [
            ['data/groceries.csv', '20', '20', '30', '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', 'data/associations/association_set.json', '{"field1": "cat food"}']]
        show_doc(self.test_scenario5, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            association_create.i_create_an_association_from_dataset(self)
            association_create.the_association_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_association(self)
            prediction_create.i_create_an_association_set(self, example[6])
            prediction_compare.the_association_set_is_like_file(self, example[5])
            prediction_compare.i_create_a_local_association_set(self, example[6])
            prediction_compare.the_local_association_set_is_like_file(self, example[5])
    def test_scenario5(self):
        """
            Scenario: Successfully creating a local prediction from an Ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local Ensemble
                When I create a local ensemble prediction using median with confidence for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | number_of_models | tlp   |  data_input    |prediction  |
                | ../data/grades.csv | 10      | 10     | 50     | 2                | 1     | {}             | 67.5 |
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/grades.csv', '30', '30', '50', '2', '1', '{}', 69.0934]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble(self, example[4], example[5])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.create_local_ensemble_prediction_using_median_with_confidence(self, example[6])
            compare_pred.the_local_prediction_is(self, example[7])
    def test_scenario4(self):
        """

            Scenario: Successfully creating a source from a batch prediction:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a batch prediction for the dataset with the model
                And I wait until the batch prediction is ready less than <time_4> secs
                Then I create a source from the batch prediction
                And I wait until the source is ready less than <time_1> secs

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 |
                | ../data/iris.csv | 30      | 30     | 50     | 50     |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/diabetes.csv', '30', '30', '50', '50']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            batch_pred_create.i_create_a_batch_prediction(self)
            batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4])
            batch_pred_create.i_create_a_source_from_batch_prediction(self)
            source_create.the_source_is_finished(self, example[1])
Beispiel #17
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a Fields object and a summary fields file:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a Fields object from the dataset with objective column "<objective_column>"
                And I export a summary fields file "<summary_file>"
                Then I check that the file "<summary_file>" is like "<expected_file>"

                Examples:
                | data                | time_1  | objective_column | summary_file| expected_file | time_2
                | ../data/iris.csv | 10      | 0 | fields_summary.csv | data/fields/fields_summary.csv | 10
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '10', '0', 'fields_summary.csv', 'data/fields/fields_summary.csv', '10']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[5])
            fields_steps.create_fields_from_dataset(self, example[2])
            fields_steps.generate_summary(self, example[3])
            fields_steps.check_summary_like_expected(self, example[3], example[4])
    def test_scenario2(self):
        """
            Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                         |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_missing2_MISSINGS.txt     |

        """
        print self.test_scenario2.__doc__
        examples = [["data/iris_missing2.csv", "10", "10", "10", "data/model/if_then_rules_iris_missing2_MISSINGS.txt"]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with_missing_splits(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
    def test_scenario2(self):
        """
            Scenario: Successfully obtaining parsing error counts:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<params>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                When I ask for the error counts in the fields
                Then the error counts dict is "<error_values>"

                Examples:
                | data                     | time_1  | params                                          | time_2 |error_values       |
                | ../data/iris_missing.csv | 30      | {"fields": {"000000": {"optype": "numeric"}}}   |30      |{"000000": 1}      |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}}', '30', '{"000000": 1}']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[2])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[3])
            dataset_read.i_get_the_errors_values(self)
            dataset_read.i_get_the_properties_values(
                self, 'error counts', example[4])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction in DEV mode:
                Given I want to use api in DEV mode
                When I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And the source has DEV True 
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_read.source_has_dev(self, True)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a batch prediction:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                When I create a batch prediction for the dataset with the model
                And I wait until the batch prediction is ready less than <time_4> secs
                And I download the created predictions file to "<local_file>"
                Then the batch prediction file is like "<predictions_file>"

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | local_file | predictions_file       |
                | ../data/iris.csv | 30      | 30     | 50     | 50     | ./tmp/batch_predictions.csv |./data/batch_predictions.csv |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            batch_pred_create.i_create_a_batch_prediction(self)
            batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4])
            batch_pred_create.i_download_predictions_file(self, example[5])
            batch_pred_create.i_check_predictions(self, example[6])
    def test_scenario5(self):
        """
            Scenario: Successfully creating a batch anomaly score from an anomaly detector:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an anomaly detector
                And I wait until the anomaly detector is ready less than <time_3> secs
                When I create a batch anomaly score
                And I check the batch anomaly score is ok
                And I wait until the batch anomaly score is ready less than <time_4> secs
                And I download the created anomaly score file to "<local_file>"
                Then the batch anomaly score file is like "<predictions_file>"

                Examples:
                | data             | time_1  | time_2 | time_3 | time_4 | local_file | predictions_file       |
                | ../data/tiny_kdd.csv | 30      | 30     | 50     | 50     | ./tmp/batch_predictions.csv |./data/batch_predictions_a.csv |

        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/tiny_kdd.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_a.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            anomaly_create.i_create_an_anomaly(self)
            anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3])
            batch_pred_create.i_create_a_batch_prediction_with_anomaly(self)
            batch_pred_create.the_batch_anomaly_score_is_finished_in_less_than(self, example[4])
            batch_pred_create.i_download_anomaly_score_file(self, example[5])
            batch_pred_create.i_check_predictions(self, example[6])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a local prediction from an Ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local Ensemble
                When I create a local ensemble prediction with confidence for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"
                And the local probabilities are "<probabilities>"

                Examples:
                | data             | time_1  | time_2 | time_3 | number_of_models | tlp   |  data_input    |prediction  | confidence
                | ../data/iris.csv | 10      | 10     | 50     | 5                | 1     | {"petal width": 0.5} | Iris-versicolor | 0.3687 | [0.3403, 0.4150, 0.2447]
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '50', '5', '1', '{"petal width": 0.5}', 'Iris-versicolor', '0.415', '["0.3403", "0.4150", "0.2447"]' ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble(self, example[4], example[5])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.create_local_ensemble_prediction_with_confidence(self, example[6])
            compare_pred.the_local_prediction_is(self, example[7])
            compare_pred.the_local_prediction_confidence_is(self, example[8])
            compare_pred.the_local_probabilities_are(self, example[9])
    def test_scenario2(self):
        """

            Scenario: Successfully creating an evaluation for an ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble of <number_of_models> models and <tlp> tlp
                And I wait until the ensemble is ready less than <time_3> secs
                When I create an evaluation for the ensemble with the dataset
                And I wait until the evaluation is ready less than <time_4> secs
                Then the measured "<measure>" is <value>

                Examples:
                | data             | time_1  | time_2 | number_of_models | tlp | time_3 | time_4 | measure       | value  |
                | ../data/iris.csv | 30      | 30     | 5                | 1   | 50     | 30     | average_phi   | 0.98029   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.98029']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble(self, example[3], example[4])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[5])
            evaluation_create.i_create_an_evaluation_ensemble(self)
            evaluation_create.the_evaluation_is_finished_in_less_than(self, example[6])
            evaluation_create.the_measured_measure_is_value(self, example[7], example[8])
Beispiel #25
0
    def test_scenario3(self):
        """
            Scenario: Successfully creating a Fields object and a modified fields structure from a file:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a Fields object from the dataset with objective column "<objective_column>"
                And I import a summary fields file "<summary_file>" as a fields structure
                Then I check the new field structure has field "<field_id>" as "<optype>"

                Examples:
                | data                | time_1  | objective_column | summary_file| field_id | optype | time_2
                | ../data/iris.csv | 10      | 0 | fields_summary_modified.csv | 000000 | categorical | 10
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', '10', '0', 'data/fields/fields_summary_modified.csv', '000000', 'categorical', '10']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[6])
            fields_steps.create_fields_from_dataset(self, example[2])
            fields_steps.import_summary_file(self, example[3])
            fields_steps.check_field_type(self, example[4], example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating an statistical test from a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an statistical test from a dataset
                And I wait until the statistical test is ready less than <time_3> secs
                And I update the statistical test name to "<test_name>"
                When I wait until the statistical test is ready less than <time_4> secs
                Then the statistical test name is "<correlation_name>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | test_name |
                | ../data/iris.csv | 10      | 10     | 10     | 10 | my new statistical test name |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '10', 'my new statistical test name']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            statistical_tst_create.i_create_a_tst_from_dataset(self)
            statistical_tst_create.the_tst_is_finished_in_less_than(self, example[3])
            statistical_tst_create.i_update_tst_name(self, example[5])
            statistical_tst_create.the_tst_is_finished_in_less_than(self, example[4])
            statistical_tst_create.i_check_tst_name(self, example[5])
Beispiel #27
0
    def test_scenario6(self):
        """
            Scenario: Successfully creating an anomaly score:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an anomaly detector from a dataset
                And I wait until the anomaly detector is ready less than <time_3> secs
                When I create an anomaly score for "<data_input>"
                Then the anomaly score is "<score>"

                Examples:
                | data                 | time_1  | time_2 | time_3 | data_input         | score  |
                | ../data/tiny_kdd.csv | 10      | 10     | 100     | {"src_bytes": 350} | 0.92618 |
                | ../data/iris_sp_chars.csv | 10      | 10     | 100     | {"pétal&width\u0000": 300} | 0.90198 |
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/tiny_kdd.csv', '10', '10', '100', '{"src_bytes": 350}', '0.92846'],
            ['data/iris_sp_chars.csv', '10', '10', '100', '{"pétal&width\u0000": 300}', '0.89313']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            anomaly_create.i_create_an_anomaly(self)
            anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_an_anomaly_score(self, example[4])
            prediction_create.the_anomaly_score_is(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating and reading a public dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I make the dataset public
                And I wait until the dataset is ready less than <time_3> secs
                When I get the dataset status using the dataset's public url
                Then the dataset's status is FINISHED

                Examples:
                | data                | time_1  | time_2 | time_3 |
                | ../data/iris.csv | 10      | 10     | 10     |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            dataset_create.make_the_dataset_public(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[3])
            dataset_create.build_local_dataset_from_public_url(self)
            dataset_create.dataset_status_finished(self)
Beispiel #29
0
    def test_scenario5(self):
        """
            Scenario: Successfully creating a centroid and the associated dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a cluster
                And I wait until the cluster is ready less than <time_3> secs
                When I create a centroid for "<data_input>"
                And I check the centroid is ok
                Then the centroid is "<centroid>"
                And I create a dataset from the cluster and the centroid
                And I wait until the dataset is ready less than <time_2> secs
                And I check that the dataset is created for the cluster and the centroid

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | centroid  |
                | ../data/diabetes.csv | 10      | 20     | 20     | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 3 |
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/diabetes.csv', '10', '20', '20', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            cluster_create.i_create_a_cluster(self)
            cluster_create.the_cluster_is_finished_in_less_than(self, example[3])
            prediction_create.i_create_a_centroid(self, example[4])
            prediction_create.the_centroid_is(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction using a public model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model public
                And I wait until the model is ready less than <time_3> secs
                And I check the model status using the model's public url
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | objective | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | 000004    | Iris-setosa |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.make_the_model_public(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.model_from_public_url(self)
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
    def test_scenario12(self):
        """
            Scenario: Successfully comparing logistic regression predictions with constant fields:

                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I update the dataset with "<params>"
                And I wait until the dataset is ready less than <time_4> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 |time_4| data_input                                 | prediction  | field_id

        """
        examples = [[
            'data/constant_field.csv', '10', '10', '50', '10',
            '{"a": 1, "b": 1, "c": 1}', 'a',
            '{"fields": {"000000": {"preferred": true}}}'
        ]]
        show_doc(self.test_scenario12, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            dataset_create.i_update_dataset_with(self, example[7])
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[4])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
Beispiel #32
0
    def test_scenario7(self):
        """
            Scenario: Successfully comparing predictions for ensembles with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an esemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence | params
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'],


        """
        examples = [
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}],
            ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', {"operating_kind": "confidence"}],
            ['data/grades.csv', '10', '10', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}', {}],
            ['data/grades.csv', '10', '10', '50', '{"Midterm": 20}', '000005', '54.82214', '25.89672', '{"number_of_models": 5}', {"operating_kind": "confidence"}],
            ['data/grades.csv', '10', '10', '50', '{"Midterm": 20}', '000005', '45.4573', '29.58403', '{"number_of_models": 5}', {}],
            ['data/grades.csv', '10', '10', '50', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '42.814', '31.51804', '{"number_of_models": 5}', {}]]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[8])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_proportional_prediction(self, example[4], example[9])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_create.create_local_ensemble_proportional_prediction_with_confidence(self, example[4], example[9])
            prediction_compare.the_local_ensemble_prediction_is(self, example[6])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions for deepnets with operating point:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a deepnet with objective "<objective>" and "<params>"
                And I wait until the deepnet is ready less than <time_3> secs
                And I create a local deepnet
                When I create a prediction with operating point "<operating_point>" for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction with operating point "<operating_point>" for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params | operating_point,


        """
        examples = [[
            'data/iris.csv', '10', '50', '60', '{"petal width": 4}', '000004',
            'Iris-versicolor', '{}', {
                "kind": "probability",
                "threshold": 1,
                "positive_class": "Iris-virginica"
            }
        ]]
        show_doc(self.test_scenario3, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_deepnet_with_objective_and_params(
                self, example[5], example[7])
            model_create.the_deepnet_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_deepnet(self)
            prediction_create.i_create_a_deepnet_prediction_with_op(
                self, example[4], example[8])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_deepnet_prediction_with_op(
                self, example[4], example[8])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario2(self):
        """

            Scenario: Successfully creating a local batch prediction from a multi model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a local multi model
                When I create a batch multimodel prediction for "<data_inputs>"
                Then the predictions are "<predictions>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params             |  tag  |  data_inputs                                | predictions  |
                | ../data/iris.csv | 10      | 10     | 10     | {"tags":["mytag"]} | mytag |  [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}] | ["Iris-setosa", "Iris-virginica"] |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag',
            '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}]',
            '["Iris-setosa", "Iris-virginica"]'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_models(self, example[5])
            compare_pred.i_create_a_local_multi_model(self)
            compare_pred.i_create_a_batch_prediction_from_a_multi_model(
                self, example[6])
            compare_pred.the_batch_mm_predictions_are(self, example[7])
    def test_scenario8(self):
        """
            Scenario: Successfully comparing logistic regression predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regresssion model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                                                                                 | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}         | 'Iris-virginica' |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}             | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}           | Iris-virginica |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 1}                                                                         | Iris-versicolor |
                | ../data/iris_sp_chars.csv | 10      | 10     | 10     | {"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}| Iris-virginica |
                | ../data/price.csv | 10      | 10     | 10     | {"Price": 1200}| Product1 |

        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '50', '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-virginica'],
            ['data/iris.csv', '10', '10', '50', '{"petal length": 1}', 'Iris-virginica'],
            ['data/iris_sp_chars.csv', '10', '10', '50', '{"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}', 'Iris-virginica'],
            ['data/price.csv', '10', '10', '50', '{"Price": 1200}', 'Product2']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[4])
            prediction_create.the_logistic_prediction_is(self, example[5])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[5])
    def test_scenario2(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | objective | prediction  |

        """
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'],
            ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.93064'],
            ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']]
        show_doc(self.test_scenario2, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_prediction(self, example[5])
            prediction_create.the_prediction_is(self, example[6], example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[7])
    def test_scenario15(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data             | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability

        """
        examples = [[
            'data/iris.csv', '20', '20', '180',
            '{"weight_field": "000000", "missing_numerics": false}',
            '{"petal width": 1.5, "petal length": 2, "sepal width":1}',
            'Iris-versicolor', '0.9547', '000004'
        ]]
        show_doc(self.test_scenario15, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(
                self, example[8], example[4])
            model_create.the_logistic_model_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble with "<params>"
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  | params

            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 46.261364, '{"number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-setosa', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],


        """
        examples = [
            ['data/iris_unbalanced.csv', '10', '10', '120', '{"petal width": 4}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'],
            ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 61.61036, '{"boosting": {"iterations": 5}, "number_of_models": 5}']]
        show_doc(self.test_scenario6, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble_with_params(self, example[7])
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_ensemble_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a batch prediction from a multi model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a local multi model
                When I create a batch prediction for "<data_input>" and save it in "<path>"
                And I combine the votes in "<path>"
                Then the plurality combined predictions are "<predictions>"
                And the confidence weighted predictions are "<predictions>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params                         |  tag  |  data_input    | path | predictions  |
                | ../data/iris.csv | 10      | 10     | 10     | {"tags":["mytag"]} | mytag |  [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]  | ./tmp | ["Iris-setosa", "Iris-virginica", "Iris-versicolor"] |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]', './tmp', '["Iris-setosa", "Iris-virginica", "Iris-versicolor"]']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_models(self, example[5])
            compare_pred.i_create_a_local_multi_model(self)
            compare_pred.i_create_a_batch_prediction(self, example[6], example[7])
            compare_pred.i_combine_the_votes(self, example[7])
            compare_pred.the_plurality_combined_prediction(self, example[8])
            compare_pred.the_confidence_weighted_prediction(self, example[8])
Beispiel #40
0
 def test_scenario10(self):
     """
         Scenario 10: Successfully creating a local fusion from an exported file:
             Given I create a data source uploading a "<data>" file
             And I wait until the source is ready less than <time_1> secs
             And I create a dataset
             And I wait until the dataset is ready less than <time_2> secs
             And I create a model with "<params>"
             And I wait until the model is ready less than <time_3> secs
             And I create a model with "<params>"
             And I wait until the model is ready less than <time_3> secs
             And I create a model with "<params>"
             And I wait until the model is ready less than <time_3> secs
             And I retrieve a list of remote models tagged with "<tag>"
             And I create a fusion from a list of models
             And I wait until the fusion is ready less than <time_3> secs
             And I export the fusion to "<exported_file>"
             When I create a local fusion from the file "<exported_file>"
             Then the fusion ID and the local fusion ID match
             Examples:
             | data                | time_1  | time_2 | time_3 | exported_file | params | tag
             | ../data/iris.csv | 10      | 10     | 50 | ./tmp/fusion.json
     """
     print self.test_scenario10.__doc__
     examples = [
         ['data/iris.csv', '10', '10', '50', './tmp/fusion.json', 'my_fusion_tag']]
     for example in examples:
         print "\nTesting with:\n", example
         tag = "%s_%s" % (example[5], PY3)
         tag_args = '{"tags":["%s"]}' % tag
         source_create.i_upload_a_file(self, example[0])
         source_create.the_source_is_finished(self, example[1])
         dataset_create.i_create_a_dataset(self)
         dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
         model_create.i_create_a_model_with(self, tag_args)
         model_create.the_model_is_finished_in_less_than(self, example[3])
         model_create.i_create_a_model_with(self, tag_args)
         model_create.the_model_is_finished_in_less_than(self, example[3])
         model_create.i_create_a_model_with(self, tag_args)
         model_create.the_model_is_finished_in_less_than(self, example[3])
         prediction_compare.i_retrieve_a_list_of_remote_models(self, tag)
         model_create.i_create_a_fusion(self)
         model_create.the_fusion_is_finished_in_less_than(self, example[3])
         model_create.i_export_fusion(self, example[4])
         model_create.i_create_local_fusion_from_file(self, example[4])
         model_create.check_fusion_id_local_id(self)
Beispiel #41
0
    def test_scenario1(self):
        """

            Scenario: Successfully creating an anomaly detector from a dataset and a dataset list:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                Then I create an anomaly detector from a dataset
                And I wait until the anomaly detector is ready less than <time_4> secs
                And I check the anomaly detector stems from the original dataset
                And I store the dataset id in a list
                And I create a dataset
                And I wait until the dataset is ready less than <time_3> secs
                And I store the dataset id in a list
                Then I create an anomaly detector from a dataset list
                And I wait until the anomaly detector is ready less than <time_4> secs
                And I check the anomaly detector stems from the original dataset list

                Examples:
                | data                 | time_1  | time_2 | time_3 |  time_4 | 
                | ../data/tiny_kdd.csv | 40      | 40     | 40     |  100
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/tiny_kdd.csv', '40', '40', '40', '100']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[2])
            anomaly_create.i_create_an_anomaly_from_dataset(self)
            anomaly_create.the_anomaly_is_finished_in_less_than(self,
                                                                example[4])
            anomaly_create.i_check_anomaly_dataset_and_datasets_ids(self)
            mm_create.i_store_dataset_id(self)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self,
                                                                example[3])
            mm_create.i_store_dataset_id(self)
            anomaly_create.i_create_an_anomaly_from_dataset_list(self)
            anomaly_create.the_anomaly_is_finished_in_less_than(self,
                                                                example[4])
            anomaly_create.i_check_anomaly_datasets_and_datasets_ids(self)
Beispiel #42
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating local association object:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an association from a dataset
                And I wait until the association is ready less than <time_3> secs
                And I create a local association
                When I get the rules for <"item_list">
                Then the first rule is "<JSON_rule>"

                Examples:
                | data             | time_1  | time_2 | time_3 | item_list                              | JSON_rule  |
                | ../data/tiny_mushrooms.csv | 10      | 20     | 50     | ["Edible"]                   | {'p_value': 2.08358e-17, 'confidence': 1, 'lift': 1.12613, 'lhs': [14], 'leverage': 0.07885, 'lhs_cover': [0.704, 176], 'rhs_cover': [0.888, 222], 'rhs': [1], 'support': [0.704, 176], 'rule_id': u'000038'}

        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], {
                'p_value': 5.26971e-31,
                'confidence': 1,
                'rhs_cover': [0.488, 122],
                'leverage': 0.24986,
                'rhs': [19],
                'rule_id': u'000002',
                'lift': 2.04918,
                'lhs': [0, 21, 16, 7],
                'lhs_cover': [0.488, 122],
                'support': [0.488, 122]
            }
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            association_create.i_create_an_association_from_dataset(self)
            association_create.the_association_is_finished_in_less_than(
                self, example[3])
            association_create.i_create_a_local_association(self)
            association_create.i_get_rules_for_item_list(self, example[4])
            association_create.the_first_rule_is(self, example[5])
Beispiel #43
0
    def test_scenario2(self):
        """
            Scenario: Successfully creating a prediction using a shared model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I make the model shared
                And I wait until the model is ready less than <time_3> secs
                And I get the model sharing info
                And I check the model status using the model's shared url
                And I check the model status using the model's shared key
                And I create a local model
                When I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"


                Examples:
                | data                | time_1  | time_2 | time_3 | data_input    | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"petal width": 0.5} | Iris-setosa |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '10', '{"petal width": 0.5}',
            'Iris-setosa'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.make_the_model_shared(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.get_sharing_info(self)
            model_create.model_from_shared_url(self)
            model_create.model_from_shared_key(self)
            compare_pred.i_create_a_local_model(self)
            compare_pred.i_create_a_local_prediction(self, example[4])
            compare_pred.the_local_prediction_is(self, example[5])
    def test_scenario2(self):
        """

            Scenario: Successfully obtaining field importance from an Ensemble:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<parms1>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<parms2>"
                And I wait until the model is ready less than <time_4> secs
                And I create a model with "<parms3>"
                And I wait until the model is ready less than <time_5> secs
                When I create a local Ensemble with the last <number_of_models> models
                Then the field importance text is <field_importance>

                Examples:
                | data             | time_1  | time_2 |parms1 | time_3 |parms2 | time_4 |parms3| time_5 |number_of_models |field_importance
                | ../data/iris.csv | 10      | 10     |{"input_fields": ["000000", "000001","000003", "000004"]} |20      |{"input_fields": ["000000", "000001","000002", "000004"]} | 20     |{"input_fields": ["000000", "000001","000002", "000003", "000004"]} | 20   | 3 |[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]]
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', '10', '10',
            '{"input_fields": ["000000", "000001","000003", "000004"]}', '20',
            '{"input_fields": ["000000", "000001","000002", "000004"]}', '20',
            '{"input_fields": ["000000", "000001","000002", "000003", "000004"]}',
            '20', '3',
            '[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]]'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with(self, example[3])
            model_create.the_model_is_finished_in_less_than(self, example[4])
            model_create.i_create_a_model_with(self, example[5])
            model_create.the_model_is_finished_in_less_than(self, example[6])
            model_create.i_create_a_model_with(self, example[7])
            model_create.the_model_is_finished_in_less_than(self, example[8])
            ensemble_create.create_local_ensemble_with_list(self, example[9])
            ensemble_create.field_importance_print(self, example[10])
    def test_scenario5(self):
        """
            Scenario: Successfully comparing projections for PCAs:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a PCA with "<params>"
                And I wait until the PCA is ready less than <time_3> secs
                And I create a local PCA
                When I create a projection for "<input_data>"
                Then the projection is "<projection>"
                And I create a local projection for "<data_input>"
                Then the local projection is "<projection>"

                Examples:
                | data             | time_1  | time_2 | time_3 | input_data  | projection | params


        """
        examples = [
            ['data/iris.csv', '30', '30', '120', '{}',
             '{"PC2": 0, "PC3": 0, "PC1": 0, "PC6": 0, "PC4": 5e-05, "PC5": 0}', '{}'],
            ['data/iris.csv', '30', '30', '120', '{"petal length": 1}',
             '{"PC2": 0.08708, "PC3": 0.20929, "PC1": 1.56084, "PC6": -1.34463, "PC4": 0.7295, "PC5": -1.00876}', '{}'],
            ['data/iris.csv', '30', '30', '120', '{"species": "Iris-versicolor"}',
             '{"PC2": 1.8602, "PC3": -2.00864, "PC1": -0.61116, "PC6": -0.66983, "PC4": -2.44618, "PC5": 0.43414}', '{}'],
            ['data/iris.csv', '30', '30', '120', '{"petal length": 1, "sepal length": 0, "petal width": 0, "sepal width": 0, "species": "Iris-versicolor"}',
             '{"PC2": 7.18009, "PC3": 6.51511, "PC1": 2.78155, "PC6": 0.21372, "PC4": -1.94865, "PC5": 0.57646}', '{}']]
        show_doc(self.test_scenario5, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            pca_create.i_create_a_pca_with_params(self, example[6])
            pca_create.the_pca_is_finished_in_less_than(self, example[3])
            compare_predictions.create_local_pca(self)
            projection_create.i_create_a_projection(self, example[4])
            projection_create.the_projection_is(self, example[5])
            compare_predictions.i_create_a_local_projection(self, example[4])
            compare_predictions.the_local_projection_is(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a prediction from a multi model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I create a model with "<params>"
                And I wait until the model is ready less than <time_3> secs
                And I retrieve a list of remote models tagged with "<tag>"
                And I create a local multi model
                When I create a local prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | params                         |  tag  |  data_input    | prediction  |
                | ../data/iris.csv | 10      | 10     | 10     | {"tags":["mytag"]} | mytag |  {"petal width": 0.5}     | Iris-setosa |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag',
            '{"petal width": 0.5}', 'Iris-setosa'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_create_a_model_with(self, example[4])
            model_create.the_model_is_finished_in_less_than(self, example[3])
            compare_pred.i_retrieve_a_list_of_remote_models(self, example[5])
            compare_pred.i_create_a_local_multi_model(self)
            compare_pred.i_create_a_local_prediction(self, example[6])
            compare_pred.the_local_prediction_is(self, example[7])
    def test_scenario3(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |
                | ../data/iris.csv   | 10      | 10     | 10     | {}                   | 000004    | Iris-setosa    | 0.2629     |
                | ../data/grades.csv | 10      | 10     | 10     | {}                   | 000005    | 68.62224       | 27.5358    |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20}      | 000005    | 46.69889      | 37.27594297134128   |
                | ../data/grades.csv | 10      | 10     | 10     | {"Midterm": 20, "Tutorial": 90, "TakeHome": 100}     | 000005    | 28.06      | 24.86634   |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.2629'],
            ['data/grades.csv', '10', '10', '10', '{}', '000005', '68.62224', '27.5358'],
            ['data/grades.csv', '10', '10', '10', '{"Midterm": 20}', '000005', '46.69889', '37.27594297134128'],
            ['data/grades.csv', '10', '10', '10', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '24.86634']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

                Examples:
                | data             | time_1  | time_2 | objective | time_3 | options | data_input                             | prediction  | probability
                | ../data/spam.csv | 20      | 20     | 000002 | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Message": "A normal message"}       | ham     | 0.7645

        """
        print self.test_scenario10.__doc__
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "A normal message"}', 'ham', 0.7645, "000000"],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"Message": "mobile"}', 'spam', 0.7175, "000000"],
            ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', '25-34', '0.4135', '000002']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective(self, example[8])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario3(self):
        """
            Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with "<options>" waiting less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | options  |   expected_file                                        |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt              |
                | data/spam.csv          | 20      | 20     | 30     | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt              |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}','data/model/if_then_rules_spam_textanalysis_1.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', 'data/model/if_then_rules_spam_textanalysis_2.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_3.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_4.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_5.txt'],
            ['data/spam.csv', '10', '10', '10', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_6.txt']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, example[5])
    def test_scenario1(self):
        """
            Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                And I translate the tree into IF_THEN rules
                Then I check the output is like "<expected_file>" expected file

                Examples:
                | data                   | time_1  | time_2 | time_3 | expected_file                                        |
                | data/iris.csv          | 10      | 10     | 10     | data/model/if_then_rules_iris.txt              |
                | data/iris_sp_chars.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_sp_chars.txt     |
                | data/spam.csv          | 20      | 20     | 30     | data/model/if_then_rules_spam.txt              |
                | data/grades.csv        | 10      | 10     | 10     | data/model/if_then_rules_grades.txt            |
                | data/diabetes.csv      | 20      | 20     | 30     | data/model/if_then_rules_diabetes.txt          |
                | data/iris_missing2.csv | 10      | 10     | 10     | data/model/if_then_rules_iris_missing2.txt     |
                | data/tiny_kdd.csv      | 20      | 20     | 30     | data/model/if_then_rules_tiny_kdd.txt          |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', '10', '10', '10', 'data/model/if_then_rules_iris.txt'],
            ['data/iris_sp_chars.csv', '10', '10', '10', 'data/model/if_then_rules_iris_sp_chars.txt'],
            ['data/spam.csv', '10', '10', '10', 'data/model/if_then_rules_spam.txt'],
            ['data/grades.csv', '10', '10', '10', 'data/model/if_then_rules_grades.txt'],
            ['data/diabetes.csv', '10', '10', '10', 'data/model/if_then_rules_diabetes.txt'],
            ['data/iris_missing2.csv', '10', '10', '10', 'data/model/if_then_rules_iris_missing2.txt'],
            ['data/tiny_kdd.csv', '10', '10', '10', 'data/model/if_then_rules_tiny_kdd.txt']]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            inspect_model.i_translate_the_tree_into_IF_THEN_rules(self)
            inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4])
    def test_scenario6(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                             | prediction  |

        """
        examples = [
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'ham'],
            ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', 'ham']]
        show_doc(self.test_scenario6, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model(self)
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario10(self):
        """
            Scenario: Successfully comparing predictions with proportional missing strategy and balanced models:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a balanced model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a proportional missing strategy prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And the confidence for the prediction is "<confidence>"
                And I create a proportional missing strategy local prediction for "<data_input>"
                Then the local prediction is "<prediction>"
                And the local prediction's confidence is "<confidence>"
                And I create local probabilities for "<data_input>"
                Then the local probabilities are "<probabilities>"

                Examples:
                | data               | time_1  | time_2 | time_3 | data_input           | objective | prediction     | confidence |

        """
        examples = [
            ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'],
            ['data/iris_unbalanced.csv', '10', '10', '10', '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}', '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]']]
        show_doc(self.test_scenario10, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_balanced_model(self)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_model(self)
            prediction_create.i_create_a_proportional_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_proportional_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_create.the_confidence_is(self, example[7])
            prediction_compare.the_local_prediction_confidence_is(self, example[7])
            prediction_compare.i_create_local_probabilities(self, example[4])
            prediction_compare.the_local_probabilities_are(self, example[8])
    def test_scenario13(self):
        """
            Scenario: Successfully comparing predictions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the model is ready less than <time_3> secs
                And I create a local model
                When I create a prediction for "<data_input>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local prediction for "<data_input>"
                Then the local prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | objective | prediction  |

        """
        examples = [
            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', "tmp/my_model.json", "my_test"],
            ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"],
            ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"]]
        show_doc(self.test_scenario13, examples)
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            args = '{"tags": ["%s"]}' % example[8]
            model_create.i_create_a_model_with(self, data=args)
            model_create.the_model_is_finished_in_less_than(self, example[3])
            model_create.i_export_model(self, False, example[7]) # no pmml
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_create.i_create_a_prediction(self, example[4])
            prediction_create.the_prediction_is(self, example[5], example[6])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
            model_create.i_export_tags_model(self, example[7], example[8])
            prediction_compare.i_create_a_local_model_from_file(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[4])
            prediction_compare.the_local_prediction_is(self, example[6])
    def test_scenario3(self):
        """
            Scenario: Successfully getting the closest point in a cluster:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a cluster
                And I wait until the cluster is ready less than <time_3> secs
                And I create a local cluster
                Then the data point in the cluster closest to "<reference>" is "<closest>"

                Examples:
                | data | time_1  | time_2 | time_3 | reference | closest |

        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/iris.csv', '10', '10', '40',
            '{"petal length": 1.4, "petal width": 0.2,'
            ' "sepal width": 3.0, "sepal length": 4.89,'
            ' "species": "Iris-setosa"}',
            '{"distance": 0.001894153207990619, "data":'
            ' {"petal length": "1.4", "petal width": "0.2",'
            ' "sepal width": "3.0", "sepal length": "4.9",'
            ' "species": "Iris-setosa"}}'
        ],
                    [
                        'data/spam_4w.csv', '10', '10', '40',
                        '{"Message": "mobile"}', '{"distance": 0.0, "data":'
                        ' {"Message": "mobile", "Type": "spam"}}'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            cluster_create.i_create_a_cluster(self)
            cluster_create.the_cluster_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_cluster(self)
            cluster_create.closest_in_cluster(self, example[4], example[5])
Beispiel #55
0
    def test_scenario3(self):
        """
            Scenario: Successfully comparing association sets:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the association is ready less than <time_3> secs
                And I create a local association
                When I create an association set for "<data_input>"
                Then the association set is like the contents of
                "<association_set_file>"
                And I create a local association set for "<data_input>"
                Then the local association set is like the contents of
                "<association_set_file>"

        """
        examples = [[
            'data/dates2.csv', '20', '30', '80', '{"target-2": -1}',
            'data/associations/association_set2.json'
        ]]
        show_doc(self.test_scenario3, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            association_create.i_create_an_association_from_dataset(self)
            association_create.the_association_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_association(self)
            prediction_create.i_create_an_association_set(self, example[4])
            prediction_compare.the_association_set_is_like_file(
                self, example[5])
            prediction_compare.i_create_a_local_association_set(
                self, example[4])
            prediction_compare.the_local_association_set_is_like_file(
                self, example[5])
    def test_scenario5(self):
        """
            Scenario: Successfully comparing association sets:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a model
                And I wait until the association is ready less than <time_3> secs
                And I create a local association
                When I create an association set for "<data_input>"
                Then the association set is like the contents of "<association_set_file>"
                And I create a local association set for "<data_input>"
                Then the local association set is like the contents of "<association_set_file>"

        """
        examples = [[
            'data/groceries.csv', '20', '20', '30',
            '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}',
            'data/associations/association_set.json', '{"field1": "cat food"}'
        ]]
        show_doc(self.test_scenario5, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            association_create.i_create_an_association_from_dataset(self)
            association_create.the_association_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_association(self)
            prediction_create.i_create_an_association_set(self, example[6])
            prediction_compare.the_association_set_is_like_file(
                self, example[5])
            prediction_compare.i_create_a_local_association_set(
                self, example[6])
            prediction_compare.the_local_association_set_is_like_file(
                self, example[5])
Beispiel #57
0
    def test_scenario5(self):
        """
            Scenario: Successfully comparing centroids with summary fields:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a cluster with options "<options>"
                And I wait until the cluster is ready less than <time_3> secs
                And I create a local cluster
                When I create a centroid for "<data_input>"
                Then the centroid is "<centroid>" with distance "<distance>"
                And I create a local centroid for "<data_input>"
                Then the local centroid is "<centroid>" with distance "<distance>"

                Examples:
                | data             | time_1  | time_2 | time_3 | options | data_input                            | centroid  | distance |
                | ../data/iris.csv | 20      | 20     | 30     | {"summary_fields": ["sepal width"]} |{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}             | Cluster 6   | 0.713698082167   |
        """
        print self.test_scenario5.__doc__
        examples = [[
            'data/iris.csv', '20', '20', '30',
            '{"summary_fields": ["sepal width"]}',
            '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}',
            'Cluster 6', '0.713698082167'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            cluster_create.i_create_a_cluster_with_options(self, example[4])
            cluster_create.the_cluster_is_finished_in_less_than(
                self, example[3])
            prediction_compare.i_create_a_local_cluster(self)
            prediction_create.i_create_a_centroid(self, example[5])
            prediction_create.the_centroid_is_with_distance(
                self, example[6], example[7])
            prediction_compare.i_create_a_local_centroid(self, example[5])
            prediction_compare.the_local_centroid_is(self, example[6],
                                                     example[7])
    def test_scenario1(self):
        """
            Scenario: Successfully creating forecasts from a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create time-series from a dataset
                And I wait until the time series is ready less than <time_3> secs
                And I update the time series name to "<time_series_name>"
                When I wait until the time series is ready less than <time_4> secs
                Then the time series name is "<time_series_name>"
                And I create a forecast for "<input_data>"
                Then the forecasts are "<forecast_points>"

                Examples:
                | data                | time_1  | time_2 | time_3 | time_4 | time_series_name |input_data | forecast_points
                | ../data/grades.csv | 10      | 10     | 20     | 50 | my new time_series name |
                {"000005": {"horizon": 5}], {}}
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', '30', '30', '50', '50',
            'my new time series name', '{"000005": {"horizon": 5}}',
            '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": "M,M,N"}]}'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, example[2])
            time_series_create.i_create_a_time_series(self)
            time_series_create.the_time_series_is_finished_in_less_than(
                self, example[3])
            time_series_create.i_update_time_series_name(self, example[5])
            time_series_create.the_time_series_is_finished_in_less_than(
                self, example[4])
            time_series_create.i_check_time_series_name(self, example[5])
            forecast_create.i_create_a_forecast(self, example[6])
            forecast_create.the_forecast_is(self, example[7])
    def test_scenario9(self):
        """
            Scenario: Successfully comparing predictions with text options:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a logistic regression model with objective "<objective>" and parms "<parms>"
                And I wait until the logistic regression model is ready less than <time_3> secs
                And I create a local logistic regression model
                When I create a logistic regression prediction for "<data_input>"
                Then the logistic regression prediction is "<prediction>"
                And the logistic regression probability for the prediction is "<probability>"
                And I create a local logistic regression prediction for "<data_input>"
                Then the local logistic regression prediction is "<prediction>"
                And the local logistic regression probability for the prediction is "<probability>"

        """
        examples = [
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'],
            ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", '{"bias": false}']]
        show_doc(self.test_scenario9, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            source_create.i_update_source_with(self, example[4])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9])
            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
            prediction_compare.i_create_a_local_logistic_model(self)
            prediction_create.i_create_a_logistic_prediction(self, example[5])
            prediction_create.the_logistic_prediction_is(self, example[6])
            prediction_create.the_logistic_probability_is(self, example[7])
            prediction_compare.i_create_a_local_prediction(self, example[5])
            prediction_compare.the_local_prediction_is(self, example[6])
            prediction_compare.the_local_probability_is(self, example[7])
    def test_scenario7(self):
        """
            Scenario: Successfully comparing predictions in operating points for ensembles:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create an ensemble
                And I wait until the ensemble is ready less than <time_3> secs
                And I create a local ensemble
                When I create a prediction for "<data_input>" in "<operating_kind>"
                Then the prediction for "<objective>" is "<prediction>"
                And I create a local ensemble prediction for "<data_input>" in "<operating_kind>"
                Then the local ensemble prediction is "<prediction>"

                Examples:
                | data             | time_1  | time_2 | time_3 | data_input                             | prediction  | operating_kind


        """
        examples = [
            ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor',  "probability", "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa',  "probability", "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor',  "confidence", "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa',  "confidence", "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor',  "votes", "000004"],
            ['data/iris.csv', '10', '50', '50', '{"petal length": 1}', 'Iris-setosa',  "votes", "000004"]]
        show_doc(self.test_scenario7, examples)

        for example in examples:
            print "\nTesting with:\n", example
            source_create.i_upload_a_file(self, example[0])
            source_create.the_source_is_finished(self, example[1])
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
            ensemble_create.i_create_an_ensemble(self)
            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
            ensemble_create.create_local_ensemble(self)
            prediction_create.i_create_an_ensemble_prediction_op_kind(self, example[4], example[6])
            prediction_create.the_prediction_is(self, example[7], example[5])
            prediction_compare.i_create_a_local_ensemble_prediction_op_kind(self, example[4], example[6])
            prediction_compare.the_local_prediction_is(self, example[5])