Example #1
0
    def test_scenario7(self):
        """
            Scenario: Successfully creating a Topic Model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<params>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                When I create a Topic Model from a dataset
                Then I wait until the Topic Model is ready less than <time_3> secs

                Examples:
                | data                 | time_1  | time_2 | time_3 | params
                | ../data/movies.csv | 10      | 10     | 100     | {"fields": {"genre": {"optype": "items", "item_analysis": {"separator": "$"}}, "title": {"optype": "text"}}}
        """
        # The scenario text above is echoed to stdout, so it is kept verbatim.
        print(self.test_scenario7.__doc__)
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, JSON source update.
        # NOTE(review): the update addresses fields by id; 000007/000006
        # presumably are the "genre"/"title" columns named in the table
        # above -- confirm against movies.csv.
        cases = [[
            'data/movies.csv', '10', '10', '100',
            '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}, "000006": {"optype": "text"}}}'
        ]]
        for case in cases:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(case))
            data_file, source_wait, dataset_wait, model_wait, source_params = case
            source_create.i_upload_a_file(self, data_file)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, data=source_params)
            # Second wait on the source, same timeout, now that the update
            # request has been sent.
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, model_wait)
Example #2
0
 def test_scenario8(self):
     """
         Scenario 8: Successfully creating a local topic model from an exported file:
             Given I create a data source uploading a "<data>" file
             And I wait until the source is ready less than <time_1> secs
             And I update the source with params "<params>"
             And I wait until the source is ready less than <time_1> secs
             And I create a dataset
             And I wait until the dataset is ready less than <time_2> secs
             And I create a topic model
             And I wait until the topic model is ready less than <time_3> secs
             And I export the topic model to "<exported_file>"
             When I create a local topic model from the file "<exported_file>"
             Then the topic model ID and the local topic model ID match
             Examples:
             | data             | time_1 | time_2 | time_3 | exported_file          | params
             | ../data/spam.csv | 10     | 10     | 500    | ./tmp/topic_model.json | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}
     """
     # The scenario text is echoed to stdout; it now describes the data file,
     # timeout and source update that the rows below really use (it used to
     # mention iris.csv / 50 secs and no source update).
     print(self.test_scenario8.__doc__)
     # Row layout: data file, source timeout, dataset timeout,
     # topic model timeout, export path, JSON source update.
     examples = [
         ['data/spam.csv', '10', '10', '500', './tmp/topic_model.json',
          '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
     for example in examples:
         # Single-argument call form: prints the same text whether print is
         # a statement or a function.
         print("\nTesting with:\n" + str(example))
         (data_file, source_wait, dataset_wait, model_wait,
          exported_file, source_params) = example
         source_create.i_upload_a_file(self, data_file)
         source_create.the_source_is_finished(self, source_wait)
         source_create.i_update_source_with(self, source_params)
         # Wait on the source again after sending the update.
         source_create.the_source_is_finished(self, source_wait)
         dataset_create.i_create_a_dataset(self)
         dataset_create.the_dataset_is_finished_in_less_than(
             self, dataset_wait)
         topic_create.i_create_a_topic_model(self)
         topic_create.the_topic_model_is_finished_in_less_than(
             self, model_wait)
         # Export the remote model and rebuild it locally from that file;
         # the last step compares the two ids.
         topic_create.i_export_topic_model(self, exported_file)
         topic_create.i_create_local_topic_model_from_file(
             self, exported_file)
         topic_create.check_topic_model_id_local_id(self)
Example #3
0
    def test_scenario2(self):
        """
            Scenario 2: Successfully creating Topic Model from a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<params>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create topic model from a dataset
                And I wait until the topic model is ready less than <time_3> secs
                And I update the topic model name to "<topic_model_name>"
                When I wait until the topic_model is ready less than <time_4> secs
                Then the topic model name is "<topic_model_name>"

                Examples:
                | data             | time_1 | time_2 | time_3 | time_4 | topic_model_name        | params
                | ../data/spam.csv | 100    | 100    | 10000  | 500    | my new topic model name | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}
        """
        # The scenario text is echoed to stdout; it now lists the source
        # update step and the topic model timeout the row below really uses.
        print(self.test_scenario2.__doc__)
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, rename timeout, new name, JSON source update.
        examples = [
            ['data/spam.csv', '100', '100', '10000', '500',
             'my new topic model name',
             '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
        for example in examples:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(example))
            (data_file, source_wait, dataset_wait, model_wait,
             rename_wait, new_name, source_params) = example
            source_create.i_upload_a_file(self, data_file)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, source_params)
            # test_scenario7 and test_scenario8 wait for the source again
            # after updating it; do the same here so the dataset is not
            # requested while the update may still be in progress.
            # NOTE(review): assumes an update can put the source back into
            # a non-finished state -- confirm against the API.
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, model_wait)
            # Rename, wait for the model again, then verify the stored name.
            topic_create.i_update_topic_model_name(self, new_name)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, rename_wait)
            topic_create.i_check_topic_model_name(self, new_name)
Example #4
0
    def test_scenario7(self):
        """
            Scenario: Successfully creating a Topic Model:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<params>"
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                When I create a Topic Model from a dataset
                Then I wait until the Topic Model is ready less than <time_3> secs

                Examples:
                | data                 | time_1  | time_2 | time_3 | params
                | ../data/movies.csv | 10      | 10     | 100     | {"fields": {"genre": {"optype": "items", "item_analysis": {"separator": "$"}}, "title": {"optype": "text"}}}
        """
        # The scenario text above is echoed to stdout, so it is kept verbatim.
        print(self.test_scenario7.__doc__)
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, JSON source update (fields addressed by id).
        rows = [
            ['data/movies.csv', '10', '10', '100', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}, "000006": {"optype": "text"}}}']]
        for row in rows:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(row))
            csv_path, source_secs, dataset_secs, topic_secs, update_json = row
            source_create.i_upload_a_file(self, csv_path)
            source_create.the_source_is_finished(self, source_secs)
            source_create.i_update_source_with(self, data=update_json)
            # Second wait on the source after the update request.
            source_create.the_source_is_finished(self, source_secs)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_secs)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, topic_secs)
 def test_scenario8(self):
     """
         Scenario 8: Successfully creating a local topic model from an exported file:
             Given I create a data source uploading a "<data>" file
             And I wait until the source is ready less than <time_1> secs
             And I update the source with params "<params>"
             And I wait until the source is ready less than <time_1> secs
             And I create a dataset
             And I wait until the dataset is ready less than <time_2> secs
             And I create a topic model
             And I wait until the topic model is ready less than <time_3> secs
             And I export the topic model to "<exported_file>"
             When I create a local topic model from the file "<exported_file>"
             Then the topic model ID and the local topic model ID match
             Examples:
             | data             | time_1 | time_2 | time_3 | exported_file          | params
             | ../data/spam.csv | 10     | 10     | 500    | ./tmp/topic_model.json | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}
     """
     # The scenario text is echoed to stdout; it now matches the row below
     # (it used to mention iris.csv / 50 secs and no source update).
     print(self.test_scenario8.__doc__)
     # Row layout: data file, source timeout, dataset timeout,
     # topic model timeout, export path, JSON source update.
     examples = [
         ['data/spam.csv', '10', '10', '500', './tmp/topic_model.json',
          '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
     for example in examples:
         # Single-argument call form: prints the same text whether print is
         # a statement or a function.
         print("\nTesting with:\n" + str(example))
         (data_file, source_wait, dataset_wait, model_wait,
          exported_file, source_params) = example
         source_create.i_upload_a_file(self, data_file)
         source_create.the_source_is_finished(self, source_wait)
         source_create.i_update_source_with(self, source_params)
         # Wait on the source again after sending the update.
         source_create.the_source_is_finished(self, source_wait)
         dataset_create.i_create_a_dataset(self)
         dataset_create.the_dataset_is_finished_in_less_than(
             self, dataset_wait)
         topic_create.i_create_a_topic_model(self)
         topic_create.the_topic_model_is_finished_in_less_than(
             self, model_wait)
         # Export the remote model and rebuild it locally from that file;
         # the last step compares the two ids.
         topic_create.i_export_topic_model(self, exported_file)
         topic_create.i_create_local_topic_model_from_file(
             self, exported_file)
         topic_create.check_topic_model_id_local_id(self)
    def test_scenario2(self):
        """
            Scenario 2: Successfully creating Topic Model from a dataset:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<params>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create topic model from a dataset
                And I wait until the topic model is ready less than <time_3> secs
                And I update the topic model name to "<topic_model_name>"
                When I wait until the topic_model is ready less than <time_4> secs
                Then the topic model name is "<topic_model_name>"

                Examples:
                | data             | time_1 | time_2 | time_3 | time_4 | topic_model_name        | params
                | ../data/spam.csv | 100    | 100    | 10000  | 500    | my new topic model name | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}
        """
        # The scenario text is echoed to stdout; it now lists the source
        # update step and the topic model timeout the row below really uses.
        print(self.test_scenario2.__doc__)
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, rename timeout, new name, JSON source update.
        examples = [
            ['data/spam.csv', '100', '100', '10000', '500',
             'my new topic model name',
             '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
        for example in examples:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(example))
            (data_file, source_wait, dataset_wait, model_wait,
             rename_wait, new_name, source_params) = example
            source_create.i_upload_a_file(self, data_file)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, source_params)
            # test_scenario7 and test_scenario8 wait for the source again
            # after updating it; do the same here so the dataset is not
            # requested while the update may still be in progress.
            # NOTE(review): assumes an update can put the source back into
            # a non-finished state -- confirm against the API.
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, model_wait)
            # Rename, wait for the model again, then verify the stored name.
            topic_create.i_update_topic_model_name(self, new_name)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, rename_wait)
            topic_create.i_check_topic_model_name(self, new_name)
    def test_scenario4(self):
        """
            Scenario: Successfully comparing topic distributions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a topic model
                And I wait until the topic model is ready less than <time_3> secs
                And I create a local topic model
                When I create a topic distribution for "<data_input>"
                Then the topic distribution is "<topic_distribution>"
                And I create a local topic distribution for "<data_input>"
                Then the local topic distribution is "<topic_distribution>"

                Examples headers:
                | data             | time_1  | time_2 | time_3 | options | data_input                            | topic distribution  |

        """
        # Both rows apply the same source update, so the JSON is written once.
        text_options = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"case_sensitive": true, "stem_words": true, '
            '"use_stopwords": false, "language": "en"}}}}')
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, JSON source update, input data,
        # expected topic distribution.
        examples = [
            [
                'data/spam.csv', '20', '20', '30',
                text_options,
                '{"Type": "ham", "Message": "Mobile call"}',
                '[0.01878, 0.00388, 0.00388, 0.00388, 0.20313, 0.47315, 0.00574, 0.05695, 0.00388, 0.19382, 0.00388, 0.02902]'
            ],
            [
                'data/spam.csv', '20', '20', '30',
                text_options,
                '{"Type": "ham", "Message": "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."}',
                '[0.00263, 0.01083, 0.00831, 0.06004, 0.33701, 0.00263, 0.01209, 0.44553, 0.0531, 0.00326, 0.06193, 0.00263]'
            ]
        ]
        show_doc(self.test_scenario4, examples)
        for example in examples:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(example))
            (data_file, source_wait, dataset_wait, model_wait,
             source_options, data_input, expected_distribution) = example
            source_create.i_upload_a_file(self, data_file)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, source_options)
            # test_scenario7 and test_scenario8 wait for the source again
            # after updating it; do the same here.
            # NOTE(review): assumes an update can put the source back into
            # a non-finished state -- confirm against the API.
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, model_wait)
            prediction_compare.i_create_a_local_topic_model(self)
            # Follow the scenario text: check the remote distribution
            # first, then the local one, both against the same expectation.
            topic_create.i_create_a_topic_distribution(self, data_input)
            prediction_compare.the_topic_distribution_is(
                self, expected_distribution)
            topic_create.i_create_a_local_topic_distribution(
                self, data_input)
            prediction_compare.the_local_topic_distribution_is(
                self, expected_distribution)
    def test_scenario4(self):
        """
            Scenario: Successfully comparing topic distributions:
                Given I create a data source uploading a "<data>" file
                And I wait until the source is ready less than <time_1> secs
                And I update the source with params "<options>"
                And I wait until the source is ready less than <time_1> secs
                And I create a dataset
                And I wait until the dataset is ready less than <time_2> secs
                And I create a topic model
                And I wait until the topic model is ready less than <time_3> secs
                And I create a local topic model
                When I create a topic distribution for "<data_input>"
                Then the topic distribution is "<topic_distribution>"
                And I create a local topic distribution for "<data_input>"
                Then the local topic distribution is "<topic_distribution>"

                Examples headers:
                | data             | time_1  | time_2 | time_3 | options | data_input                            | topic distribution  |

        """
        # Both rows apply the same source update, so the JSON is written once.
        text_options = (
            '{"fields": {"000001": {"optype": "text", "term_analysis": '
            '{"case_sensitive": true, "stem_words": true, '
            '"use_stopwords": false, "language": "en"}}}}')
        # Row layout: data file, source timeout, dataset timeout,
        # topic model timeout, JSON source update, input data,
        # expected topic distribution.
        examples = [
            ['data/spam.csv', '30', '30', '30', text_options,
             '{"Type": "ham", "Message": "Mobile call"}',
             '[0.51133, 0.00388, 0.00574, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.44801]'],
            ['data/spam.csv', '30', '30', '30', text_options,
             '{"Type": "ham", "Message": "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."}',
             '[0.39188, 0.00643, 0.00264, 0.00643, 0.08112, 0.00264, 0.37352, 0.0115, 0.00707, 0.00327, 0.00264, 0.11086]']]
        show_doc(self.test_scenario4, examples)
        for example in examples:
            # Single-argument call form: prints the same text whether print
            # is a statement or a function.
            print("\nTesting with:\n" + str(example))
            (data_file, source_wait, dataset_wait, model_wait,
             source_options, data_input, expected_distribution) = example
            source_create.i_upload_a_file(self, data_file)
            source_create.the_source_is_finished(self, source_wait)
            source_create.i_update_source_with(self, source_options)
            # test_scenario7 and test_scenario8 wait for the source again
            # after updating it; do the same here.
            # NOTE(review): assumes an update can put the source back into
            # a non-finished state -- confirm against the API.
            source_create.the_source_is_finished(self, source_wait)
            dataset_create.i_create_a_dataset(self)
            dataset_create.the_dataset_is_finished_in_less_than(
                self, dataset_wait)
            topic_create.i_create_a_topic_model(self)
            topic_create.the_topic_model_is_finished_in_less_than(
                self, model_wait)
            prediction_compare.i_create_a_local_topic_model(self)
            # Remote distribution first, then the local one; both are
            # compared with the same expected value.
            topic_create.i_create_a_topic_distribution(self, data_input)
            prediction_compare.the_topic_distribution_is(
                self, expected_distribution)
            topic_create.i_create_a_local_topic_distribution(
                self, data_input)
            prediction_compare.the_local_topic_distribution_is(
                self, expected_distribution)