def test_scenario2(self):
    """
        Scenario: Successfully comparing centroids with configuration options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster with options "<options>"
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<full_data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | full_data_input
    """
    examples = [
        ['data/iris.csv', '20', '20', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}'],
        ['data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}']]
    show_doc(self.test_scenario2, examples)
    for example in examples:
        # Name the columns once instead of indexing; order follows the
        # "Examples" header in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance, full_data_input) = example
        # Single-argument call form: parses under both the Python 2 print
        # statement and the Python 3 print function, and emits the same
        # text as the former `print "...", example`.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster_with_options(self, options)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        # The remote centroid is requested with the fully specified input,
        # the local one with the (possibly partial) input.
        # NOTE(review): presumably so the local cluster is the side that
        # applies the configured defaults -- confirm.
        prediction_create.i_create_a_centroid(self, full_data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario2(self):
    """
        Scenario: Successfully comparing centroids with configuration options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster with options "<options>"
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<full_data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | full_data_input
    """
    examples = [
        ['data/iris.csv', '30', '30', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}'],
        ['data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}']]
    show_doc(self.test_scenario2, examples)
    for example in examples:
        # Name the columns once instead of indexing; order follows the
        # "Examples" header in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance, full_data_input) = example
        # Single-argument call form: parses under both the Python 2 print
        # statement and the Python 3 print function, and emits the same
        # text as the former `print "...", example`.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster_with_options(self, options)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        # The remote centroid is requested with the fully specified input,
        # the local one with the (possibly partial) input.
        # NOTE(review): presumably so the local cluster is the side that
        # applies the configured defaults -- confirm.
        prediction_create.i_create_a_centroid(self, full_data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario5(self):
    """
        Scenario: Successfully comparing centroids with summary fields:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster with options "<options>"
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
            | ../data/iris.csv | 20 | 20 | 30 | {"summary_fields": ["sepal width"]} |{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"} | Cluster 0 | 0.7310939266123302 |
    """
    # Call form works as a Python 2 print statement and a Python 3 call.
    print(self.test_scenario5.__doc__)
    examples = [
        ['data/iris.csv', '20', '20', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 0', '0.7310939266123302']]
    for example in examples:
        # Column order follows the "Examples" header in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single argument keeps the output identical to the former
        # `print "...", example` statement on Python 2.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster_with_options(self, options)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        # Remote and local centroids are checked against the same
        # expected cluster name and distance.
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario5(self):
    """
        Scenario: Successfully comparing centroids with summary fields:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster with options "<options>"
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
            | ../data/iris.csv | 20 | 20 | 30 | {"summary_fields": ["sepal width"]} |{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"} | Cluster 2 | 1.1643644909783857 |
    """
    # Call form works as a Python 2 print statement and a Python 3 call.
    print(self.test_scenario5.__doc__)
    examples = [
        ['data/iris.csv', '20', '20', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.1643644909783857']]
    for example in examples:
        # Column order follows the "Examples" header in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single argument keeps the output identical to the former
        # `print "...", example` statement on Python 2.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster_with_options(self, options)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        # Remote and local centroids are checked against the same
        # expected cluster name and distance.
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario2(self):
    """
        Scenario: Successfully comparing remote and local predictions
                  with raw date input for cluster
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples headers:
            |data|time_1|time_2|time_3|data_input|centroid|distance|
    """
    examples = [
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}',
         "Cluster 2", 0.92112],
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}',
         "Cluster 3", 0.77389],
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}',
         "Cluster 0", 0.87855],
        ['data/dates2.csv', '20', '30', '60',
         '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}',
         "Cluster 6", 0.83506]]
    show_doc(self.test_scenario2, examples)
    for example in examples:
        # Column order follows the "Examples headers" row in the docstring.
        (data, time_1, time_2, time_3, data_input,
         centroid, distance) = example
        # Single-argument call form: parses under both the Python 2 print
        # statement and the Python 3 print function, and emits the same
        # text as the former `print "...", example`.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        # The same raw date input is sent to the remote and local side;
        # both must report the same centroid name and distance.
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario4(self):
    """
        Scenario: Successfully comparing centroids with or without text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples headers:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |

            The concrete rows are the `examples` list in the test body.
    """
    # Call form works as a Python 2 print statement and a Python 3 call.
    print(self.test_scenario4.__doc__)
    examples = [
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 5', '0.375'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 1', '0.375'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 1', '0.375'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 0', '0.478833312167'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 0', '0.707106781187'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 5', '0.4006712471727391'],
        ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 0', '0.811744494026442']]
    for example in examples:
        # Column order follows the "Examples headers" row in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single argument keeps the output identical to the former
        # `print "...", example` statement on Python 2.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        # Here the options are applied to the source (not the cluster),
        # before the dataset is built from it.
        source_create.i_update_source_with(self, options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario1(self):
    """
        Scenario: Successfully comparing centroids with or without text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples headers:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
    """
    examples = [
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.25'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 1', '0.36637'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 0', '0.478833312167'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 6', '0.5'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3', '0.5033378686559257'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": true}', 'Cluster 3', '0.5033378686559257'],
        ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'],
        ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "age_range": "18-24", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Cluster 1', '0.7294650227133437']]
    show_doc(self.test_scenario1, examples)
    for example in examples:
        # Column order follows the "Examples headers" row in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single-argument call form: parses under both the Python 2 print
        # statement and the Python 3 print function, and emits the same
        # text as the former `print "...", example`.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        # Here the options are applied to the source (not the cluster),
        # before the dataset is built from it.
        source_create.i_update_source_with(self, options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario1(self):
    """
        Scenario: Successfully comparing centroids with or without text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples headers:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
    """
    examples = [
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 7', '0.36637'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 1', '0.36637'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 0', '0.478833312167'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 1', '0.5'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3', '0.5033378686559257'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": true}', 'Cluster 3', '0.5033378686559257'],
        ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'],
        ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "age_range": "18-24", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Cluster 1', '0.7294650227133437']]
    show_doc(self.test_scenario1, examples)
    for example in examples:
        # Column order follows the "Examples headers" row in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single-argument call form: parses under both the Python 2 print
        # statement and the Python 3 print function, and emits the same
        # text as the former `print "...", example`.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        # Here the options are applied to the source (not the cluster),
        # before the dataset is built from it.
        source_create.i_update_source_with(self, options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)
def test_scenario4(self):
    """
        Scenario: Successfully comparing centroids with or without text options:
            Given I create a data source uploading a "<data>" file
            And I wait until the source is ready less than <time_1> secs
            And I update the source with params "<options>"
            And I create a dataset
            And I wait until the dataset is ready less than <time_2> secs
            And I create a cluster
            And I wait until the cluster is ready less than <time_3> secs
            And I create a local cluster
            When I create a centroid for "<data_input>"
            Then the centroid is "<centroid>" with distance "<distance>"
            And I create a local centroid for "<data_input>"
            Then the local centroid is "<centroid>" with distance "<distance>"

            Examples:
            | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Type": "ham", "Message": "Mobile call"} | Cluster 7 | 0.341886116992 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} |{"Type": "ham", "Message": "A normal message"} | Cluster 0 | 0.5 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Type": "ham", "Message": "Mobile calls"} | Cluster 0 | 0.5 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} |{"Type": "ham", "Message": "A normal message"} | Cluster 0 | 0.5 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Type": "ham", "Message": "Mobile call"} | Cluster 4 | 0.382148869802 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} |{"Type": "ham", "Message": "A normal message"} | Cluster 4 | 0.382148869802 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"} | Cluster 1 | 0.5 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} |{"Type": "ham", "Message": "Ok"} | Cluster 1 | 0.478833312167 |
            | ../data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} |{"Type": "", "Message": ""} | Cluster 0 | 0.707106781187 |
            | ../data/diabetes.csv | 20 | 20 | 30 | {"fields": {}} |{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 6 | 0.486471379368 |
            | ../data/iris_sp_chars.csv | 20 | 20 | 30 | {"fields": {}} |{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"} | Cluster 7 | 0.757736964835 |
    """
    # Call form works as a Python 2 print statement and a Python 3 call.
    print(self.test_scenario4.__doc__)
    examples = [
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 7', '0.341886116992'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 4', '0.382148869802'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 4', '0.382148869802'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 1', '0.5'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 1', '0.478833312167'],
        ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 0', '0.707106781187'],
        ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 6', '0.486471379368'],
        ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.757736964835']]
    for example in examples:
        # Column order follows the "Examples" header in the docstring.
        (data, time_1, time_2, time_3, options, data_input,
         centroid, distance) = example
        # Single argument keeps the output identical to the former
        # `print "...", example` statement on Python 2.
        print("\nTesting with:\n%s" % (example,))
        source_create.i_upload_a_file(self, data)
        source_create.the_source_is_finished(self, time_1)
        # Here the options are applied to the source (not the cluster),
        # before the dataset is built from it.
        source_create.i_update_source_with(self, options)
        dataset_create.i_create_a_dataset(self)
        dataset_create.the_dataset_is_finished_in_less_than(self, time_2)
        cluster_create.i_create_a_cluster(self)
        cluster_create.the_cluster_is_finished_in_less_than(self, time_3)
        prediction_compare.i_create_a_local_cluster(self)
        prediction_create.i_create_a_centroid(self, data_input)
        prediction_create.the_centroid_is_with_distance(
            self, centroid, distance)
        prediction_compare.i_create_a_local_centroid(self, data_input)
        prediction_compare.the_local_centroid_is(self, centroid, distance)