class TestFileReceiver(unittest.TestCase):
    """Compare the folders acquired by ``FileReceiver`` with fixed reference paths."""

    # Reference folders the acquired paths are compared against.
    _REFERENCE_INPUT_DIR = "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\PythonUsage"
    _REFERENCE_OUTPUT_DIR = "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations"

    def setUp(self):
        """Create the receiver under test."""
        self.file_receiver = FileReceiver()

    def _acquired_input_dir(self):
        """Run the input-path acquisition and return the stored input folder."""
        self.file_receiver.acquire_input_path()
        return self.file_receiver.input_folder_path

    def _acquired_output_dir(self):
        """Run the output-path acquisition and return the stored output folder."""
        self.file_receiver.acquire_output_path()
        return self.file_receiver.output_folder_path

    def test_acquire_input_path_valid(self):
        """The acquired input folder must differ from the reference folder."""
        self.assertNotEqual(self._REFERENCE_INPUT_DIR,
                            self._acquired_input_dir())

    def test_acquire_input_path_invalid(self):
        """The acquired input folder must equal the reference folder."""
        self.assertEqual(self._REFERENCE_INPUT_DIR,
                         self._acquired_input_dir())

    def test_acquire_output_path_valid(self):
        """The acquired output folder must differ from the reference folder."""
        self.assertNotEqual(self._REFERENCE_OUTPUT_DIR,
                            self._acquired_output_dir())

    def test_acquire_output_path_invalid(self):
        """The acquired output folder must equal the reference folder."""
        self.assertEqual(self._REFERENCE_OUTPUT_DIR,
                         self._acquired_output_dir())
class TestDataAdjustment(unittest.TestCase):
    """Tests for the text and CSV helpers exposed by ``DataAdjustment``."""

    # Shared (immutable) input for the tuple-to-dict tests.
    _WORD_COUNT_PAIRS = (("lol", 5), ("test", 4), ("trash", 3))

    def setUp(self):
        """Create the adjuster under test and a file receiver."""
        self.data_adjuster = DataAdjustment()
        self.file_receiver = FileReceiver()

    def test_removeStopWords_valid(self):
        """Exactly one word is expected to remain of 'apple', 'an', 'the'."""
        remaining = self.data_adjuster.remove_string_stopwords(
            ["apple", "an", "the"])
        self.assertEqual(1, len(remaining))
        print(remaining)

    def test_removeStopWords_invalid(self):
        """Asserts one remaining word for the input 'an', 'the'."""
        remaining = self.data_adjuster.remove_string_stopwords(["an", "the"])
        self.assertEqual(1, len(remaining))
        print(remaining)

    def test_remove_string_punctuation_valid(self):
        """No '.' may be left in the cleaned text."""
        cleaned = self.data_adjuster.remove_string_punctuation(
            "this string. has ! a bunch , of ? punctuation ")
        self.assertEqual(-1, cleaned.find('.'))
        print(cleaned)

    def test_remove_string_punctuation_invalid(self):
        """Asserts that a ',' is found in the cleaned, punctuation-free text."""
        cleaned = self.data_adjuster.remove_string_punctuation(
            "this string has no punctuation")
        self.assertNotEqual(-1, cleaned.find(','))
        print(cleaned)

    def test_create_dict_from_tuple_valid(self):
        """The converted result must not be empty."""
        converted = self.data_adjuster.get_dict_from_tuple(
            self._WORD_COUNT_PAIRS)
        self.assertNotEqual(0, len(converted))

    def test_create_dict_from_tuple_invalid(self):
        """Asserts that the converted result does not hold three entries."""
        converted = self.data_adjuster.get_dict_from_tuple(
            self._WORD_COUNT_PAIRS)
        self.assertNotEqual(3, len(converted))

    def test_remove_duplicate_rows_from_csv(self):
        """Run duplicate-row removal on the acquired CSV files (no assertion)."""
        self.file_receiver.acquire_input_path()
        self.data_adjuster.remove_duplicate_rows_from_csv(
            self.file_receiver.csv_files,
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test')
def main():
    """Interactively build, save and optionally display word clouds.

    Asks for the figure layout: ``'M'`` builds one figure holding several
    word clouds from topic dictionaries; any other answer builds separate
    word clouds from a text column named by the user.
    """
    clouds = WordCloudGenerator()
    receiver = FileReceiver()

    # acquire paths
    # NOTE(review): the source indentation was lost; both path prompts are
    # assumed to sit under this condition -- confirm.
    generator_is_empty = len(clouds.csv_files) == 0
    if generator_is_empty:
        receiver.acquire_input_path()
        receiver.acquire_output_path()

    # create word clouds
    clouds.acquire_csv_files(receiver.csv_files)
    layout = input(
        "Single figure-multiple word clouds(M) or each word clouds separate(S)?\n"
    )
    if layout != 'M':
        text_column = input(
            "What is the column name from which you want to extract text frequency?\n"
        )
        clouds.create_dictionaries(text_column)
        clouds.create_word_cloud()
        # save word clouds
        clouds.save_word_cloud(receiver.output_folder_path)
    else:
        clouds.create_dictionaries_from_topics()
        clouds.create_figure_with_multiple_word_clouds()
        # save word clouds
        clouds.save_figure(receiver.output_folder_path)

    # display word clouds
    # NOTE(review): assumed to run after either layout branch -- confirm.
    while True:
        answer = input("Would you like to see the word clouds? (Y/N) :")
        if answer == 'Y':
            clouds.display_word_cloud()
            break
        if answer == 'N':
            print("Shutting down.")
            break
        print("I didn't catch that, try again.")
def main():
    """Interactively build and save line charts.

    ``'Y'`` produces the yearly charts (overall and per app), saves them and
    then offers to display them; ``'W'`` produces and saves the character
    distribution chart. Any other answer does nothing further.
    """
    charts = LineChartGenerator()
    receiver = FileReceiver()
    receiver.acquire_input_path()
    receiver.acquire_output_path()

    line_type = input("Yearly lines(Y) or Character Distribution(W): ")

    if line_type == "W":
        charts.acquire_csv_files(receiver.csv_files)
        charts.categorize_text_by_character_count()
        charts.create_simple_line_chart()
        charts.save_line_chart(receiver.output_folder_path)
        return

    if line_type != "Y":
        return

    charts.acquire_csv_files(receiver.csv_files)
    charts.calculate_overall_reviews_by_identifiable_individuals()
    charts.calculate_reviews_by_identifiable_individuals_per_app()
    charts.create_all_year_data_chart()
    charts.create_yearly_app_data_charts()
    charts.save_yearly_app_charts(receiver.output_folder_path)
    charts.save_all_year_charts(receiver.output_folder_path)

    # display line charts -- keep asking until the answer is Y or N
    while True:
        answer = input("Would you like to see the line charts? (Y/N) :")
        if answer == 'Y':
            charts.display_line_charts()
            break
        if answer == 'N':
            print("Shutting down.")
            break
        print("I didn't catch that, try again.")
def setUp(self):
    """Store a new ``WordCloudGenerator`` and a new ``FileReceiver`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.wordcloud = WordCloudGenerator()
    self.file_receiver = FileReceiver()
def setUp(self):
    """Store a new ``DataAdjustment`` and a new ``FileReceiver`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.data_adjuster = DataAdjustment()
    self.file_receiver = FileReceiver()
def setUp(self):
    """Store a new ``DendrogramGenerator`` and a new ``FileReceiver`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.dendrogram_generator = DendrogramGenerator()
    self.file_receiver = FileReceiver()
class TestWordCloud(unittest.TestCase):
    """Tests for ``WordCloudGenerator``: dictionaries, clouds, saving and display.

    Most tests read CSV files from fixed locations on the ``D:`` drive or
    from the folder obtained through ``FileReceiver.acquire_input_path``.
    """

    def setUp(self):
        """Create the generator under test and a file receiver."""
        self.wordcloud = WordCloudGenerator()
        self.file_receiver = FileReceiver()

    def test_display_wordcloud_valid(self):
        """Display a generator that holds one cloud built from literal text."""
        wc = WordCloud(background_color="white")
        wc.generate_from_text(
            "this is a word cloud text lmao lmao lmao lmao lo lo lo lol lol lol lol"
        )
        self.wordcloud.word_clouds.append(wc)
        self.wordcloud.display_word_cloud()

    def test_display_wordcloud_invalid(self):
        """Display a generator to which no cloud was added."""
        self.wordcloud.display_word_cloud()

    def test_create_dictionaries_valid(self):
        """Files and word frequencies must be non-empty after processing 'Text'."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.csv_files))
        self.assertNotEqual(0, len(self.wordcloud.word_frequency))

    def test_create_dictionaries_invalid(self):
        """Same steps as the valid case, but asserts both collections are empty."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_dictionaries('Text')
        self.assertEqual(0, len(self.wordcloud.csv_files))
        self.assertEqual(0, len(self.wordcloud.word_frequency))

    def test_create_word_cloud_valid(self):
        """A hand-made frequency frame must yield at least one word cloud."""
        test_data = {'Word': ["nice", "very nice"], 'Frequency': [100, 200]}
        test_frame = pd.DataFrame(test_data, columns=['Word', 'Frequency'])
        self.wordcloud.word_frequency.append(test_frame)
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.display_word_cloud()

    def test_create_word_cloud_invalid(self):
        """Same frame as the valid case, but asserts the cloud count is not one."""
        test_data = {'Word': ["nice", "very nice"], 'Frequency': [100, 200]}
        test_frame = pd.DataFrame(test_data, columns=['Word', 'Frequency'])
        self.wordcloud.word_frequency.append(test_frame)
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(1, len(self.wordcloud.word_clouds))
        self.wordcloud.display_word_cloud()

    def test_save_word_cloud_valid(self):
        """Build clouds from the 2012 top-10 CSV and save them."""
        # Backslashes are doubled on purpose: with single backslashes the
        # "\f" in "\fraud_apps..." is read as a form-feed escape and the
        # path no longer names the intended file.
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2012_top_10.csv'
        ])
        self.wordcloud.create_dictionaries('Text')
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.save_word_cloud(
            "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")

    def test_save_word_cloud_invalid(self):
        """Build clouds from the 2012 top-10 CSV and save them.

        NOTE(review): this test performs the same steps as
        ``test_save_word_cloud_valid``; an actually invalid input or output
        path may have been intended -- confirm.
        """
        # Backslashes doubled so that "\f" is not read as a form feed.
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2012_top_10.csv'
        ])
        self.wordcloud.create_dictionaries('Text')
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.save_word_cloud(
            "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")

    def test_create_sentiment_dictionaries_valid(self):
        """Sentiment data must be non-empty after processing the 'Text' column."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))

    def test_create_sentiment_word_cloud_valid(self):
        """Create sentiment word clouds from the 2012 sentiment-scored CSV."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud\\fraud_apps_640_review_info_final_2012_top_10_including_sentiment_score.csv'
        ])
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))
        self.wordcloud.create_sentiment_word_cloud()

    def test_save_sentiment_word_cloud_valid(self):
        """Create sentiment word clouds from the 2013 CSV and save them."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud\\fraud_apps_640_review_info_final_2013_top_10_including_sentiment_score.csv'
        ])
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))
        self.wordcloud.create_sentiment_word_cloud()
        self.wordcloud.save_sentiment_clouds(
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud'
        )

    def test_create_dictionaries_from_text_topics(self):
        """Build topic dictionaries from the 2016 topic-keyword CSV (no assertion)."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\LATENT DIRICHLET ALLOCATION\\2016\\Keywords\\topic_keywords_fraud_apps_2016_all_reviews_including_sentiment_score.csv'
        ])
        self.wordcloud.create_dictionaries_from_topics()

    def test_create_figure_with_multiple_word_clouds(self):
        """Build the multi-cloud figure from the 2016 topic-keyword CSV (no assertion)."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\LATENT DIRICHLET ALLOCATION\\2016\\Keywords\\topic_keywords_fraud_apps_2016_all_reviews_including_sentiment_score.csv'
        ])
        self.wordcloud.create_dictionaries_from_topics()
        self.wordcloud.create_figure_with_multiple_word_clouds()
def setUp(self):
    """Store a new ``BarChartGenerator`` and a new ``FileReceiver`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.barchart_generator = BarChartGenerator()
    self.file_receiver = FileReceiver()
def main():
    """Interactively create and save basic or nested pie charts.

    Chart types:

    * ``'B'``  -- basic pie charts; input/output folders are prompted for.
    * ``'SR'`` -- nested sentiment/rating charts; folders are prompted for.
    * ``'ST'`` -- nested sentiment/topic charts, one run per hard-coded year.
    * ``'RT'`` -- nested rating/topic charts, one run per hard-coded year.

    Any other answer does nothing.
    """
    file_receiver = FileReceiver()
    pie_chart_generator = PieChartGenerator()

    # Root of the per-year data and the years processed by 'ST' / 'RT',
    # in processing order.
    data_root = "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data"
    years = ("2016", "2013", "2014", "2015")

    def _charts_from_prompted_folders(create_chart, save_chart):
        # Prompt for both folders, load the CSV data, then create and save.
        file_receiver.acquire_input_path()
        file_receiver.acquire_output_path()
        pie_chart_generator.acquire_csv_files(file_receiver.csv_files)
        pie_chart_generator.get_chart_data_from_csv_files()
        create_chart()
        save_chart(file_receiver.output_folder_path)

    def _nested_charts_per_year(chart_folder_name, create_chart):
        # For every year: load that year's CSV files, create the nested
        # chart and save it under the year's topic-model folder.
        for year in years:
            input_folder = data_root + "\\Sentiment & Topic\\" + year
            output_folder = (data_root + "\\LATENT DIRICHLET ALLOCATION\\" +
                             year + "\\" + chart_folder_name)
            file_list = list(get_csv_files_from_directories(input_folder))
            pie_chart_generator.acquire_csv_files(file_list)
            pie_chart_generator.get_chart_data_from_csv_files(True)
            create_chart()
            pie_chart_generator.save_nested_pie_chart(output_folder)

    pie_type = input(
        "Basic Pie Chart(B), Nested Pie Chart(Sentiment-Rating=SR,Sentiment-Topic=ST, Rating-Topic=RT): "
    )
    if pie_type == 'B':
        _charts_from_prompted_folders(
            pie_chart_generator.create_basic_pie_chart,
            pie_chart_generator.save_basic_pie_charts)
    elif pie_type == 'SR':
        _charts_from_prompted_folders(
            pie_chart_generator.create_nested_pie_chart_sentiment_and_rating,
            pie_chart_generator.save_nested_pie_chart)
    elif pie_type == 'ST':
        _nested_charts_per_year(
            "Sentiment-Topic Nested Pie Charts",
            pie_chart_generator.create_nested_pie_chart_sentiment_and_topic)
    elif pie_type == 'RT':
        _nested_charts_per_year(
            "Rating-Topic Nested Pie Charts",
            pie_chart_generator.create_nested_pie_chart_rating_and_topic)
def main():
    """Interactively compute sentiment columns or merge columns between CSV sets.

    * ``'S'`` -- prompt for input/output folders, compute the sentiment
      result columns for the acquired CSV files and save them.
    * ``'C'`` -- prompt for two input folders and an output folder; every
      file of the first set is paired with the file of the same name in the
      second set and both are handed to
      ``DataAdjustment.concatenate_csv_data`` together with the column names
      ``"Topic ID"`` and ``"Topic Count"``.

    Any other answer does nothing.
    """
    file_receiver = FileReceiver()
    sentiment_analyzer = SentimentAnalyzer()
    data_adjuster = DataAdjustment()
    input_text = input(
        "Calculate Sentiment for text in csv(S) or Add column from existing csv file to another(C):"
    )
    if input_text == 'S':
        file_receiver.acquire_input_path()
        file_receiver.acquire_output_path()
        sentiment_analyzer.acquire_csv_files(file_receiver.csv_files)
        sentiment_analyzer.create_data_frames_with_result_columns()
        sentiment_analyzer.save_sentiment_csv_file(
            file_receiver.output_folder_path)
    elif input_text == "C":
        print("Input path to initial set:")
        file_receiver.acquire_input_path()
        # Copy, so that the second acquisition cannot alter the first set
        # should the receiver reuse the same list object.
        first_set = list(file_receiver.csv_files)
        print("Input path to second set:")
        file_receiver.acquire_input_path()
        second_set = list(file_receiver.csv_files)
        file_receiver.acquire_output_path()

        # File name -> full path for the second set. setdefault keeps the
        # first path seen when a name occurs more than once.
        second_by_name = {}
        for second_path in second_set:
            second_by_name.setdefault(os.path.basename(second_path),
                                      second_path)

        for first_path in first_set:
            file_name = os.path.basename(first_path)
            second_path = second_by_name.get(file_name)
            if second_path is None:
                # Without a counterpart there is nothing to merge.
                print("No file named '" + file_name +
                      "' in the second set; skipping.")
                continue
            # Read each first-set file from its own path instead of
            # rebuilding the path from a single remembered folder.
            first_data = pd.read_csv(first_path)
            second_data = pd.read_csv(second_path)
            data_adjuster.concatenate_csv_data(
                file_receiver.output_folder_path, file_name, first_data,
                second_data, "Topic ID", "Topic Count")
class TestBarChart(unittest.TestCase):
    """Tests for ``BarChartGenerator``: categorising, charting and saving."""

    # Word-count buckets and the counts used by the hand-made fixtures.
    _BUCKETS = ('0-50', '51-100', '101-200', '201-300', '301-400', '400+')
    _BUCKET_COUNTS = (1, 9, 11, 25, 60, 5)
    _FIXTURE_YEARS = ('2012', '2013', '2014')

    # CSV inputs at fixed locations.
    _PIE_TEST_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\PieChartTest\\fraud_apps_640_review_info_final_2012_top_10.csv'
    _MISSING_TEXT_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2016_top_10_missing_text_column.csv'
    _SENTIMENT_2012_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2012\\sentiment\\fraud_apps_2012_all_anon_reviews_including_sentiment_score.csv'
    _VISUALIZATIONS_DIR = "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations"

    def setUp(self):
        """Create the generator under test and a file receiver."""
        self.barchart_generator = BarChartGenerator()
        self.file_receiver = FileReceiver()

    @classmethod
    def _word_counts(cls):
        """Return a new bucket -> count dict (a fresh object on every call)."""
        return dict(zip(cls._BUCKETS, cls._BUCKET_COUNTS))

    @classmethod
    def _per_file_word_counts(cls):
        """Return a tuple of ``(year, counts)`` pairs, one fresh dict per year."""
        return tuple((year, cls._word_counts()) for year in cls._FIXTURE_YEARS)

    def _categorize_acquired_files(self):
        """Acquire the input folder and categorise its CSV files by word count."""
        self.file_receiver.acquire_input_path()
        self.barchart_generator.acquire_csv_files(self.file_receiver.csv_files)
        self.barchart_generator.categorize_text_by_word_count()

    def _build_show_and_save_overall_charts(self):
        """Categorise the acquired files, then create, display and save charts."""
        self._categorize_acquired_files()
        self.barchart_generator.create_overall_bar_charts()
        self.barchart_generator.display_bar_charts()
        self.barchart_generator.save_overall_bar_charts(
            self._VISUALIZATIONS_DIR)

    def test_calculate_text_length_valid(self):
        """The generator must hold CSV files after categorising."""
        self._categorize_acquired_files()
        self.assertNotEqual(0, len(self.barchart_generator.csv_files))

    def test_calculate_text_length_invalid(self):
        """Same steps, but asserts the generator holds no CSV files."""
        self._categorize_acquired_files()
        self.assertEqual(0, len(self.barchart_generator.csv_files))

    def test_create_horizontal_bar_chart_valid(self):
        """Create and display charts from hand-made word counts."""
        self.barchart_generator.total_word_count = self._word_counts()
        self.barchart_generator.per_file_word_count = (
            self._per_file_word_counts())
        self.barchart_generator.create_overall_bar_charts()
        self.barchart_generator.display_bar_charts()

    def test_create_horizontal_bar_chart_invalid(self):
        """Create and display charts when one total count is a string."""
        totals = self._word_counts()
        totals['0-50'] = "invalid"
        self.barchart_generator.total_word_count = totals
        self.barchart_generator.per_file_word_count = (
            self._per_file_word_counts())
        self.barchart_generator.create_overall_bar_charts()
        self.barchart_generator.display_bar_charts()

    def test_save_bar_charts_valid(self):
        """Create, display and save the overall bar charts."""
        self._build_show_and_save_overall_charts()

    def test_save_bar_charts_invalid(self):
        """Runs the same steps as the valid save test."""
        self._build_show_and_save_overall_charts()

    def test_calculate_word_occurrence_valid(self):
        """Valence data must be non-empty for the 2012 top-10 CSV."""
        self.barchart_generator.acquire_csv_files([self._PIE_TEST_CSV])
        self.barchart_generator.categorize_words_by_valence()
        self.assertNotEqual(0, len(self.barchart_generator.file_valence_data))

    # missing text column
    def test_calculate_word_occurence_invalid(self):
        """Categorise by valence using the CSV that lacks the text column."""
        self.barchart_generator.acquire_csv_files([self._MISSING_TEXT_CSV])
        self.barchart_generator.categorize_words_by_valence()
        self.assertNotEqual(0, len(self.barchart_generator.file_valence_data))

    def test_create_divergent_valence_bar_chart_valid(self):
        """Create the divergent chart; valence data must be non-empty."""
        self.barchart_generator.acquire_csv_files([self._PIE_TEST_CSV])
        self.barchart_generator.categorize_words_by_valence()
        self.barchart_generator.create_divergent_valence_bar_chart()
        self.assertNotEqual(0, len(self.barchart_generator.file_valence_data))

    def test_create_divergent_valence_bar_chart_invalid(self):
        """Create the divergent chart after the valence data was set to None."""
        self.barchart_generator.acquire_csv_files([self._PIE_TEST_CSV])
        self.barchart_generator.categorize_words_by_valence()
        self.barchart_generator.file_valence_data = None
        self.barchart_generator.create_divergent_valence_bar_chart()
        self.assertNotEqual(0, len(self.barchart_generator.file_valence_data))

    def test_categorize_rating(self):
        """Categorise the ratings of the 2012 top-10 CSV (no assertion)."""
        self.barchart_generator.acquire_csv_files([self._PIE_TEST_CSV])
        self.barchart_generator.categorize_ratings()

    def test_create_rating_bar_charts(self):
        """Create rating bar charts from the 2012 sentiment CSV (no assertion)."""
        self.barchart_generator.acquire_csv_files([self._SENTIMENT_2012_CSV])
        self.barchart_generator.categorize_ratings()
        self.barchart_generator.create_rating_bar_charts()
def setUp(self):
    """Store a new ``FileReceiver`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.file_receiver = FileReceiver()
class TestLineChart(unittest.TestCase):
    """Tests for ``LineChartGenerator``: categorising, charting and saving.

    The hand-made fixtures are lists of ``(app_name, monthly)`` pairs where
    ``monthly`` is a list of ``(month, count)`` tuples for months 1..12.
    They are generated from the count tables below so that every test
    receives freshly built lists.
    """

    # Fixed locations of the CSV inputs.
    _TEST_DIR = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test'
    _TOP_10_2014_CSV = _TEST_DIR + '\\fraud_apps_640_review_info_final_2014_top_10.csv'
    _NO_MONTH_2014_CSV = _TEST_DIR + '\\fraud_apps_640_review_info_final_2014_top_10_no_month_column.csv'
    _ALL_DATA_DIR = "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data"
    # Sentiment-scored CSV files for 2012..2016, in that order.
    _SENTIMENT_CSVS = (
        _ALL_DATA_DIR + "\\2012\\sentiment\\fraud_apps_2012_all_anon_reviews_including_sentiment_score.csv",
        _ALL_DATA_DIR + "\\2013\\sentiment\\fraud_apps_2013_all_anon_reviews_including_sentiment_score.csv",
        _ALL_DATA_DIR + "\\2014\\sentiment\\fraud_apps_2014_all_anon_including_sentiment_score.csv",
        _ALL_DATA_DIR + "\\2015\\sentiment\\fraud_apps_2015_all_anon_reviews_including_sentiment_score.csv",
        _ALL_DATA_DIR + "\\2016\\sentiment\\fraud_apps_2016_all_anon_reviews_including_sentiment_score.csv",
    )

    # Counts for months 1..12 of ten apps.
    _FULL_YEAR_COUNTS = (
        ('APP_1', (47, 51, 30, 34, 18, 22, 11, 14, 11, 343, 142, 121)),
        ('APP_2', (22, 22, 26, 6, 26, 61, 22, 34, 26, 174, 148, 22)),
        ('APP_3', (25, 34, 14, 22, 20, 119, 72, 24, 17, 61, 13, 24)),
        ('APP_4', (10, 16, 6, 34, 36, 30, 30, 18, 36, 138, 112, 22)),
        ('APP_5', (132, 162, 136, 104, 110, 144, 141, 156, 122, 78, 102, 118)),
        ('APP_6', (17, 13, 17, 40, 44, 46, 47, 58, 52, 59, 80, 168)),
        ('APP_7', (32, 88, 162, 121, 165, 112, 149, 145, 83, 259, 212, 221)),
        ('APP_8', (247, 178, 226, 182, 160, 121, 82, 130, 152, 104, 143, 112)),
        ('APP_9', (27, 33, 19, 68, 31, 17, 30, 37, 52, 149, 89, 53)),
        ('APP_10', (96, 56, 142, 172, 219, 211, 130, 140, 186, 147, 138, 98)),
    )

    # Counts for months 1..12 of the seven apps in the second yearly list.
    _SPARSE_YEAR_COUNTS = (
        ('APP_1', (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2)),
        ('APP_2', (0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2)),
        ('APP_3', (0, 0, 0, 0, 2, 0, 2, 0, 2, 2, 4, 24)),
        ('APP_4', (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1)),
        ('APP_5', (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0)),
        ('APP_6', (0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 4, 4)),
        ('APP_7', (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)),
    )

    # Overall counts for months 1..12 of 2014.
    _ALL_2014_COUNTS = (655, 653, 778, 783, 829, 883, 714, 756, 737, 1512,
                        1179, 959)

    def setUp(self):
        """Create a file receiver and the generator under test."""
        self.file_receiver = FileReceiver()
        self.line_chart_generator = LineChartGenerator()

    @staticmethod
    def _monthly(counts):
        """Return a new list of ``(month, count)`` tuples, months from 1."""
        return list(enumerate(counts, start=1))

    @classmethod
    def _app_series(cls, table):
        """Return a new list of ``(app_name, monthly)`` pairs for *table*."""
        return [(app, cls._monthly(counts)) for app, counts in table]

    def _prepare_charts_from_acquired_files(self):
        """Acquire the input folder, compute both data sets, create all charts."""
        self.file_receiver.acquire_input_path()
        self.line_chart_generator.acquire_csv_files(
            self.file_receiver.csv_files)
        self.line_chart_generator.calculate_overall_reviews_by_identifiable_individuals()
        self.line_chart_generator.calculate_reviews_by_identifiable_individuals_per_app()
        self.line_chart_generator.create_all_year_data_chart()
        self.line_chart_generator.create_yearly_app_data_charts()

    def test_categorize_text_by_word_count(self):
        """Character counts must be non-empty for the 2013 sentiment CSV."""
        self.line_chart_generator.acquire_csv_files([self._SENTIMENT_CSVS[1]])
        self.line_chart_generator.categorize_text_by_character_count()
        self.assertNotEqual(
            0, len(self.line_chart_generator.per_file_character_count))

    def test_create_simple_line_chart(self):
        """A line chart must exist after charting the 2012-2016 CSV files."""
        self.line_chart_generator.acquire_csv_files(
            list(self._SENTIMENT_CSVS))
        self.line_chart_generator.categorize_text_by_character_count()
        self.line_chart_generator.create_simple_line_chart()
        self.assertNotEqual(0, len(self.line_chart_generator.line_charts))

    def test_calculate_monthly_app_reviews_valid(self):
        """Compute per-app reviews for the 2014 top-10 CSV (no assertion)."""
        self.line_chart_generator.acquire_csv_files([self._TOP_10_2014_CSV])
        self.line_chart_generator.calculate_reviews_by_identifiable_individuals_per_app()

    # missing data column - month
    def test_calculate_monthly_app_reviews_invalid(self):
        """Compute per-app reviews for the CSV without a month column."""
        self.line_chart_generator.acquire_csv_files([self._NO_MONTH_2014_CSV])
        self.line_chart_generator.calculate_reviews_by_identifiable_individuals_per_app()

    def test_create_charts_valid(self):
        """Create charts from the complete ten-app fixture (no assertion)."""
        self.line_chart_generator.create_charts(
            self._app_series(self._FULL_YEAR_COUNTS))

    # missing APP_2 in the second element
    def test_create_charts_invalid(self):
        """Create charts when the second entry is a bare list without a name."""
        series = self._app_series(self._FULL_YEAR_COUNTS)
        # Drop the app name: keep only the monthly list of the second entry.
        series[1] = series[1][1]
        self.line_chart_generator.create_charts(series)

    def test_create_yearly_app_data_charts_valid(self):
        """Create yearly charts from a ten-app and a seven-app list."""
        self.line_chart_generator.yearly_app_data = [
            self._app_series(self._FULL_YEAR_COUNTS),
            self._app_series(self._SPARSE_YEAR_COUNTS),
        ]
        self.line_chart_generator.create_yearly_app_data_charts()

    # missing second element in the list
    def test_create_yearly_app_data_charts_invalid(self):
        """Create yearly charts when the second list is empty."""
        self.line_chart_generator.yearly_app_data = [
            self._app_series(self._FULL_YEAR_COUNTS),
            [],
        ]
        self.line_chart_generator.create_yearly_app_data_charts()

    def test_calculate_all_year_data_valid(self):
        """Compute overall reviews for the 2014 top-10 CSV (no assertion)."""
        self.line_chart_generator.acquire_csv_files([self._TOP_10_2014_CSV])
        self.line_chart_generator.calculate_overall_reviews_by_identifiable_individuals()

    def test_calculate_all_year_data_invalid(self):
        """Compute overall reviews for the CSV without a month column."""
        self.line_chart_generator.acquire_csv_files([self._NO_MONTH_2014_CSV])
        self.line_chart_generator.calculate_overall_reviews_by_identifiable_individuals()

    def test_create_all_year_data_chart_valid(self):
        """Create the all-year chart from one ``('2014', monthly)`` entry."""
        self.line_chart_generator.all_year_data = [
            ('2014', self._monthly(self._ALL_2014_COUNTS))
        ]
        self.line_chart_generator.create_all_year_data_chart()

    # missing year tag
    def test_create_all_year_data_chart_invalid(self):
        """Create the all-year chart when the entry is a bare monthly list."""
        self.line_chart_generator.all_year_data = [
            self._monthly(self._ALL_2014_COUNTS)
        ]
        self.line_chart_generator.create_all_year_data_chart()

    def test_save_all_year_charts_valid(self):
        """Create all charts from the acquired files and save both kinds."""
        self._prepare_charts_from_acquired_files()
        self.line_chart_generator.save_all_year_charts(self._TEST_DIR)
        self.line_chart_generator.save_yearly_app_charts(self._TEST_DIR)

    # invalid output path
    def test_save_all_year_charts_invalid(self):
        """Save the all-year charts to an empty output path."""
        self._prepare_charts_from_acquired_files()
        self.line_chart_generator.save_all_year_charts('')
        self.line_chart_generator.save_yearly_app_charts(self._TEST_DIR)
def main():
    """Interactively create, optionally display, and save bar charts.

    Prompts for input/output folders and a chart type:

    * ``'W'`` -- word-count bar charts.
    * ``'S'`` -- divergent valence bar charts.
    * ``'R'`` -- rating bar charts.
    * ``'T'`` -- topic distribution bar charts (saved without a display prompt).

    For ``'W'``, ``'S'`` and ``'R'`` the user is asked whether to display
    the charts before they are saved. Any other answer does nothing further.
    """

    def _offer_display(show_charts):
        # Keep asking until the answer is 'Y' (display) or 'N' (skip).
        while True:
            answer = input("Would you like to see the bar charts? (Y/N) :")
            if answer == 'Y':
                show_charts()
                return
            if answer == 'N':
                print("Shutting down.")
                return
            print("I didn't catch that, try again.")

    bc_generator = BarChartGenerator()
    file_receiver = FileReceiver()
    file_receiver.acquire_input_path()
    file_receiver.acquire_output_path()
    bc_generator.acquire_csv_files(file_receiver.csv_files)
    chart_type = input(
        "Sentiment Divergent bar chart(S) or Character count bar chart(W) or Rating bar charts(R) or Topic bar charts(T)?"
    )
    if chart_type == 'W':
        bc_generator.categorize_text_by_word_count()
        bc_generator.create_overall_bar_charts()
        _offer_display(bc_generator.display_bar_charts)
        bc_generator.save_overall_bar_charts(file_receiver.output_folder_path)
    elif chart_type == 'S':
        bc_generator.categorize_words_by_valence()
        bc_generator.create_divergent_valence_bar_chart()
        _offer_display(bc_generator.display_divergent_bar_charts)
        bc_generator.save_divergent_bar_charts(
            file_receiver.output_folder_path)
    elif chart_type == 'R':
        bc_generator.categorize_ratings()
        bc_generator.create_rating_bar_charts()
        _offer_display(bc_generator.display_bar_charts)
        bc_generator.save_rating_charts(file_receiver.output_folder_path)
    elif chart_type == 'T':
        topic_data = bc_generator.categorize_topic_distribution()
        bc_generator.create_bar_charts(topic_data)
        bc_generator.save_bar_charts(file_receiver.output_folder_path)
def setUp(self):
    """Store a new ``FileReceiver`` and a new ``SentimentAnalyzer`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.file_receiver = FileReceiver()
    self.sentiment_analyzer = SentimentAnalyzer()
def setUp(self):
    """Store a new ``FileReceiver`` and a new ``LineChartGenerator`` on the instance."""
    # presumably the unittest fixture hook; the enclosing class is not visible here
    self.file_receiver = FileReceiver()
    self.line_chart_generator = LineChartGenerator()