예제 #1
0
class TestFileReceiver(unittest.TestCase):
    """Checks on the folder paths a ``FileReceiver`` holds after acquisition.

    Every test runs the relevant ``acquire_*_path`` call on a fresh receiver
    and compares the resulting folder attribute with a fixed reference path.
    """

    # Reference folders used on the "expected" side of each assertion.
    REFERENCE_INPUT_FOLDER = (
        "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\PythonUsage")
    REFERENCE_OUTPUT_FOLDER = (
        "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")

    def setUp(self):
        """Give each test its own receiver instance."""
        self.file_receiver = FileReceiver()

    def test_acquire_input_path_valid(self):
        """Input folder after acquisition must differ from the reference."""
        receiver = self.file_receiver
        receiver.acquire_input_path()
        acquired = receiver.input_folder_path
        self.assertNotEqual(self.REFERENCE_INPUT_FOLDER, acquired)

    def test_acquire_input_path_invalid(self):
        """Input folder after acquisition must equal the reference."""
        receiver = self.file_receiver
        receiver.acquire_input_path()
        acquired = receiver.input_folder_path
        self.assertEqual(self.REFERENCE_INPUT_FOLDER, acquired)

    def test_acquire_output_path_valid(self):
        """Output folder after acquisition must differ from the reference."""
        receiver = self.file_receiver
        receiver.acquire_output_path()
        acquired = receiver.output_folder_path
        self.assertNotEqual(self.REFERENCE_OUTPUT_FOLDER, acquired)

    def test_acquire_output_path_invalid(self):
        """Output folder after acquisition must equal the reference."""
        receiver = self.file_receiver
        receiver.acquire_output_path()
        acquired = receiver.output_folder_path
        self.assertEqual(self.REFERENCE_OUTPUT_FOLDER, acquired)
예제 #2
0
class TestDataAdjustment(unittest.TestCase):
    """Tests for the text/CSV clean-up helpers on ``DataAdjustment``."""

    def setUp(self):
        """Create the adjuster under test plus a receiver for CSV input."""
        self.data_adjuster = DataAdjustment()
        self.file_receiver = FileReceiver()

    def test_removeStopWords_valid(self):
        """Exactly one entry is expected back from a three-word list."""
        words = ["apple", "an", "the"]
        remaining = self.data_adjuster.remove_string_stopwords(words)
        self.assertEqual(1, len(remaining))
        print(remaining)

    def test_removeStopWords_invalid(self):
        """Exactly one entry is expected back from ``["an", "the"]``."""
        words = ["an", "the"]
        remaining = self.data_adjuster.remove_string_stopwords(words)
        self.assertEqual(1, len(remaining))
        print(remaining)

    def test_remove_string_punctuation_valid(self):
        """No '.' may be left in the processed string."""
        text = "this string. has ! a bunch , of ? punctuation "
        cleaned = self.data_adjuster.remove_string_punctuation(text)
        self.assertEqual(-1, cleaned.find('.'))
        print(cleaned)

    def test_remove_string_punctuation_invalid(self):
        """Asserts that a ',' IS found in the processed string."""
        text = "this string has no punctuation"
        cleaned = self.data_adjuster.remove_string_punctuation(text)
        self.assertNotEqual(-1, cleaned.find(','))
        print(cleaned)

    def test_create_dict_from_tuple_valid(self):
        """The converted result must not be empty."""
        pairs = (("lol", 5), ("test", 4), ("trash", 3))
        converted = self.data_adjuster.get_dict_from_tuple(pairs)
        self.assertNotEqual(0, len(converted))

    def test_create_dict_from_tuple_invalid(self):
        """Asserts the converted result does NOT have three entries."""
        pairs = (("lol", 5), ("test", 4), ("trash", 3))
        converted = self.data_adjuster.get_dict_from_tuple(pairs)
        self.assertNotEqual(3, len(converted))

    def test_remove_duplicate_rows_from_csv(self):
        """Run duplicate-row removal on the receiver's CSV files.

        No assertion is made; the test only checks that the call completes.
        """
        self.file_receiver.acquire_input_path()
        csv_files = self.file_receiver.csv_files
        target_folder = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test'
        self.data_adjuster.remove_duplicate_rows_from_csv(
            csv_files, target_folder)
def main():
    """Interactive driver: build word clouds, save them, optionally show them.

    The user picks between one figure holding several word clouds (``M``)
    and separate word clouds (any other answer), then is asked repeatedly
    until answering ``Y`` or ``N`` to the display question.
    """
    generator = WordCloudGenerator()
    receiver = FileReceiver()

    # Only acquire an input path when the generator holds no CSV files yet.
    if len(generator.csv_files) == 0:
        receiver.acquire_input_path()
    receiver.acquire_output_path()

    generator.acquire_csv_files(receiver.csv_files)
    layout = input(
        "Single figure-multiple word clouds(M) or each word clouds separate(S)?\n"
    )
    if layout != 'M':
        # Separate word clouds built from one user-chosen column.
        column = input(
            "What is the column name from which you want to extract text frequency?\n"
        )
        generator.create_dictionaries(column)
        generator.create_word_cloud()
        generator.save_word_cloud(receiver.output_folder_path)
    else:
        # One figure containing several word clouds built from topics.
        generator.create_dictionaries_from_topics()
        generator.create_figure_with_multiple_word_clouds()
        generator.save_figure(receiver.output_folder_path)

    # Keep asking until the answer is exactly 'Y' or 'N'.
    question = "Would you like to see the word clouds? (Y/N) :"
    answer = input(question)
    while answer not in ('Y', 'N'):
        print("I didn't catch that, try again.")
        answer = input(question)

    if answer == 'Y':
        generator.display_word_cloud()
    else:
        print("Shutting down.")
def main():
    """Interactive driver for line charts.

    ``Y`` builds and saves the yearly charts and then asks whether to
    display them; ``W`` builds and saves the character-distribution chart.
    Any other answer ends the function after the paths were acquired.
    """
    chart_builder = LineChartGenerator()
    receiver = FileReceiver()

    receiver.acquire_input_path()
    receiver.acquire_output_path()

    mode = input("Yearly lines(Y) or Character Distribution(W): ")

    if mode == "W":
        chart_builder.acquire_csv_files(receiver.csv_files)
        chart_builder.categorize_text_by_character_count()
        chart_builder.create_simple_line_chart()
        chart_builder.save_line_chart(receiver.output_folder_path)
        return

    if mode != "Y":
        return

    chart_builder.acquire_csv_files(receiver.csv_files)
    chart_builder.calculate_overall_reviews_by_identifiable_individuals()
    chart_builder.calculate_reviews_by_identifiable_individuals_per_app()
    chart_builder.create_all_year_data_chart()
    chart_builder.create_yearly_app_data_charts()

    chart_builder.save_yearly_app_charts(receiver.output_folder_path)
    chart_builder.save_all_year_charts(receiver.output_folder_path)

    # Keep asking until the answer is exactly 'Y' or 'N'.
    question = "Would you like to see the line charts? (Y/N) :"
    answer = input(question)
    while answer not in ('Y', 'N'):
        print("I didn't catch that, try again.")
        answer = input(question)

    if answer == 'Y':
        chart_builder.display_line_charts()
    else:
        print("Shutting down.")
예제 #5
0
 def setUp(self):
     """Create a fresh WordCloudGenerator and FileReceiver for each test."""
     self.wordcloud = WordCloudGenerator()
     self.file_receiver = FileReceiver()
예제 #6
0
 def setUp(self):
     """Create a fresh DataAdjustment and FileReceiver for each test."""
     self.data_adjuster = DataAdjustment()
     self.file_receiver = FileReceiver()
예제 #7
0
 def setUp(self):
     """Create a fresh DendrogramGenerator and FileReceiver for each test."""
     self.dendrogram_generator = DendrogramGenerator()
     self.file_receiver = FileReceiver()
예제 #8
0
class TestWordCloud(unittest.TestCase):
    """Tests for ``WordCloudGenerator``.

    Most tests read CSV files from, or write images to, fixed folders on the
    ``D:`` drive; the ``acquire_input_path`` based ones depend on whatever
    ``FileReceiver`` does to obtain a folder.
    """

    def setUp(self):
        """Create a fresh generator and receiver for each test."""
        self.wordcloud = WordCloudGenerator()
        self.file_receiver = FileReceiver()

    def test_display_wordcloud_valid(self):
        """Display a generator that holds one hand-built word cloud."""
        wc = WordCloud(background_color="white")
        wc.generate_from_text(
            "this is a word cloud text lmao lmao lmao lmao lo lo lo lol lol lol lol"
        )
        self.wordcloud.word_clouds.append(wc)
        self.wordcloud.display_word_cloud()

    def test_display_wordcloud_invalid(self):
        """Display a generator to which no word cloud was added."""
        self.wordcloud.display_word_cloud()

    def test_create_dictionaries_valid(self):
        """After processing column 'Text', files and frequencies are non-empty."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.csv_files))
        self.assertNotEqual(0, len(self.wordcloud.word_frequency))

    def test_create_dictionaries_invalid(self):
        """After processing column 'Text', files and frequencies are empty."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_dictionaries('Text')
        self.assertEqual(0, len(self.wordcloud.csv_files))
        self.assertEqual(0, len(self.wordcloud.word_frequency))

    def test_create_word_cloud_valid(self):
        """One appended frequency frame yields at least one word cloud."""
        test_data = {'Word': ["nice", "very nice"], 'Frequency': [100, 200]}
        test_frame = pd.DataFrame(test_data, columns=['Word', 'Frequency'])
        self.wordcloud.word_frequency.append(test_frame)
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.display_word_cloud()

    def test_create_word_cloud_invalid(self):
        """Asserts the number of word clouds is NOT one after one frame."""
        test_data = {'Word': ["nice", "very nice"], 'Frequency': [100, 200]}
        test_frame = pd.DataFrame(test_data, columns=['Word', 'Frequency'])
        self.wordcloud.word_frequency.append(test_frame)
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(1, len(self.wordcloud.word_clouds))
        self.wordcloud.display_word_cloud()

    def test_save_word_cloud_valid(self):
        """Build word clouds from one CSV and save them."""
        # Backslashes must be escaped: in a plain string literal '\f' is a
        # form feed, which silently corrupted this path.
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2012_top_10.csv'
        ])
        self.wordcloud.create_dictionaries('Text')
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.save_word_cloud(
            "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")

    def test_save_word_cloud_invalid(self):
        """Build word clouds from one CSV and save them.

        NOTE(review): this currently runs the same steps as the ``_valid``
        variant - confirm what the invalid scenario was meant to be.
        """
        # Backslashes must be escaped: in a plain string literal '\f' is a
        # form feed, which silently corrupted this path.
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2012_top_10.csv'
        ])
        self.wordcloud.create_dictionaries('Text')
        self.wordcloud.create_word_cloud()
        self.assertNotEqual(0, len(self.wordcloud.word_clouds))
        self.wordcloud.save_word_cloud(
            "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")

    def test_create_sentiment_dictionaries_valid(self):
        """Sentiment data for column 'Text' must be non-empty."""
        self.file_receiver.acquire_input_path()
        self.wordcloud.acquire_csv_files(self.file_receiver.csv_files)
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))

    def test_create_sentiment_word_cloud_valid(self):
        """Build sentiment data from one CSV, then create the sentiment cloud."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud\\fraud_apps_640_review_info_final_2012_top_10_including_sentiment_score.csv'
        ])
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))
        self.wordcloud.create_sentiment_word_cloud()

    def test_save_sentiment_word_cloud_valid(self):
        """Build, create, and save sentiment clouds for one CSV."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud\\fraud_apps_640_review_info_final_2013_top_10_including_sentiment_score.csv'
        ])
        self.wordcloud.create_sentiment_dictionaries('Text')
        self.assertNotEqual(0, len(self.wordcloud.sentiment_word_cloud_data))
        self.wordcloud.create_sentiment_word_cloud()
        self.wordcloud.save_sentiment_clouds(
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\Sentiment_word_cloud'
        )

    def test_create_dictionaries_from_text_topics(self):
        """Run topic-dictionary creation on one keywords CSV (no assertion)."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\LATENT DIRICHLET ALLOCATION\\2016\\Keywords\\topic_keywords_fraud_apps_2016_all_reviews_including_sentiment_score.csv'
        ])
        self.wordcloud.create_dictionaries_from_topics()

    def test_create_figure_with_multiple_word_clouds(self):
        """Create the multi-cloud figure from one keywords CSV (no assertion)."""
        self.wordcloud.acquire_csv_files([
            'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\LATENT DIRICHLET ALLOCATION\\2016\\Keywords\\topic_keywords_fraud_apps_2016_all_reviews_including_sentiment_score.csv'
        ])
        self.wordcloud.create_dictionaries_from_topics()
        self.wordcloud.create_figure_with_multiple_word_clouds()
 def setUp(self):
     """Create a fresh BarChartGenerator and FileReceiver for each test."""
     self.barchart_generator = BarChartGenerator()
     self.file_receiver = FileReceiver()
예제 #10
0
def main():
    """Prompt for a pie-chart type, then build and save the matching charts.

    ``B`` and ``SR`` acquire input/output folders through ``FileReceiver``.
    ``ST`` and ``RT`` instead walk a fixed set of per-year folders below the
    hard-coded data root and save one set of nested pie charts per year.
    Any other answer does nothing.
    """
    file_receiver = FileReceiver()
    pie_chart_generator = PieChartGenerator()

    pie_type = input(
        "Basic Pie Chart(B), Nested Pie Chart(Sentiment-Rating=SR,Sentiment-Topic=ST, Rating-Topic=RT): "
    )

    data_root = "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data"
    # Same years, in the same order, as the original hard-coded folder lists.
    years = ("2016", "2013", "2014", "2015")

    def chart_from_acquired_folders(create_chart, save_chart):
        """Build charts from the receiver's CSV files and save them."""
        file_receiver.acquire_input_path()
        file_receiver.acquire_output_path()

        pie_chart_generator.acquire_csv_files(file_receiver.csv_files)
        pie_chart_generator.get_chart_data_from_csv_files()
        create_chart()
        save_chart(file_receiver.output_folder_path)

    def chart_per_year(create_chart, chart_folder_name):
        """Build and save nested pie charts for every configured year."""
        for year in years:
            input_folder = data_root + "\\Sentiment & Topic\\" + year
            output_folder = (data_root + "\\LATENT DIRICHLET ALLOCATION\\" +
                             year + "\\" + chart_folder_name)

            file_list = list(get_csv_files_from_directories(input_folder))
            pie_chart_generator.acquire_csv_files(file_list)
            pie_chart_generator.get_chart_data_from_csv_files(True)
            create_chart()
            pie_chart_generator.save_nested_pie_chart(output_folder)

    if pie_type == 'B':
        chart_from_acquired_folders(
            pie_chart_generator.create_basic_pie_chart,
            pie_chart_generator.save_basic_pie_charts)
    elif pie_type == 'SR':
        chart_from_acquired_folders(
            pie_chart_generator.create_nested_pie_chart_sentiment_and_rating,
            pie_chart_generator.save_nested_pie_chart)
    elif pie_type == 'ST':
        chart_per_year(
            pie_chart_generator.create_nested_pie_chart_sentiment_and_topic,
            "Sentiment-Topic Nested Pie Charts")
    elif pie_type == 'RT':
        chart_per_year(
            pie_chart_generator.create_nested_pie_chart_rating_and_topic,
            "Rating-Topic Nested Pie Charts")
예제 #11
0
def main():
    """Interactive driver for sentiment scoring and CSV column copying.

    ``S`` runs the sentiment analyzer over the acquired CSV files and saves
    the result. ``C`` acquires two sets of CSV files, pairs them by file
    name, and passes each pair to ``DataAdjustment.concatenate_csv_data``
    together with the column names "Topic ID" and "Topic Count". Any other
    answer does nothing.
    """
    file_receiver = FileReceiver()
    sentiment_analyzer = SentimentAnalyzer()
    data_adjuster = DataAdjustment()

    input_text = input(
        "Calculate Sentiment for text in csv(S) or Add column from existing csv file to another(C):"
    )

    if input_text == 'S':

        file_receiver.acquire_input_path()
        file_receiver.acquire_output_path()
        sentiment_analyzer.acquire_csv_files(file_receiver.csv_files)
        sentiment_analyzer.create_data_frames_with_result_columns()
        sentiment_analyzer.save_sentiment_csv_file(
            file_receiver.output_folder_path)

    elif input_text == "C":

        print("Input path to initial set:")
        file_receiver.acquire_input_path()
        # Snapshot the list in case the receiver reuses the same list object
        # for the next acquisition.
        first_set = list(file_receiver.csv_files)
        print("Input path to second set:")
        file_receiver.acquire_input_path()
        second_set = file_receiver.csv_files

        file_receiver.acquire_output_path()

        # Index the whole second set by bare file name. setdefault keeps the
        # first path for a duplicated name, like list.index did before.
        second_by_name = {}
        for second_path in second_set:
            second_by_name.setdefault(
                os.path.split(second_path)[1], second_path)

        for first_path in first_set:
            file_name = os.path.split(first_path)[1]
            second_path = second_by_name.get(file_name)
            if second_path is None:
                # No partner file: report it instead of aborting the batch.
                print("No file named '{}' in the second set, skipping.".format(
                    file_name))
                continue
            # Read from the original path rather than re-joining folder and
            # name with a hard-coded separator.
            first_data = pd.read_csv(first_path)
            second_data = pd.read_csv(second_path)
            data_adjuster.concatenate_csv_data(
                file_receiver.output_folder_path, file_name, first_data,
                second_data, "Topic ID", "Topic Count")
class TestBarChart(unittest.TestCase):
    """Tests for ``BarChartGenerator``: word counts, valence, and ratings.

    Several tests read CSV files from, or write charts to, fixed folders on
    the ``D:`` drive; the ``acquire_input_path`` based ones depend on
    whatever ``FileReceiver`` does to obtain a folder.
    """

    # Fixed locations used by the tests below.
    VISUALIZATION_FOLDER = (
        "D:\\Google_Play_Fraud_Benign_Malware\\Visualizations")
    TOP_10_2012_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\PieChartTest\\fraud_apps_640_review_info_final_2012_top_10.csv'
    MISSING_TEXT_COLUMN_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2016_top_10_missing_text_column.csv'
    ALL_2012_SENTIMENT_CSV = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2012\\sentiment\\fraud_apps_2012_all_anon_reviews_including_sentiment_score.csv'

    @staticmethod
    def _sample_word_counts():
        """Return a new dict holding the sample word-count buckets."""
        return {
            '0-50': 1,
            '51-100': 9,
            '101-200': 11,
            '201-300': 25,
            '301-400': 60,
            '400+': 5
        }

    def _sample_per_file_word_counts(self):
        """Return ``(year, buckets)`` pairs for 2012-2014, one dict each."""
        return tuple((year, self._sample_word_counts())
                     for year in ('2012', '2013', '2014'))

    def _load_receiver_csv_files(self):
        """Run input-path acquisition and pass its CSV files to the generator."""
        self.file_receiver.acquire_input_path()
        self.barchart_generator.acquire_csv_files(self.file_receiver.csv_files)

    def _build_display_and_save_overall_charts(self):
        """Categorize, create, display, and save the overall bar charts."""
        generator = self.barchart_generator
        self._load_receiver_csv_files()
        generator.categorize_text_by_word_count()
        generator.create_overall_bar_charts()
        generator.display_bar_charts()
        generator.save_overall_bar_charts(self.VISUALIZATION_FOLDER)

    def setUp(self):
        """Create a fresh generator and receiver for each test."""
        self.barchart_generator = BarChartGenerator()
        self.file_receiver = FileReceiver()

    def test_calculate_text_length_valid(self):
        """After word-count categorization the generator holds CSV files."""
        self._load_receiver_csv_files()
        self.barchart_generator.categorize_text_by_word_count()
        self.assertNotEqual(0, len(self.barchart_generator.csv_files))

    def test_calculate_text_length_invalid(self):
        """After word-count categorization the generator holds no CSV files."""
        self._load_receiver_csv_files()
        self.barchart_generator.categorize_text_by_word_count()
        self.assertEqual(0, len(self.barchart_generator.csv_files))

    def test_create_horizontal_bar_chart_valid(self):
        """Create and display charts from hand-written bucket counts."""
        generator = self.barchart_generator
        generator.total_word_count = self._sample_word_counts()
        generator.per_file_word_count = self._sample_per_file_word_counts()
        generator.create_overall_bar_charts()
        generator.display_bar_charts()

    def test_create_horizontal_bar_chart_invalid(self):
        """Same as the valid case, but the '0-50' total is a string."""
        generator = self.barchart_generator
        totals = self._sample_word_counts()
        totals['0-50'] = "invalid"
        generator.total_word_count = totals
        generator.per_file_word_count = self._sample_per_file_word_counts()
        generator.create_overall_bar_charts()
        generator.display_bar_charts()

    def test_save_bar_charts_valid(self):
        """Run the full categorize/create/display/save sequence."""
        self._build_display_and_save_overall_charts()

    def test_save_bar_charts_invalid(self):
        """Run the same sequence as ``test_save_bar_charts_valid``."""
        self._build_display_and_save_overall_charts()

    def test_calculate_word_occurrence_valid(self):
        """Valence categorization of one CSV yields non-empty data."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.TOP_10_2012_CSV])
        generator.categorize_words_by_valence()
        self.assertNotEqual(0, len(generator.file_valence_data))

    # missing text column
    def test_calculate_word_occurence_invalid(self):
        """Valence categorization of the 'missing text column' CSV."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.MISSING_TEXT_COLUMN_CSV])
        generator.categorize_words_by_valence()
        self.assertNotEqual(0, len(generator.file_valence_data))

    def test_create_divergent_valence_bar_chart_valid(self):
        """Create the divergent valence chart from categorized data."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.TOP_10_2012_CSV])
        generator.categorize_words_by_valence()
        generator.create_divergent_valence_bar_chart()
        self.assertNotEqual(0, len(generator.file_valence_data))

    def test_create_divergent_valence_bar_chart_invalid(self):
        """Create the chart after the valence data was reset to ``None``."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.TOP_10_2012_CSV])
        generator.categorize_words_by_valence()
        generator.file_valence_data = None
        generator.create_divergent_valence_bar_chart()
        self.assertNotEqual(0, len(generator.file_valence_data))

    def test_categorize_rating(self):
        """Run rating categorization on one CSV (no assertion)."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.TOP_10_2012_CSV])
        generator.categorize_ratings()

    def test_create_rating_bar_charts(self):
        """Categorize ratings, then create the rating charts (no assertion)."""
        generator = self.barchart_generator
        generator.acquire_csv_files([self.ALL_2012_SENTIMENT_CSV])
        generator.categorize_ratings()
        generator.create_rating_bar_charts()
예제 #13
0
 def setUp(self):
     """Create a fresh FileReceiver for each test."""
     self.file_receiver = FileReceiver()
예제 #14
0
class TestLineChart(unittest.TestCase):
    def setUp(self):
        """Create a fresh FileReceiver and LineChartGenerator for each test."""
        self.file_receiver = FileReceiver()
        self.line_chart_generator = LineChartGenerator()

    def test_categorize_text_by_word_count(self):
        """Character-count categorization of one CSV yields per-file data."""
        generator = self.line_chart_generator
        sentiment_csv = "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2013\\sentiment\\fraud_apps_2013_all_anon_reviews_including_sentiment_score.csv"
        generator.acquire_csv_files([sentiment_csv])
        generator.categorize_text_by_character_count()
        per_file_entries = len(generator.per_file_character_count)
        self.assertNotEqual(0, per_file_entries)

    def test_create_simple_line_chart(self):
        """Five yearly CSVs must produce at least one line chart."""
        yearly_sentiment_csvs = [
            "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2012\\sentiment\\fraud_apps_2012_all_anon_reviews_including_sentiment_score.csv",
            "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2013\\sentiment\\fraud_apps_2013_all_anon_reviews_including_sentiment_score.csv",
            "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2014\\sentiment\\fraud_apps_2014_all_anon_including_sentiment_score.csv",
            "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2015\\sentiment\\fraud_apps_2015_all_anon_reviews_including_sentiment_score.csv",
            "D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\All Data\\2016\\sentiment\\fraud_apps_2016_all_anon_reviews_including_sentiment_score.csv"
        ]
        generator = self.line_chart_generator
        generator.acquire_csv_files(yearly_sentiment_csvs)
        generator.categorize_text_by_character_count()
        generator.create_simple_line_chart()
        chart_count = len(generator.line_charts)
        self.assertNotEqual(0, chart_count)

    def test_calculate_monthly_app_reviews_valid(self):
        """Run the per-app review calculation on one CSV (no assertion)."""
        generator = self.line_chart_generator
        top_10_csv = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2014_top_10.csv'
        generator.acquire_csv_files([top_10_csv])
        generator.calculate_reviews_by_identifiable_individuals_per_app()

    # missing data column - month
    def test_calculate_monthly_app_reviews_invalid(self):
        """Run the per-app review calculation on the 'no month column' CSV.

        No assertion is made; the test only checks that the call completes.
        """
        generator = self.line_chart_generator
        no_month_csv = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2014_top_10_no_month_column.csv'
        generator.acquire_csv_files([no_month_csv])
        generator.calculate_reviews_by_identifiable_individuals_per_app()

    def test_create_charts_valid(self):
        """Pass ten labelled ``(app, pairs)`` entries to ``create_charts``.

        No assertion is made; the test only checks that the call completes.
        """
        labelled_series = [
            ('APP_1', [(1, 47), (2, 51), (3, 30), (4, 34), (5, 18), (6, 22),
                       (7, 11), (8, 14), (9, 11), (10, 343), (11, 142),
                       (12, 121)]),
            ('APP_2', [(1, 22), (2, 22), (3, 26), (4, 6), (5, 26), (6, 61),
                       (7, 22), (8, 34), (9, 26), (10, 174), (11, 148),
                       (12, 22)]),
            ('APP_3', [(1, 25), (2, 34), (3, 14), (4, 22), (5, 20), (6, 119),
                       (7, 72), (8, 24), (9, 17), (10, 61), (11, 13),
                       (12, 24)]),
            ('APP_4', [(1, 10), (2, 16), (3, 6), (4, 34), (5, 36), (6, 30),
                       (7, 30), (8, 18), (9, 36), (10, 138), (11, 112),
                       (12, 22)]),
            ('APP_5', [(1, 132), (2, 162), (3, 136), (4, 104), (5, 110),
                       (6, 144), (7, 141), (8, 156), (9, 122), (10, 78),
                       (11, 102), (12, 118)]),
            ('APP_6', [(1, 17), (2, 13), (3, 17), (4, 40), (5, 44), (6, 46),
                       (7, 47), (8, 58), (9, 52), (10, 59), (11, 80),
                       (12, 168)]),
            ('APP_7', [(1, 32), (2, 88), (3, 162), (4, 121), (5, 165),
                       (6, 112), (7, 149), (8, 145), (9, 83), (10, 259),
                       (11, 212), (12, 221)]),
            ('APP_8', [(1, 247), (2, 178), (3, 226), (4, 182), (5, 160),
                       (6, 121), (7, 82), (8, 130), (9, 152), (10, 104),
                       (11, 143), (12, 112)]),
            ('APP_9', [(1, 27), (2, 33), (3, 19), (4, 68), (5, 31), (6, 17),
                       (7, 30), (8, 37), (9, 52), (10, 149), (11, 89),
                       (12, 53)]),
            ('APP_10', [(1, 96), (2, 56), (3, 142), (4, 172), (5, 219),
                        (6, 211), (7, 130), (8, 140), (9, 186), (10, 147),
                        (11, 138), (12, 98)])
        ]
        self.line_chart_generator.create_charts(labelled_series)

    # missing APP_2 in the second element
    def test_create_charts_invalid(self):
        """Pass series data whose second entry has no app label.

        The second entry is a bare list of pairs (the surrounding
        parentheses do not make it a tuple). No assertion is made.
        """
        malformed_series = [
            ('APP_1', [(1, 47), (2, 51), (3, 30), (4, 34), (5, 18), (6, 22),
                       (7, 11), (8, 14), (9, 11), (10, 343), (11, 142),
                       (12, 121)]),
            ([(1, 22), (2, 22), (3, 26), (4, 6), (5, 26), (6, 61), (7, 22),
              (8, 34), (9, 26), (10, 174), (11, 148), (12, 22)]),
            ('APP_3', [(1, 25), (2, 34), (3, 14), (4, 22), (5, 20), (6, 119),
                       (7, 72), (8, 24), (9, 17), (10, 61), (11, 13),
                       (12, 24)]),
            ('APP_4', [(1, 10), (2, 16), (3, 6), (4, 34), (5, 36), (6, 30),
                       (7, 30), (8, 18), (9, 36), (10, 138), (11, 112),
                       (12, 22)]),
            ('APP_5', [(1, 132), (2, 162), (3, 136), (4, 104), (5, 110),
                       (6, 144), (7, 141), (8, 156), (9, 122), (10, 78),
                       (11, 102), (12, 118)]),
            ('APP_6', [(1, 17), (2, 13), (3, 17), (4, 40), (5, 44), (6, 46),
                       (7, 47), (8, 58), (9, 52), (10, 59), (11, 80),
                       (12, 168)]),
            ('APP_7', [(1, 32), (2, 88), (3, 162), (4, 121), (5, 165),
                       (6, 112), (7, 149), (8, 145), (9, 83), (10, 259),
                       (11, 212), (12, 221)]),
            ('APP_8', [(1, 247), (2, 178), (3, 226), (4, 182), (5, 160),
                       (6, 121), (7, 82), (8, 130), (9, 152), (10, 104),
                       (11, 143), (12, 112)]),
            ('APP_9', [(1, 27), (2, 33), (3, 19), (4, 68), (5, 31), (6, 17),
                       (7, 30), (8, 37), (9, 52), (10, 149), (11, 89),
                       (12, 53)]),
            ('APP_10', [(1, 96), (2, 56), (3, 142), (4, 172), (5, 219),
                        (6, 211), (7, 130), (8, 140), (9, 186), (10, 147),
                        (11, 138), (12, 98)])
        ]
        self.line_chart_generator.create_charts(malformed_series)

    def test_create_yearly_app_data_charts_valid(self):
        """Smoke-test yearly chart creation with two populated data sets.

        ``yearly_app_data`` is set to a list of two data sets.  Each data
        set is a list of ``(app_label, [(index, value), ...])`` tuples with
        indexes running from 1 to 12 (presumably month numbers -- confirm
        against LineChartGenerator).  No assertion is made; the test passes
        as long as ``create_yearly_app_data_charts`` does not raise.
        """
        # Values are listed in index order 1..12 for each app.
        first_set_values = [
            ('APP_1', [47, 51, 30, 34, 18, 22, 11, 14, 11, 343, 142, 121]),
            ('APP_2', [22, 22, 26, 6, 26, 61, 22, 34, 26, 174, 148, 22]),
            ('APP_3', [25, 34, 14, 22, 20, 119, 72, 24, 17, 61, 13, 24]),
            ('APP_4', [10, 16, 6, 34, 36, 30, 30, 18, 36, 138, 112, 22]),
            ('APP_5', [132, 162, 136, 104, 110, 144, 141, 156, 122, 78, 102,
                       118]),
            ('APP_6', [17, 13, 17, 40, 44, 46, 47, 58, 52, 59, 80, 168]),
            ('APP_7', [32, 88, 162, 121, 165, 112, 149, 145, 83, 259, 212,
                       221]),
            ('APP_8', [247, 178, 226, 182, 160, 121, 82, 130, 152, 104, 143,
                       112]),
            ('APP_9', [27, 33, 19, 68, 31, 17, 30, 37, 52, 149, 89, 53]),
            ('APP_10', [96, 56, 142, 172, 219, 211, 130, 140, 186, 147, 138,
                        98]),
        ]
        second_set_values = [
            ('APP_1', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]),
            ('APP_2', [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2]),
            ('APP_3', [0, 0, 0, 0, 2, 0, 2, 0, 2, 2, 4, 24]),
            ('APP_4', [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
            ('APP_5', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0]),
            ('APP_6', [0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 4, 4]),
            ('APP_7', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
        ]

        # Pair every value with its 1-based position to rebuild the
        # (index, value) tuples the generator is handed.
        self.line_chart_generator.yearly_app_data = [
            [(app_label, list(enumerate(values, start=1)))
             for app_label, values in data_set]
            for data_set in (first_set_values, second_set_values)
        ]
        self.line_chart_generator.create_yearly_app_data_charts()

    # the second data set in the list is empty
    def test_create_yearly_app_data_charts_invalid(self):
        """Smoke-test yearly chart creation when the second data set is empty.

        ``yearly_app_data`` holds one populated data set followed by an
        empty list.  Each populated entry is
        ``(app_label, [(index, value), ...])`` with indexes 1 to 12
        (presumably month numbers -- confirm against LineChartGenerator).
        No assertion is made; the test only checks that
        ``create_yearly_app_data_charts`` does not raise.
        """
        # Values are listed in index order 1..12 for each app.
        populated_set_values = [
            ('APP_1', [47, 51, 30, 34, 18, 22, 11, 14, 11, 343, 142, 121]),
            ('APP_2', [22, 22, 26, 6, 26, 61, 22, 34, 26, 174, 148, 22]),
            ('APP_3', [25, 34, 14, 22, 20, 119, 72, 24, 17, 61, 13, 24]),
            ('APP_4', [10, 16, 6, 34, 36, 30, 30, 18, 36, 138, 112, 22]),
            ('APP_5', [132, 162, 136, 104, 110, 144, 141, 156, 122, 78, 102,
                       118]),
            ('APP_6', [17, 13, 17, 40, 44, 46, 47, 58, 52, 59, 80, 168]),
            ('APP_7', [32, 88, 162, 121, 165, 112, 149, 145, 83, 259, 212,
                       221]),
            ('APP_8', [247, 178, 226, 182, 160, 121, 82, 130, 152, 104, 143,
                       112]),
            ('APP_9', [27, 33, 19, 68, 31, 17, 30, 37, 52, 149, 89, 53]),
            ('APP_10', [96, 56, 142, 172, 219, 211, 130, 140, 186, 147, 138,
                        98]),
        ]
        populated_set = [
            (app_label, list(enumerate(values, start=1)))
            for app_label, values in populated_set_values
        ]

        # The second data set is deliberately left empty.
        self.line_chart_generator.yearly_app_data = [populated_set, []]
        self.line_chart_generator.create_yearly_app_data_charts()

    def test_calculate_all_year_data_valid(self):
        """Load one CSV fixture and run the overall review calculation.

        Smoke test only: nothing is asserted, so it passes as long as
        neither call raises.
        """
        fixture_csv = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2014_top_10.csv'
        generator = self.line_chart_generator
        generator.acquire_csv_files([fixture_csv])
        generator.calculate_overall_reviews_by_identifiable_individuals()

    def test_calculate_all_year_data_invalid(self):
        """Run the overall review calculation on the "no month column" CSV.

        The fixture's file name suggests it lacks a month column (the file
        itself is not visible here).  Smoke test only: nothing is asserted,
        so it passes as long as neither call raises.
        """
        fixture_csv = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test\\fraud_apps_640_review_info_final_2014_top_10_no_month_column.csv'
        generator = self.line_chart_generator
        generator.acquire_csv_files([fixture_csv])
        generator.calculate_overall_reviews_by_identifiable_individuals()

    def test_create_all_year_data_chart_valid(self):
        """Smoke-test the all-year chart with one labelled series.

        ``all_year_data`` is set to a single ``('2014', pairs)`` tuple where
        ``pairs`` is ``[(index, value), ...]`` for indexes 1 to 12.  Nothing
        is asserted; the test passes if ``create_all_year_data_chart`` does
        not raise.
        """
        # Values in index order 1..12.
        values_2014 = [655, 653, 778, 783, 829, 883,
                       714, 756, 737, 1512, 1179, 959]
        series_2014 = ('2014', list(enumerate(values_2014, start=1)))
        self.line_chart_generator.all_year_data = [series_2014]
        self.line_chart_generator.create_all_year_data_chart()

    # missing year tag
    def test_create_all_year_data_chart_invalid(self):
        """Smoke-test the all-year chart with a series that has no label.

        The single entry in ``all_year_data`` is just the list of
        ``(index, value)`` pairs -- it is not wrapped in a
        ``(label, pairs)`` tuple.  Nothing is asserted; the test passes if
        ``create_all_year_data_chart`` does not raise.
        """
        # Values in index order 1..12.
        unlabelled_values = [655, 653, 778, 783, 829, 883,
                             714, 756, 737, 1512, 1179, 959]
        unlabelled_series = list(enumerate(unlabelled_values, start=1))
        self.line_chart_generator.all_year_data = [unlabelled_series]
        self.line_chart_generator.create_all_year_data_chart()

    def test_save_all_year_charts_valid(self):
        """Run the whole line-chart pipeline and save to a fixed folder.

        Acquires the input path, loads the CSV files, runs both
        calculations, creates both kinds of chart, then saves them to the
        same output folder.  Nothing is asserted; the test passes if no
        step raises.
        """
        output_dir = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test'
        receiver = self.file_receiver
        generator = self.line_chart_generator

        receiver.acquire_input_path()
        generator.acquire_csv_files(receiver.csv_files)

        generator.calculate_overall_reviews_by_identifiable_individuals()
        generator.calculate_reviews_by_identifiable_individuals_per_app()

        generator.create_all_year_data_chart()
        generator.create_yearly_app_data_charts()

        generator.save_all_year_charts(output_dir)
        generator.save_yearly_app_charts(output_dir)

    # invalid (empty) output path passed to save_all_year_charts
    def test_save_all_year_charts_invalid(self):
        """Run the whole line-chart pipeline, saving with an empty path.

        Same steps as the valid case, except ``save_all_year_charts`` is
        given an empty string while ``save_yearly_app_charts`` still gets
        the fixed test folder.  Nothing is asserted; the test passes if no
        step raises.
        """
        yearly_output_dir = 'D:\\Google_Play_Fraud_Benign_Malware\\Fraud\\Test'
        receiver = self.file_receiver
        generator = self.line_chart_generator

        receiver.acquire_input_path()
        generator.acquire_csv_files(receiver.csv_files)

        generator.calculate_overall_reviews_by_identifiable_individuals()
        generator.calculate_reviews_by_identifiable_individuals_per_app()

        generator.create_all_year_data_chart()
        generator.create_yearly_app_data_charts()

        generator.save_all_year_charts('')
        generator.save_yearly_app_charts(yearly_output_dir)
예제 #15
0
def _offer_chart_display(show_charts):
    """Ask whether to display the charts and act on the answer.

    Keeps prompting until the user answers 'Y' (``show_charts`` is called)
    or 'N' (a message is printed).  Answers are trimmed and upper-cased
    first, so 'y' and ' n ' are accepted too.

    Args:
        show_charts: Zero-argument callable that displays the charts.
    """
    while True:
        answer = input("Would you like to see the bar charts? (Y/N) :")
        answer = answer.strip().upper()
        if answer == 'Y':
            show_charts()
            return
        if answer == 'N':
            print("Shutting down.")
            return
        print("I didn't catch that, try again.")


def main():
    """Interactively build, optionally display, and save bar charts.

    Acquires the input and output paths through a FileReceiver, loads the
    CSV files into a BarChartGenerator, then asks which chart family to
    produce:

    * ``W`` -- word-count bar charts
    * ``S`` -- divergent valence (sentiment) bar chart
    * ``R`` -- rating bar charts
    * ``T`` -- topic bar charts (saved without a display prompt)

    The choice is trimmed and upper-cased before matching.  Any other
    answer prints a message and generates nothing.
    """
    bc_generator = BarChartGenerator()
    file_receiver = FileReceiver()

    file_receiver.acquire_input_path()
    file_receiver.acquire_output_path()

    bc_generator.acquire_csv_files(file_receiver.csv_files)

    raw_choice = input(
        "Sentiment Divergent bar chart(S) or Character count bar chart(W) or Rating bar charts(R) or Topic bar charts(T)?"
    )
    chart_type = raw_choice.strip().upper()

    if chart_type == 'W':
        bc_generator.categorize_text_by_word_count()
        bc_generator.create_overall_bar_charts()
        _offer_chart_display(bc_generator.display_bar_charts)
        # Charts are saved whether or not the user chose to view them.
        bc_generator.save_overall_bar_charts(file_receiver.output_folder_path)

    elif chart_type == 'S':
        bc_generator.categorize_words_by_valence()
        bc_generator.create_divergent_valence_bar_chart()
        _offer_chart_display(bc_generator.display_divergent_bar_charts)
        bc_generator.save_divergent_bar_charts(
            file_receiver.output_folder_path)

    elif chart_type == 'R':
        bc_generator.categorize_ratings()
        bc_generator.create_rating_bar_charts()
        _offer_chart_display(bc_generator.display_bar_charts)
        bc_generator.save_rating_charts(file_receiver.output_folder_path)

    elif chart_type == 'T':
        topic_data = bc_generator.categorize_topic_distribution()
        bc_generator.create_bar_charts(topic_data)
        bc_generator.save_bar_charts(file_receiver.output_folder_path)

    else:
        # Previously an unknown choice fell through silently.
        print("Unrecognized chart type " + repr(raw_choice) +
              "; expected S, W, R or T. Nothing was generated.")
 def setUp(self):
     """Create the FileReceiver and SentimentAnalyzer stored on ``self``."""
     # NOTE(review): the enclosing class is not visible in this excerpt;
     # presumably a unittest.TestCase fixture hook -- confirm.
     self.file_receiver = FileReceiver()
     self.sentiment_analyzer = SentimentAnalyzer()
예제 #17
0
 def setUp(self):
     """Create the FileReceiver and LineChartGenerator stored on ``self``."""
     # NOTE(review): the enclosing class is not visible in this excerpt;
     # presumably a unittest.TestCase fixture hook -- confirm.
     self.file_receiver = FileReceiver()
     self.line_chart_generator = LineChartGenerator()