"labeled_data_test.csv", 6)
# Load "labeled_data_test.csv" via make_list_of_column with argument 6.
# NOTE(review): judging by the variable name this is the raw (unprocessed)
# text of each utterance, and 6 is presumably a column index -- confirm
# against machine_learning_processing.
test_list_ld_unprocessed = machine_learning_processing.make_list_of_column(
    "labeled_data_test.csv", 6)
# Run the mem test-set routine on the labeled-data test utterances.
# NOTE(review): "labeled_data_mem_final.csv" is handed to
# estimation.test_results further down, so it is presumably the prediction
# file this call writes -- confirm in do_test_set_mem_sent_pos_ld.
do_test_set_mem_sent_pos_ld(test_list_ld, test_list_ld_unprocessed,
                            "labeled_data_mem_final.csv",
                            "lexicon_with_occurences_ld.txt",
                            "labeled_data_train.csv", 5,
                            "labeled_data_train_with_sentiment.csv",
                            "labeled_data_test_with_sentiment.csv", 2)
# Same inputs as the call above, but using the "_hs" routine, the "_hs"
# lexicon file and the "_hs" result file.
do_test_set_mem_sent_pos_hs_ld(test_list_ld, test_list_ld_unprocessed,
                               "labeled_data_mem_final_hs.csv",
                               "lexicon_with_occurences_hs_ld.txt",
                               "labeled_data_train.csv", 5,
                               "labeled_data_train_with_sentiment.csv",
                               "labeled_data_test_with_sentiment.csv", 2)
# Evaluate the two "final" result files against "labeled_data_test.csv".
# NOTE(review): 5 and 1 are presumably the column indices of the reference
# label and of the prediction -- TODO confirm in estimation.test_results.
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_mem_final.csv", 1)
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_mem_final_hs.csv", 1)

# Evaluate the non-"final" result files as well. These two files are not
# produced anywhere in the visible lines, so they must already exist on disk
# when this runs.
estimation.test_results("labeled_data_test.csv", 5, "labeled_data_mem.csv", 1)
estimation.test_results("labeled_data_test.csv", 5, "labeled_data_mem_hs.csv",
                        1)

# Twitter hate speech data set: load the test file twice with argument 1,
# once through process_data and once through make_list_of_column.
# NOTE(review): presumably the processed and the raw form of the same text
# column -- confirm against machine_learning_processing.
test_list_ths = machine_learning_processing.process_data(
    "twitter_hate_speech_test.csv", 1)
test_list_ths_unprocessed = machine_learning_processing.make_list_of_column(
    "twitter_hate_speech_test.csv", 1)
do_test_set_mem_sent_pos_ths(test_list_ths, test_list_ths_unprocessed,
                             "twitter_hate_speech_mem_final.csv",
                             "lexicon_with_occurences_ths.txt",
    "labeled_data_test.csv", 6)
# Build one matrix per lexicon file from training_list_ld via do_matrix:
# the full lexicon plus the separate pos / neut / neg lexicon files.
# NOTE(review): term_utterance_matrix_ld is not used anywhere in the visible
# lines -- check whether it is still needed.
term_utterance_matrix_ld = do_matrix(training_list_ld, "lexicon_ld.txt")
matrix_pos_ld = do_matrix(training_list_ld, "lexicon_pos_ld.txt")
matrix_neut_ld = do_matrix(training_list_ld, "lexicon_neut_ld.txt")
matrix_neg_ld = do_matrix(training_list_ld, "lexicon_neg_ld.txt")
# Run the SVM test-set routine with the label string "Cyberbullying".
# NOTE(review): "labeled_data_svm.csv" is handed to estimation.test_results
# below, so it is presumably the prediction file this call writes -- confirm
# in do_test_set_svm_sent_other.
do_test_set_svm_sent_other(
    test_list_ld, training_list_ld, "labeled_data_svm.csv",
    "lexicon_pos_ld.txt", "lexicon_neut_ld.txt", "lexicon_neg_ld.txt",
    "labeled_data_train.csv", 5, matrix_pos_ld, matrix_neut_ld, matrix_neg_ld,
    "labeled_data_test_with_sentiment.csv", "Cyberbullying", 2)
# Same inputs, but the "_hs" routine, the "_hs" result file and the label
# string "Hate Speech".
do_test_set_svm_sent_other_hs(
    test_list_ld, training_list_ld, "labeled_data_svm_hs.csv",
    "lexicon_pos_ld.txt", "lexicon_neut_ld.txt", "lexicon_neg_ld.txt",
    "labeled_data_train.csv", 5, matrix_pos_ld, matrix_neut_ld, matrix_neg_ld,
    "labeled_data_test_with_sentiment.csv", "Hate Speech", 2)
# Evaluate both result files against "labeled_data_test.csv".
# NOTE(review): 5 and 1 are presumably the column indices of the reference
# label and of the prediction -- TODO confirm in estimation.test_results.
estimation.test_results("labeled_data_test.csv", 5, "labeled_data_svm.csv", 1)
estimation.test_results("labeled_data_test.csv", 5, "labeled_data_svm_hs.csv",
                        1)

# Twitter hate speech data set: load the utterance lists and build the
# lexicon matrices used by the SVM run.
test_list_ths = machine_learning_processing.process_data(
    "twitter_hate_speech_test.csv", 1)
# NOTE(review): training_list_ths is built by exactly the same call as
# test_list_ths (the *test* CSV, argument 1). Verify whether the training
# file was intended here; if so, the correct column argument must be
# checked as well.
training_list_ths = machine_learning_processing.process_data(
    "twitter_hate_speech_test.csv", 1)
# One matrix per lexicon file: the full lexicon plus the pos / neut / neg
# lexicon files.
# NOTE(review): term_utterance_matrix_ths is not used in the visible lines.
term_utterance_matrix_ths = do_matrix(training_list_ths, "lexicon_ths.txt")
matrix_pos_ths = do_matrix(training_list_ths, "lexicon_pos_ths.txt")
matrix_neut_ths = do_matrix(training_list_ths, "lexicon_neut_ths.txt")
matrix_neg_ths = do_matrix(training_list_ths, "lexicon_neg_ths.txt")
do_test_set_svm_sent_other(
    test_list_ths, training_list_ths, "twitter_hate_speech_svm.csv",
    "lexicon_pos_ths.txt", "lexicon_neut_ths.txt", "lexicon_neg_ths.txt",
Beispiel #3
0
    "labeled_data_test.csv", 6)
# NOTE(review): despite the "_unprocessed" name this list is produced by
# process_data -- confirm whether a raw-column loader was intended instead.
test_list_ld_unprocessed = machine_learning_processing.process_data(
    "labeled_data_test.csv", 6)
# Run the naive Bayes test-set routine on the labeled-data test utterances.
# NOTE(review): "labeled_data_naive_bayes_final.csv" is handed to
# estimation.test_results below, so it is presumably the prediction file this
# call writes -- confirm in do_test_set_naive_bayes_sent_pos_ld.
do_test_set_naive_bayes_sent_pos_ld(test_list_ld, test_list_ld_unprocessed,
                                    "labeled_data_naive_bayes_final.csv",
                                    "lexicon_with_occurences_ld.txt",
                                    "labeled_data_train.csv", 5,
                                    "labeled_data_train_with_sentiment.csv",
                                    "labeled_data_test_with_sentiment.csv", 2)
# Same inputs, but the "_hs" routine, the "_hs" lexicon file and the "_hs"
# result file.
do_test_set_naive_bayes_sent_pos_hs_ld(
    test_list_ld, test_list_ld_unprocessed,
    "labeled_data_naive_bayes_final_hs.csv",
    "lexicon_with_occurences_hs_ld.txt", "labeled_data_train.csv", 5,
    "labeled_data_train_with_sentiment.csv",
    "labeled_data_test_with_sentiment.csv", 2)
# Evaluate the "final" and the non-"final" result file of the first routine.
# "labeled_data_naive_bayes.csv" is not produced in the visible lines, so it
# must already exist on disk when this runs.
# NOTE(review): 5 and 1 are presumably the column indices of the reference
# label and of the prediction -- TODO confirm in estimation.test_results.
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_naive_bayes_final.csv", 1)
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_naive_bayes.csv", 1)

# Same pair of evaluations for the "_hs" result files;
# "labeled_data_naive_bayes_hs.csv" is likewise not produced in the visible
# lines.
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_naive_bayes_final_hs.csv", 1)
estimation.test_results("labeled_data_test.csv", 5,
                        "labeled_data_naive_bayes_hs.csv", 1)

# Twitter hate speech data set.
# Run both frequency estimators on the training CSV with argument 3.
# NOTE(review): 3 is presumably the index of the label column -- TODO confirm
# in the two estimate_* helpers.
estimate_class_frequency_other_datasets("twitter_hate_speech_train.csv", 3)
estimate_hate_speech_frequency_other_datasets("twitter_hate_speech_train.csv",
                                              3)
# Load the test file through process_data with argument 1 (presumably the
# text column -- confirm against machine_learning_processing).
test_list_ths = machine_learning_processing.process_data(
    "twitter_hate_speech_test.csv", 1)
test_list_ths_unprocessed = machine_learning_processing.process_data(
Beispiel #4
0
# Build matrices from training_list for the neg lexicon and for the three
# "2"-suffixed lexicon files. matrix_pos and matrix_neut, which are used
# below, are defined before the first visible line.
# NOTE(review): matrix_pos2, matrix_neut2 and matrix_neg2 are not used
# anywhere in the visible lines -- check whether they are still needed.
matrix_neg = support_vector_machine.do_matrix(training_list, "lexicon_neg.txt")
matrix_pos2 = support_vector_machine.do_matrix(training_list,
                                               "lexicon_pos2.txt")
matrix_neut2 = support_vector_machine.do_matrix(training_list,
                                                "lexicon_neut2.txt")
matrix_neg2 = support_vector_machine.do_matrix(training_list,
                                               "lexicon_neg2.txt")

# Cyberbullying: run the SVM test-set routine with "train_set.csv" and
# argument 7, then evaluate against "test_set.csv" with the same argument 7.
# NOTE(review): 7 is presumably the index of the cyberbullying label column,
# and "twitter_bullying_svm_final.csv" presumably the prediction file written
# by the call -- TODO confirm in the callees.
do_test_set_svm_sent_pos(test_list, test_list_unprocessed, training_list,
                         "twitter_bullying_svm_final.csv", "lexicon_pos.txt",
                         "lexicon_neut.txt", "lexicon_neg.txt",
                         "train_set.csv", 7, matrix_pos, matrix_neut,
                         matrix_neg, "test_set_with_sentiment.csv",
                         "Cyberbullying")
estimation.test_results("test_set.csv", 7, "twitter_bullying_svm_final.csv", 1)

# "twitter_bullying_svm_sent_c.csv" is not produced in the visible lines, so
# it must already exist on disk when this runs.
estimation.test_results("test_set.csv", 7, "twitter_bullying_svm_sent_c.csv",
                        1)

# Hate speech: same lexicons and matrices as the cyberbullying run, but the
# "_hs" routine, argument 9 instead of 7, and the label string "Hate Speech".
# NOTE(review): 9 is presumably the index of the hate-speech label column --
# TODO confirm.
do_test_set_svm_sent_pos_hs(test_list, test_list_unprocessed, training_list,
                            "twitter_bullying_svm_final_hs.csv",
                            "lexicon_pos.txt", "lexicon_neut.txt",
                            "lexicon_neg.txt", "train_set.csv", 9, matrix_pos,
                            matrix_neut, matrix_neg,
                            "test_set_with_sentiment.csv", "Hate Speech")
estimation.test_results("test_set.csv", 9, "twitter_bullying_svm_final_hs.csv",
                        1)

estimation.test_results("test_set.csv", 9, "twitter_bullying_svm_sent_hs.csv",