def do_test_set_naive_bayes_sent_hs_other(utterances, filename, lex, file,
                                          column, sentimentfile_train,
                                          sentimentfile_test, mode):
    """Classify each utterance with naive Bayes and save the result as CSV.

    The output file has a ``;`` delimiter, the header row
    ``Utterance;Hate Speech`` and one row per utterance containing the
    utterance's words (each followed by a single space) and the class
    returned by ``do_sentiment_naive_bayes_hs``.

    Args:
        utterances: Iterable of utterances; each utterance is an iterable
            of word strings.
        filename: Path of the CSV file to create (overwritten if present).
        lex: Passed through unchanged to ``do_sentiment_naive_bayes_hs``
            (presumably a lexicon -- confirm against that function).
        file: Forwarded to
            ``senti_strength.estimate_sentiment_probabilities_other_datasets``.
        column: Forwarded to the same ``senti_strength`` call.
        sentimentfile_train: Forwarded to the same ``senti_strength`` call.
        sentimentfile_test: File whose column ``1`` is read with
            ``machine_learning_processing.make_list_of_column``; entry *i*
            is handed to the classifier together with utterance *i*.
        mode: Forwarded to the ``senti_strength`` call.

    Raises:
        IndexError: If ``sentimentfile_test`` yields fewer entries than
            there are utterances (assuming ``make_list_of_column`` returns
            a list-like sequence).
    """
    # Gather the sentiment inputs before the output file is opened: opening
    # with 'w' truncates, so a failure here must not wipe an earlier result.
    sentimentlist = senti_strength.estimate_sentiment_probabilities_other_datasets(
        sentimentfile_train, file, column, mode)
    list_of_sentiments = machine_learning_processing.make_list_of_column(
        sentimentfile_test, 1)

    # newline='' lets the csv writer control line endings itself; without it
    # platforms that translate '\n' end every row with '\r\r\n'.
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(["Utterance", "Hate Speech"])  # header

        for utterance_id, utterance in enumerate(utterances):
            # determine the class using do_sentiment_naive_bayes_hs()
            class_hs = do_sentiment_naive_bayes_hs(
                utterance, lex, list_of_sentiments[utterance_id],
                sentimentlist)

            # every word is followed by one space (including the last one)
            utterance_string = "".join(word + " " for word in utterance)
            writer.writerow([utterance_string, class_hs])
def do_test_set_mem_sent_pos_hs_ths(utterances_test, utterances_unprocessed,
                                    filename, lex, file, column,
                                    sentimentfile_train, sentimentfile_test,
                                    mode):
    """Classify each test utterance with the maximum entropy model, save CSV.

    The output file has a ``;`` delimiter, the header row
    ``Utterance;Hate Speech`` and one row per utterance containing the
    utterance's words (each followed by a single space) and the class
    returned by ``do_mem_sent_pos_hs_ths``. The zero-based index of the
    utterance is printed as a progress marker after every 100th row
    (100, 200, ...), for inputs of any length.

    Args:
        utterances_test: Iterable of utterances; each utterance is an
            iterable of word strings.
        utterances_unprocessed: Sequence indexed in step with
            ``utterances_test``; element *i* is passed to the classifier
            alongside utterance *i*.
        filename: Path of the CSV file to create (overwritten if present).
        lex: Passed through unchanged to ``do_mem_sent_pos_hs_ths``
            (presumably a lexicon -- confirm against that function).
        file: Forwarded to
            ``senti_strength.estimate_sentiment_probabilities_other_datasets``.
        column: Forwarded to the same ``senti_strength`` call.
        sentimentfile_train: Forwarded to the same ``senti_strength`` call.
        sentimentfile_test: File whose column ``1`` is read with
            ``machine_learning_processing.make_list_of_column``; entry *i*
            is handed to the classifier together with utterance *i*.
        mode: Forwarded to the ``senti_strength`` call.

    Raises:
        IndexError: If ``utterances_unprocessed`` or the sentiment column is
            shorter than ``utterances_test`` (assuming both are list-like).
    """
    # Gather the sentiment inputs before the output file is opened: opening
    # with 'w' truncates, so a failure here must not wipe an earlier result.
    sentimentlist = senti_strength.estimate_sentiment_probabilities_other_datasets(
        sentimentfile_train, file, column, mode)
    list_of_sentiments = machine_learning_processing.make_list_of_column(
        sentimentfile_test, 1)

    # newline='' lets the csv writer control line endings itself; without it
    # platforms that translate '\n' end every row with '\r\r\n'.
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(["Utterance", "Hate Speech"])  # header

        for utterance_id, utterance in enumerate(utterances_test):
            utterance_unprocessed = utterances_unprocessed[utterance_id]
            # determine the class using do_mem_sent_pos_hs_ths()
            class_hs = do_mem_sent_pos_hs_ths(
                utterance, utterance_unprocessed, lex,
                list_of_sentiments[utterance_id], sentimentlist)

            # every word is followed by one space (including the last one)
            utterance_string = "".join(word + " " for word in utterance)
            writer.writerow([utterance_string, class_hs])

            # progress marker every 100 utterances; index 0 is skipped
            if utterance_id and utterance_id % 100 == 0:
                print(utterance_id)
# Example #3
# 0
def do_test_set_naive_bayes_sent_pos_ld(utterances, utterances_unprocessed,
                                        filename, lex, file, column,
                                        sentimentfile_train,
                                        sentimentfile_test, mode):
    """Classify each utterance with naive Bayes and save the result as CSV.

    The output file has a ``;`` delimiter, the header row
    ``Utterance;Cyberbullying`` and one row per utterance containing the
    utterance's words (each followed by a single space) and the class
    returned by ``do_naive_bayes_sent_pos_ld``. The zero-based index of the
    utterance is printed as a progress marker after every 100th row
    (100, 200, ...), for inputs of any length.

    Args:
        utterances: Iterable of utterances; each utterance is an iterable
            of word strings.
        utterances_unprocessed: Sequence indexed in step with
            ``utterances``; element *i* is passed to the classifier
            alongside utterance *i*.
        filename: Path of the CSV file to create (overwritten if present).
        lex: Passed through unchanged to ``do_naive_bayes_sent_pos_ld``
            (presumably a lexicon -- confirm against that function).
        file: Forwarded to
            ``senti_strength.estimate_sentiment_probabilities_other_datasets``.
        column: Forwarded to the same ``senti_strength`` call.
        sentimentfile_train: Forwarded to the same ``senti_strength`` call.
        sentimentfile_test: File whose column ``1`` is read with
            ``machine_learning_processing.make_list_of_column``; entry *i*
            is handed to the classifier together with utterance *i*.
        mode: Forwarded to the ``senti_strength`` call.

    Raises:
        IndexError: If ``utterances_unprocessed`` or the sentiment column is
            shorter than ``utterances`` (assuming both are list-like).
    """
    # Gather the sentiment inputs before the output file is opened: opening
    # with 'w' truncates, so a failure here must not wipe an earlier result.
    sentimentlist = senti_strength.estimate_sentiment_probabilities_other_datasets(
        sentimentfile_train, file, column, mode)
    list_of_sentiments = machine_learning_processing.make_list_of_column(
        sentimentfile_test, 1)

    # newline='' lets the csv writer control line endings itself; without it
    # platforms that translate '\n' end every row with '\r\r\n'.
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(["Utterance", "Cyberbullying"])  # header

        for utterance_id, utterance in enumerate(utterances):
            utterance_unprocessed = utterances_unprocessed[utterance_id]
            # determine the class using do_naive_bayes_sent_pos_ld()
            class_cb = do_naive_bayes_sent_pos_ld(
                utterance, utterance_unprocessed, lex,
                list_of_sentiments[utterance_id], sentimentlist)

            # every word is followed by one space (including the last one)
            utterance_string = "".join(word + " " for word in utterance)
            writer.writerow([utterance_string, class_cb])

            # progress marker every 100 utterances; index 0 is skipped
            if utterance_id and utterance_id % 100 == 0:
                print(utterance_id)