# Example #1
# 0
def make_lex_based_on_sent(sentimentfile, trainfile, lexname, sentiment, mode, class_column):
    """Build a sentiment-specific lexicon from the training data and write it to disk.

    Collects every distinct word from training utterances whose sentiment label
    (column 1 of *sentimentfile*) matches *sentiment*, sorts the words
    alphabetically, and writes them to *lexname*, one word per line.

    Parameters:
        sentimentfile: CSV file holding one sentiment label per utterance
            (read from column 1).
        trainfile: CSV file holding the utterance text (read from
            *class_column*).
        lexname: output path for the lexicon file.
        sentiment: the sentiment label to select utterances by; compared both
            as-is and as ``str(sentiment)`` because labels may be stored as
            strings.
        mode: 1 writes the full lexicon; any other value writes only words
            that occur at least twice in the whole dataset.
        class_column: column index of the utterance text in *trainfile*.
    """
    sentiment_list = machine_learning_processing.make_list_of_column(sentimentfile, 1)
    data_list = machine_learning_processing.process_data(trainfile, class_column)

    # Distinct words from utterances whose label matches the requested
    # sentiment. A set gives O(1) membership instead of scanning a list.
    vocab = set()
    for utterance_id, words in enumerate(data_list):
        label = sentiment_list[utterance_id]
        if label == sentiment or label == str(sentiment):
            vocab.update(words)

    lex = sorted(vocab)                                                     # sort list alphabetically

    # Count each word's total occurrences across the whole dataset once,
    # rather than rescanning data_list for every lexicon word
    # (the original was O(|lex| * |data_list|)).
    counts = {}
    for words in data_list:
        for word in words:
            counts[word] = counts.get(word, 0) + 1

    # Only words that occur at least twice in the dataset form lex2.
    lex2 = [word for word in lex if counts[word] > 1]

    with open(lexname, 'w') as f:
        words_to_write = lex if mode == 1 else lex2
        for word in words_to_write:
            f.write("%s\n" % word)


# labeled data
# Load the labeled-data test set: processed token lists (column 6) plus the
# raw, unprocessed column for reference.
test_list_ld = machine_learning_processing.process_data(
    "labeled_data_test.csv", 6)
test_list_ld_unprocessed = machine_learning_processing.make_list_of_column(
    "labeled_data_test.csv", 6)
# Run the memory-based classifier with sentiment + POS features over the
# labeled-data test set, writing predictions to "labeled_data_mem_final.csv".
# NOTE(review): argument semantics (column indices 5 and 2, lexicon file)
# are defined by do_test_set_mem_sent_pos_ld elsewhere — confirm there.
do_test_set_mem_sent_pos_ld(test_list_ld, test_list_ld_unprocessed,
                            "labeled_data_mem_final.csv",
                            "lexicon_with_occurences_ld.txt",
                            "labeled_data_train.csv", 5,
                            "labeled_data_train_with_sentiment.csv",
                            "labeled_data_test_with_sentiment.csv", 2)
# Same pipeline, hate-speech variant ("_hs" lexicon and output file).
do_test_set_mem_sent_pos_hs_ld(test_list_ld, test_list_ld_unprocessed,
                               "labeled_data_mem_final_hs.csv",
                               "lexicon_with_occurences_hs_ld.txt",
                               "labeled_data_train.csv", 5,
                               "labeled_data_train_with_sentiment.csv",
                               "labeled_data_test_with_sentiment.csv", 2)
estimation.test_results("labeled_data_test.csv", 5,
# Example #3
# 0
                print(utterance_id)
            elif utterance_id == 400:
                print(utterance_id)
            elif utterance_id == 500:
                print(utterance_id)
            elif utterance_id == 600:
                print(utterance_id)
            elif utterance_id == 700:
                print(utterance_id)
            elif utterance_id == 800:
                print(utterance_id)
            elif utterance_id == 900:
                print(utterance_id)


# Load the generic test/train sets: processed token lists from column 5,
# plus the raw test column for reference.
test_list = machine_learning_processing.process_data("test_set.csv", 5)
training_list = machine_learning_processing.process_data("train_set.csv", 5)
test_list_unprocessed = machine_learning_processing.make_list_of_column(
    "test_set.csv", 5)

# Build term-utterance feature matrices over the training data, one per
# lexicon file: the full lexicon plus per-sentiment lexicons
# (pos/neut/neg and their "2" variants — presumably the min-occurrence
# filtered versions; verify against the lexicon-generation code).
term_utterance_matrix = support_vector_machine.do_matrix(
    training_list, "lexicon.txt")
matrix_pos = support_vector_machine.do_matrix(training_list, "lexicon_pos.txt")
matrix_neut = support_vector_machine.do_matrix(training_list,
                                               "lexicon_neut.txt")
matrix_neg = support_vector_machine.do_matrix(training_list, "lexicon_neg.txt")
matrix_pos2 = support_vector_machine.do_matrix(training_list,
                                               "lexicon_pos2.txt")
matrix_neut2 = support_vector_machine.do_matrix(training_list,
                                                "lexicon_neut2.txt")
matrix_neg2 = support_vector_machine.do_matrix(training_list,