def pre_process():
    """Build the filtered n-gram list for the corpus selected by the mode file.

    Reads the file named ``mode`` in the current working directory. Its
    content (surrounding whitespace ignored) selects the corpus directory:
    ``'0'`` -> ``clean_train_set``, ``'1'`` -> ``clean_test_set``.

    Mono-, bi- and tri-grams are created through ``T`` and concatenated,
    then passed through the ``remove_*`` filters in a fixed order.

    Returns:
        A tuple ``(n_gram, one_gram)``: the filtered combined n-grams and
        the unfiltered mono-grams.

    Raises:
        ValueError: if the mode file holds anything other than '0' or '1'.
    """
    mode_dirs = {"0": "clean_train_set", "1": "clean_test_set"}

    with open("mode", "r") as mode_file:
        # Strip so that a trailing newline (e.g. from `echo 0 > mode`)
        # does not make the value fall through both branches.
        mode = mode_file.read().strip()

    if mode not in mode_dirs:
        # Previously this case surfaced later as an UnboundLocalError.
        raise ValueError(
            "unsupported mode %r in 'mode' file; expected '0' or '1'" % mode
        )
    corpus_dir = mode_dirs[mode]

    one_gram = T.create_mono_gram(corpus_dir)
    two_gram = T.create_bi_gram(corpus_dir, one_gram)
    three_gram = T.create_tri_gram(corpus_dir, one_gram)

    n_gram = one_gram + two_gram + three_gram
    negative_words = get_negative_words("Data")

    n_gram = remove_empty_tokens(n_gram)
    n_gram = remove_numeric_tokens(n_gram)
    n_gram = remove_non_cap_tokens(n_gram)
    n_gram = remove_negative_tokens(n_gram, negative_words)
    n_gram = remove_mixed_words(n_gram)

    # NOTE(review): presumably undoes a chdir performed inside one of the
    # helpers above -- confirm; kept because callers may rely on the
    # resulting working directory.
    os.chdir('..')
    return n_gram, one_gram