import os

from nltk.corpus import wordnet as wn

# load_model(), produce_synset(), synset_baseline() and print_progress() are
# project-local helpers assumed to be defined or imported elsewhere in this repo.


def classify(context):
    # context: iterable of tab-separated "id<TAB>sentence<TAB>pun word" lines.
    count = 0
    try:
        m = load_model()
        for cont in context:
            print_progress(count, len(context))
            count += 1
            try:
                line_split = cont.split("\t")
                id = line_split[0]
                sent = line_split[1]
                word = line_split[2]
                # Skip items that already have a results file.
                if os.path.isfile("../test/subtask3/results_" + str(id) + ".txt"):
                    continue
                original_sense, other_sense = produce_synset(sent, word, m)
                # print original_sense, other_sense
                original_sense_key = original_sense[0][1].lemmas()[0].key()
                other_sense_key = other_sense[0][1].lemmas()[0].key()
                with open("../test/subtask3/results_" + str(id) + ".txt", 'w') as out:
                    out.write(str(id) + " " + original_sense_key + " " + other_sense_key +
                              " " + original_sense[0][2] + " " + other_sense[0][2])
            except Exception as e:
                # Skip items the model cannot resolve.
                pass
    except Exception as e:
        print e
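
# Hedged usage sketch (not part of the original file): classify() expects the same
# tab-separated test lines that the drivers below read, and assumes load_model() and
# produce_synset() are available in this module; run_classify_on_test_file is a
# hypothetical helper name.
def run_classify_on_test_file(path="../data/subtask3-test.tsv"):
    test_lines = open(path).read().split("\n")
    classify(test_lines)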
def subtask3_topkeys():
    size, count = 10, 0
    lines = open("../data/subtask3-test.tsv").read().split("\n")
    size = len(lines)
    # m = load_model()
    # Note: the slice below only processes the third line (debugging leftover);
    # widen it to `lines` to run over the whole test file.
    for line in lines[2:3]:
        count += 1
        # try:
        line_split = line.split("\t")
        id = line_split[0]
        print id
        sent = line_split[1]
        word = line_split[2]
        # if os.path.isfile("../baselines/subtask3/results_" + str(id) + ".txt"):
        #     continue
        try:
            with open("../baselines/subtask3/results_" + str(id) + ".txt", 'w') as out:
                original_sense, other_sense = synset_baseline(sent, word)
                # Map the baseline's lemma strings to WordNet sense keys.
                original_sense = wn.synsets(original_sense)[0].lemmas()[0].key()
                other_sense = wn.synsets(other_sense)[0]
                other_sense = other_sense.lemmas()[0].key()
                out.write(str(id) + " " + original_sense + " " + other_sense)
        except Exception as e:
            raise
        print_progress(count, size)
def classify_linear():
    size, count = 10, 0
    lines = open("../data/subtask3-test.tsv").read().split("\n")
    size = len(lines)
    # m = load_model()
    for line in lines:
        count += 1
        # try:
        line_split = line.split("\t")
        id = line_split[0]
        sent = line_split[1]
        word = line_split[2]
        # Skip items that already have a results file.
        if os.path.isfile("../baselines/subtask3/results_" + str(id) + ".txt"):
            continue
        with open("../baselines/subtask3/results_" + str(id) + ".txt", 'w') as out:
            original_sense, other_sense = synset_baseline(sent, word)
            # print original_sense, other_sense
            original_sense_key = original_sense[0][1].lemmas()[0].key()
            other_sense_key = other_sense[0][1].lemmas()[0].key()
            out.write(str(id) + " " + original_sense_key + " " + other_sense_key +
                      " " + original_sense[0][2] + " " + other_sense[0][2])
        # except Exception as e:
        #     print e
        #     pass
        print_progress(count, size)
def classify_pun(context):
    # context is a (sentence, id) pair built by the __main__ block below.
    id = context[1]
    if os.path.isfile("../baselines/subtask1/results_" + str(id) + ".txt"):
        return
    try:
        res = "1" if is_pun(context[0]) else "0"
        with open("../baselines/subtask1/results_" + str(id) + ".txt", 'w') as f:
            f.write(id + " " + res)
        return
    except Exception as e:
        return


if __name__ == "__main__":
    # size = 10
    puns = []
    tsv_file = open("../data/hetero_annotations.tsv").read()
    size = len(tsv_file.split("\n"))
    for i, line in enumerate(tsv_file.split("\n")[1:size + 1]):
        sent = line.split("\t")[0]
        puns.append((sent, "het_" + str(i)))
    count = 0
    print_progress(count, size)
    # Classify each sentence in parallel; results are written per id from the workers.
    pool = multiprocessing.Pool(processes=4)
    pun_it = pool.imap_unordered(classify_pun, puns)
    for res in pun_it:
        count += 1
        print_progress(count, size)
import json
import multiprocessing

from data_processing import print_progress
from pun_word_4grams import pun_data


def write_data(line):
    line_split = line.split("\t")
    # print line_split
    if len(line_split) != 6:
        return
    try:
        with open("../data/subtask3_pun_data/" + line_split[0] + ".json", 'w') as out:
            json.dump(pun_data(line_split[1], line_split[2]), out, indent=4)
    except Exception as e:
        print e
        print "Failed on", line_split[1]


if __name__ == "__main__":
    with open("../data/subtask3-annotations-full-2.tsv") as f:
        data = f.read().split("\n")
    count = 0
    # Precompute pun data for every annotated line in parallel, one JSON file per id.
    pool = multiprocessing.Pool(processes=4)
    pun_it = pool.imap_unordered(write_data, data)
    print_progress(count, len(data))
    for res in pun_it:
        count += 1
        print_progress(count, len(data))