Example #1
import os


# load_model, produce_synset, and print_progress are project-level helpers
# defined elsewhere in this repository.
def classify(context):
    count = 0
    try:
        m = load_model()
        for cont in context:
            print_progress(count, len(context))
            count += 1
            try:
                line_split = cont.split("\t")
                id = line_split[0]
                sent = line_split[1]
                word = line_split[2]
                # skip items that already have a result file
                if os.path.isfile("../test/subtask3/results_" + str(id) +
                                  ".txt"):
                    continue
                original_sense, other_sense = produce_synset(sent, word, m)
                # print original_sense, other_sense
                original_sense_key = original_sense[0][1].lemmas()[0].key()
                other_sense_key = other_sense[0][1].lemmas()[0].key()
                with open("../test/subtask3/results_" + str(id) + ".txt",
                          'w') as out:
                    out.write(
                        str(id) + " " + original_sense_key + " " +
                        other_sense_key + " " + original_sense[0][2] + " " +
                        other_sense[0][2])

            except Exception:
                # skip rows that fail to parse or classify
                pass

    except Exception as e:
        print(e)
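
A minimal driver sketch for classify, assuming the tab-separated layout (id, sentence, pun word) that the parsing above expects; the input path is taken from the later examples, and the call itself is illustrative rather than part of the original code.

if __name__ == "__main__":
    # Hypothetical invocation: feed the raw TSV rows straight to classify(),
    # which skips rows it cannot parse.
    with open("../data/subtask3-test.tsv") as f:
        rows = f.read().split("\n")
    classify(rows)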
Example #2
from nltk.corpus import wordnet as wn


# wn is assumed to be NLTK's WordNet interface; synset_baseline and
# print_progress are project-level helpers defined elsewhere in this
# repository.
def subtask3_topkeys():

    count = 0
    with open("../data/subtask3-test.tsv") as f:
        lines = f.read().split("\n")
    size = len(lines)
    # m = load_model()
    for line in lines[2:3]:  # note: only the row at index 2 is processed
        count += 1
        # try:
        line_split = line.split("\t")
        id = line_split[0]
        print(id)
        sent = line_split[1]
        word = line_split[2]
        # if os.path.isfile("../baselines/subtask3/results_" + str(id) + ".txt"):
        #     continue

        try:
            with open("../baselines/subtask3/results_" + str(id) + ".txt",
                      'w') as out:
                original_sense, other_sense = synset_baseline(sent, word)
                original_sense = wn.synsets(
                    original_sense)[0].lemmas()[0].key()
                other_sense = wn.synsets(other_sense)[0]
                other_sense = other_sense.lemmas()[0].key()
                out.write(str(id) + " " + original_sense + " " + other_sense)
        except Exception:
            raise
        print_progress(count, size)
Example #3
import os


# synset_baseline and print_progress are project-level helpers defined
# elsewhere in this repository.
def classify_linear():

    count = 0
    with open("../data/subtask3-test.tsv") as f:
        lines = f.read().split("\n")
    size = len(lines)
    # m = load_model()
    for line in lines:
        count += 1
        # try:
        line_split = line.split("\t")
        if len(line_split) < 3:
            continue  # skip blank or malformed rows (e.g. the trailing empty split)
        id = line_split[0]
        sent = line_split[1]
        word = line_split[2]
        # skip items that already have a result file
        if os.path.isfile("../baselines/subtask3/results_" + str(id) + ".txt"):
            continue

        with open("../baselines/subtask3/results_" + str(id) + ".txt",
                  'w') as out:
            original_sense, other_sense = synset_baseline(sent, word)
            # print original_sense, other_sense
            original_sense_key = original_sense[0][1].lemmas()[0].key()
            other_sense_key = other_sense[0][1].lemmas()[0].key()
            out.write(
                str(id) + " " + original_sense_key + " " + other_sense_key +
                " " + original_sense[0][2] + " " + other_sense[0][2])
        # except Exception as e:
        #     print e
        #     pass
        print_progress(count, size)
Example #4
import multiprocessing
import os


# is_pun and print_progress are project-level helpers defined elsewhere
# in this repository.
def classify_pun(context):
    id = context[1]
    if os.path.isfile("../baselines/subtask1/results_" + str(id) + ".txt"):
        return
    try:
        res = "1" if is_pun(context[0]) else "0"
        with open("../baselines/subtask1/results_" + str(id) + ".txt",
                  'w') as f:
            f.write(id + " " + res)
        return
    except Exception:
        # silently skip sentences that is_pun fails on
        return


if __name__ == "__main__":

    # size = 10
    puns = []
    with open("../data/hetero_annotations.tsv") as f:
        rows = f.read().split("\n")
    size = len(rows)
    for i, line in enumerate(rows[1:]):
        sent = line.split("\t")[0]
        puns.append((sent, "het_" + str(i)))

    count = 0
    print_progress(count, size)
    pool = multiprocessing.Pool(processes=4)
    pun_it = pool.imap_unordered(classify_pun, puns)
    for res in pun_it:
        count += 1
        print_progress(count, size)
Example #5
import json
import multiprocessing
from data_processing import print_progress
from pun_word_4grams import pun_data


def write_data(line):
    line_split = line.split("\t")
    # print line_split
    if len(line_split) != 6:
        return
    try:
        with open("../data/subtask3_pun_data/" + line_split[0] + ".json", 'w') as out:
            json.dump(pun_data(line_split[1], line_split[2]), out, indent=4)
    except Exception as e:
        print(e)
        print("Failed on", line_split[1])


if __name__ == "__main__":
    with open("../data/subtask3-annotations-full-2.tsv") as f:
        data = f.read().split("\n")

    count = 0
    pool = multiprocessing.Pool(processes=4)
    pun_it = pool.imap_unordered(write_data, data)
    print_progress(count, len(data))

    for res in pun_it:
        count += 1
        print_progress(count, len(data))
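
As a follow-up sketch (the reader below is an assumption, not part of the original script), a downstream step could load the JSON files written by write_data like this.

import glob
import json

# Hypothetical consumer of the files produced above; only the directory name
# comes from write_data, the rest is illustrative.
for path in glob.glob("../data/subtask3_pun_data/*.json"):
    with open(path) as f:
        record = json.load(f)
    # record holds whatever pun_data() returned for that TSV row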