Esempio n. 1
0
def run():
    """Write a per-sense neighbour table for the given words to a text file.

    Arguments are read from ``sys.argv``:

    * ``argv[1]`` -- model identifier passed to ``load_model``.
    * ``argv[2]`` -- argument passed to ``get_wiki_tag_and_link_maps``, which
      returns the ``tag_map`` and ``link_map`` lookups (both keyed by sense).
    * ``argv[3]`` -- key dictionary path passed to ``get_sense_idx_map``.
    * ``argv[4]`` -- output file name; the table is written as UTF-8.
    * ``argv[5:]`` -- the target words.

    Each word contributes one block of rows followed by a ``" "`` separator
    row.  Column 0 carries the word (in the middle row of its block); each
    further column holds one sense: the ``model.most_similar`` results as
    ``"<item> <score>"`` lines, then the comma-joined ``tag_map`` entry, then
    ``"<link> <average score>"``.  Senses for which the model or either map
    raises ``KeyError`` are skipped.
    """
    model_id = sys.argv[1]
    tag_map, link_map = get_wiki_tag_and_link_maps(sys.argv[2])
    keydict_path = sys.argv[3]
    output_fn = sys.argv[4]
    words = sys.argv[5:]

    model = load_model(model_id)
    number_of_field = 15
    topn = 10
    # Neighbour lines plus one tag line and one link/average line.
    rows_per_word = topn + 2
    table = [[] for _ in range(number_of_field)]
    for word in words:
        sense_idx_map = get_sense_idx_map(keydict_path, word)
        sense_columns = []
        for sense, idx in sense_idx_map.items():
            # Resolve every lookup *before* touching the table, so that a
            # KeyError can never leave a half-written column behind.
            try:
                similar_words = model.most_similar(positive=[idx], topn=topn)
                tags = tag_map[sense]
                link = link_map[sense]
            except KeyError:
                continue
            if not similar_words:
                # No neighbours: nothing to list and no average to compute.
                continue
            if len(sense_columns) == number_of_field - 1:
                # Only number_of_field - 1 sense columns exist; indexing past
                # them would raise IndexError.
                sys.stderr.write(
                    "warning: more than %d senses for %r; extra senses dropped\n"
                    % (number_of_field - 1, word))
                break

            cells = [u"%s %s" % (t[0], t[1]) for t in similar_words]
            cells.append(u",".join(tags))
            similarities = [t[1] for t in similar_words]
            avg_sim = sum(similarities) / len(similarities)
            cells.append(u"%s %f" % (link, avg_sim))
            # Pad short neighbour lists so every column keeps the same height.
            cells.extend([""] * (rows_per_word - len(cells)))
            sense_columns.append(cells)

        # Target word goes in the middle row of its block.
        word_column = [""] * rows_per_word
        word_column[rows_per_word // 2] = word
        table[0].extend(word_column)

        # Fill used sense columns and blank out the unused ones, so that all
        # columns stay aligned even when a word has no usable sense at all.
        blank_column = [""] * rows_per_word
        for j in range(1, number_of_field):
            if j - 1 < len(sense_columns):
                table[j].extend(sense_columns[j - 1])
            else:
                table[j].extend(blank_column)

        # Separator row between consecutive words.
        for column in table:
            column.append(" ")

    table_filtered = [column for column in table if column]

    headers = ["target_word"]
    headers.extend(["sense-%d" % i for i in range(1, number_of_field)])

    with codecs.open(output_fn, "w", "utf-8") as f:
        # zip(*columns) transposes the column lists into table rows.
        t = tabulate.tabulate(list(zip(*table_filtered)), tablefmt="simple",
                              headers=headers)
        f.write(t)
Esempio n. 2
0
def run():
    """Write a per-sense neighbour table for the given words to a text file.

    Arguments are read from ``sys.argv``:

    * ``argv[1]`` -- model identifier passed to ``load_model``.
    * ``argv[2]`` -- argument passed to ``get_wiki_tag_and_link_maps``, which
      returns the ``tag_map`` and ``link_map`` lookups (both keyed by sense).
    * ``argv[3]`` -- key dictionary path passed to ``get_sense_idx_map``.
    * ``argv[4]`` -- output file name; the table is written as UTF-8.
    * ``argv[5:]`` -- the target words.

    Each word contributes one block of rows followed by a ``" "`` separator
    row.  Column 0 carries the word (in the middle row of its block); each
    further column holds one sense: the ``model.most_similar`` results as
    ``"<item> <score>"`` lines, then the comma-joined ``tag_map`` entry, then
    ``"<link> <average score>"``.  Senses for which the model or either map
    raises ``KeyError`` are skipped.
    """
    model_id = sys.argv[1]
    tag_map, link_map = get_wiki_tag_and_link_maps(sys.argv[2])
    keydict_path = sys.argv[3]
    output_fn = sys.argv[4]
    words = sys.argv[5:]

    model = load_model(model_id)
    number_of_field = 15
    topn = 10
    # Neighbour lines plus one tag line and one link/average line.
    rows_per_word = topn + 2
    table = [[] for _ in range(number_of_field)]
    for word in words:
        sense_idx_map = get_sense_idx_map(keydict_path, word)
        sense_columns = []
        for sense, idx in sense_idx_map.items():
            # Resolve every lookup *before* touching the table, so that a
            # KeyError can never leave a half-written column behind.
            try:
                similar_words = model.most_similar(positive=[idx], topn=topn)
                tags = tag_map[sense]
                link = link_map[sense]
            except KeyError:
                continue
            if not similar_words:
                # No neighbours: nothing to list and no average to compute.
                continue
            if len(sense_columns) == number_of_field - 1:
                # Only number_of_field - 1 sense columns exist; indexing past
                # them would raise IndexError.
                sys.stderr.write(
                    "warning: more than %d senses for %r; extra senses dropped\n"
                    % (number_of_field - 1, word))
                break

            cells = [u"%s %s" % (t[0], t[1]) for t in similar_words]
            cells.append(u",".join(tags))
            similarities = [t[1] for t in similar_words]
            avg_sim = sum(similarities) / len(similarities)
            cells.append(u"%s %f" % (link, avg_sim))
            # Pad short neighbour lists so every column keeps the same height.
            cells.extend([""] * (rows_per_word - len(cells)))
            sense_columns.append(cells)

        # Target word goes in the middle row of its block.
        word_column = [""] * rows_per_word
        word_column[rows_per_word // 2] = word
        table[0].extend(word_column)

        # Fill used sense columns and blank out the unused ones, so that all
        # columns stay aligned even when a word has no usable sense at all.
        blank_column = [""] * rows_per_word
        for j in range(1, number_of_field):
            if j - 1 < len(sense_columns):
                table[j].extend(sense_columns[j - 1])
            else:
                table[j].extend(blank_column)

        # Separator row between consecutive words.
        for column in table:
            column.append(" ")

    table_filtered = [column for column in table if column]

    headers = ["target_word"]
    headers.extend(["sense-%d" % i for i in range(1, number_of_field)])

    with codecs.open(output_fn, 'w', 'utf-8') as f:
        # zip(*columns) transposes the column lists into table rows.
        t = tabulate.tabulate(list(zip(*table_filtered)), tablefmt="simple",
                              headers=headers)
        f.write(t)
Esempio n. 3
0
def test_model(model_id):
    """Load the model identified by ``model_id`` and print it.

    The identifier is passed unchanged to ``load_model``; whatever that
    returns is written to standard output via ``print``.
    """
    model = load_model(model_id)
    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print(model)
Esempio n. 4
0
def test_model(model_id):
    """Load the model identified by ``model_id`` and print it.

    The identifier is passed unchanged to ``load_model``; whatever that
    returns is written to standard output via ``print``.
    """
    model = load_model(model_id)
    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print(model)