Ejemplo n.º 1
0
from jnt.matching.synset_fetchers import BabelNet, SenseClusters
from jnt.matching.matcher import Matcher

# --- Inputs ------------------------------------------------------------------
# Optional BabelNet API key; the script is expected to work with it left empty.
BABELNET_KEY = ""
# Bag-of-words file built from Adagram nearest neighbours.
ADAGRAM_BOW = "data/ddt-adagram-50.csv"
# Fetch with: wget http://panchenko.me/data/joint/adagram/data/babelnet-bow-5190.pkl
BABELNET_BOW = "data/babelnet-bow-5190.pkl"
# Vocabulary file: the mapping is computed for the words listed here.
VOC = "data/voc-50.csv"

# --- Output ------------------------------------------------------------------
# The result file sits next to the vocabulary file.
OUTPUT = VOC + ".match.csv"

# Build both sense inventories, then match the vocabulary between them.
babelnet = BabelNet(
    babelnet_fpath=BABELNET_BOW,
    babelnet_keys=[BABELNET_KEY],
)
adagram = SenseClusters(
    strip_dst_senses=True,
    sense_clusters_fpath=ADAGRAM_BOW,
)
m = Matcher(babelnet, adagram)
m.match_file(output_fpath=OUTPUT, words_fpath=VOC)

Ejemplo n.º 2
0
#     match_fpath = voc_fpath + "-match-p%d.csv" % p
#     m.match_file(voc_fpath, match_fpath, threshold_percentile=p)
#     df = read_csv(match_fpath, encoding='utf-8', delimiter="\t", error_bad_lines=False)
#     res["matches"].append(len(df))
#     res["words"].append(len(set(df.word.values)))
#     res["matches/words"].append(float(len(df)) / len(set(df.word.values)))
#     res["p"].append(p)
#     l = [len(rows) for word, rows in df.groupby(["word"])]
#     res["std"].append(std(l))


from jnt.matching.synset_fetchers import BabelNet, SenseClusters, BABELNET_KEYS
from jnt.matching.matcher import Matcher

# Input locations. These are absolute paths under /Users/alex, i.e. specific
# to one machine; point them at local copies of the data before running.
BABELNET_DIR = "/Users/alex/tmp/matching/babelnet-eval/"
ADAGRAM_DDT = "/Users/alex/tmp/matching/ddt-adagram-ukwac+wacky.csv.gz.voc.out"
voc_fpath = "/Users/alex/work/joint/src/data/ambigous-words-mine.csv"
# Frequency file path is left empty here; an alternative value kept for
# reference is "/Users/alex/tmp/st/word-freq-t10.csv".
freq_fpath = ""

from time import time

# Construct the BabelNet object and report the elapsed wall-clock time.
tic = time()
babelnet = BabelNet(babelnet_keys=BABELNET_KEYS, babelnet_fpath=BABELNET_DIR, freq_fpath=freq_fpath, divide_by_freq=False)
# A single pre-formatted string in parentheses prints the same text with the
# Python 2 print statement and parses as a call under Python 3.
print("BabelNet load time: %s" % (time() - tic))

# Same measurement for the sense clusters.
tic = time()
adagram = SenseClusters(ADAGRAM_DDT, normalized_bow=True)
print("SenseClusters load time: %s" % (time() - tic))

# Match the vocabulary; the second argument is presumably the output path
# (the vocabulary path with a "-match.csv" suffix) -- confirm against Matcher.
m = Matcher(babelnet, adagram)
m.match_file(voc_fpath, voc_fpath + "-match.csv")
Ejemplo n.º 3
0
from jnt.matching.synset_fetchers import BabelNet, SenseClusters
from jnt.matching.matcher import Matcher

# Configuration for the matching run.
# BabelNet API key: optional, may be left as an empty string.
BABELNET_KEY = ""
# Adagram nearest-neighbour bag of words.
ADAGRAM_BOW = "data/ddt-adagram-50.csv"
# BabelNet bag of words; download with:
#   wget http://panchenko.me/data/joint/adagram/data/babelnet-bow-5190.pkl
BABELNET_BOW = "data/babelnet-bow-5190.pkl"
# Words for which the mapping will be performed.
VOC = "data/voc-50.csv"
# Matches are written alongside the vocabulary file.
OUTPUT = VOC + ".match.csv"

# Load the two resources and run the matcher over the vocabulary.
babelnet = BabelNet(
    babelnet_fpath=BABELNET_BOW,
    babelnet_keys=[BABELNET_KEY],
)
adagram = SenseClusters(
    strip_dst_senses=True,
    sense_clusters_fpath=ADAGRAM_BOW,
)
m = Matcher(babelnet, adagram)
m.match_file(output_fpath=OUTPUT, words_fpath=VOC)