def evaluate_simi(wv, w2i, vocab):
    # Build a word -> vector dict restricted to the given vocabulary.
    wv_dict = dict()
    for w in vocab:
        wv_dict[w] = wv[w2i[w], :]
    if isinstance(wv_dict, dict):
        w = Embedding.from_dict(wv_dict)

    # Calculate results on similarity
    print("Calculating similarity benchmarks")
    similarity_tasks = {
        "WS353": fetch_WS353(),
        "RG65": fetch_RG65(),
        # "WS353R": fetch_WS353(which="relatedness"),
        # "WS353S": fetch_WS353(which="similarity"),
        "SimLex999": fetch_SimLex999(),
        "MTurk": fetch_MTurk(),
        "RW": fetch_RW(),
        "MEN": fetch_MEN(),
    }

    # similarity_results = {}
    for name, data in iteritems(similarity_tasks):
        print("Sample data from {}, num of samples: {} : pair \"{}\" and \"{}\" is assigned score {}"
              .format(name, len(data.X), data.X[0][0], data.X[0][1], data.y[0]))
        score = evaluate_similarity(w, data.X, data.y)
        print("Spearman correlation of scores on {} {}".format(name, score))
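# The snippets in this collection call benchmark fetchers and evaluation helpers
# without showing their imports. A likely import block, assuming the
# word-embeddings-benchmarks ("web") package plus six/numpy/pandas; adjust to the
# actual module layout used by this repo.
import logging
import numpy as np
import pandas as pd
from six import iteritems
from web.datasets.similarity import (fetch_MEN, fetch_WS353, fetch_SimLex999,
                                     fetch_RW, fetch_RG65, fetch_MTurk, fetch_TR9856)
from web.datasets.analogy import fetch_google_analogy, fetch_msr_analogy
from web.embedding import Embedding
from web.vocabulary import Vocabulary
from web.embeddings import fetch_GloVe, load_embedding
from web.evaluate import evaluate_similarity, evaluate_analogy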
def web_tests(emb):
    """
    :param emb: dict of words and their corresponding embeddings
    :return: dict of word-embeddings-benchmarks tests and scores received
    """
    similarity_tasks = {
        'WS353': fetch_WS353(),
        'RG65': fetch_RG65(),
        'RW': fetch_RW(),
        'MTurk': fetch_MTurk(),
        'MEN': fetch_MEN(),
        'SimLex999': fetch_SimLex999()
    }
    web_emb = Embedding(Vocabulary(list(emb.keys())), list(emb.values()))
    similarity_results = {}
    for name, data in iteritems(similarity_tasks):
        # Evaluate once and reuse the score for the results dict and the log line.
        similarity_results[name] = evaluate_similarity(web_emb, data.X, data.y)
        logging.info("Spearman correlation of scores on {} {}".format(
            name, similarity_results[name]))
    return similarity_results
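# A minimal, illustrative call to web_tests (hypothetical data, using numpy as np
# from the imports above): a few random vectors are enough to exercise the code
# path, although nearly every benchmark pair will be out of vocabulary, so the
# reported correlations are meaningless.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    toy_emb = {word: rng.rand(50) for word in ["cat", "dog", "car", "bus"]}
    print(web_tests(toy_emb))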
def get_dataset(dataset_name):
    if dataset_name == "WS353":
        return fetch_WS353("similarity")
    elif dataset_name == "MEN":
        return fetch_MEN("all")
    elif dataset_name == "SimLex-999":
        return fetch_SimLex999()
    elif dataset_name == "MTurk":
        return fetch_MTurk()
    # elif dataset_name == "WS353":  # unreachable: "WS353" is already handled above
    #     return fetch_WS353('all')
    elif dataset_name == "RG65":
        return fetch_RG65()
    elif dataset_name == "RW":
        return fetch_RW()
    elif dataset_name == "TR9856":
        return fetch_TR9856()
    elif dataset_name == "MSR":
        return fetch_msr_analogy()
    elif dataset_name == "Google":
        return fetch_google_analogy()
    else:
        raise Exception("{}: dataset not supported".format(dataset_name))
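# Example use of get_dataset (illustrative): for the similarity benchmarks, the
# fetchers return a Bunch whose data.X holds word pairs and data.y the human scores.
data = get_dataset("SimLex-999")
print("{} pairs; first pair {} scored {}".format(len(data.X), data.X[0], data.y[0]))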
def evaluateOnAll(w):
    similarity_tasks = {
        "MTurk": fetch_MTurk(),
        "MEN": fetch_MEN(),
        "WS353": fetch_WS353(),
        "RubensteinAndGoodenough": fetch_RG65(),
        "Rare Words": fetch_RW(),
        "SIMLEX999": fetch_SimLex999(),
        "TR9856": fetch_TR9856()
    }
    similarity_results = {}
    for name, data in iteritems(similarity_tasks):
        similarity_results[name] = evaluate_similarity(w, data.X, data.y)
        print("Spearman correlation of scores on {} {}".format(name, similarity_results[name]))

    # Calculate results on analogy
    print("Calculating analogy benchmarks")
    analogy_tasks = {
        "Google": fetch_google_analogy(),
        "MSR": fetch_msr_analogy()
    }
    analogy_results = {}
    for name, data in iteritems(analogy_tasks):
        analogy_results[name] = evaluate_analogy(w, data.X, data.y)
        print("Analogy prediction accuracy on {} {}".format(name, analogy_results[name]))

    analogy_results["SemEval2012_2"] = calAnswersonSemEval(w)['all']
    print("Analogy prediction accuracy on {} {}".format("SemEval2012", analogy_results["SemEval2012_2"]))

    analogy = pd.DataFrame([analogy_results])
    sim = pd.DataFrame([similarity_results])
    results = sim.join(analogy)
    return results
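# Possible driver code for evaluateOnAll, assuming the pretrained GloVe fetcher
# referenced in the commented-out block near the end of these snippets; the output
# filename is hypothetical.
w_glove = fetch_GloVe(corpus="wiki-6B", dim=300)
results = evaluateOnAll(w_glove)
results.to_csv("glove_wiki6B_300d_results.csv", index=False)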
plt.style.use('ggplot')
np.random.seed(0)

# Configure logging
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                    level=logging.DEBUG,
                    datefmt='%I:%M:%S')

# SETUP
datasets = [('SL999', fetch_SimLex999()),
            ('SL333', fetch_SimLex999(which='333')),
            ('SV3500-t', fetch_SimVerb3500(which='test')),
            ('WS353', fetch_WS353()),
            # ('WS353R', fetch_WS353(which='relatedness')),
            # ('RW', fetch_RW()),
            ('MEN-t', fetch_MEN(which='test')),
            ('SCWS', fetch_SCWS()),
            ('MTurk', fetch_MTurk())]

logging.info(fuel.config.data_path)
vocab_defs_fname = os.path.join(fuel.config.data_path[0], "vocab.txt")
logging.info("using vocab for definition {}".format(vocab_defs_fname))
# END SETUP

parser = argparse.ArgumentParser("Evaluate embeddings")
parser.add_argument("emb_filename", help="Location of embeddings")
parser.add_argument("emb_format", help="either 'glove', 'dict' or 'dict_poly'")
parser.add_argument("root_dicts", help="dirname that contains all.json and test.json")
parser.add_argument("--lowercase", action='store_true')
parser.add_argument("--normalize", action='store_true')
parser.add_argument("--vocab_size", default=None, type=int, help="vocab size (GloVe only)")
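# A sketch of how these arguments might be consumed (hypothetical): load_embedding
# has the signature quoted in a comment elsewhere in these snippets; the 'glove'
# and 'dict_poly' branches are omitted because their extra loader arguments are
# not shown here.
args = parser.parse_args()
if args.emb_format == 'dict':
    emb = load_embedding(args.emb_filename, format='dict',
                         normalize=args.normalize, lower=args.lowercase)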
def test_MTurk_fetcher():
    data = fetch_MTurk()
    assert (len(data.y) == len(data.X) == 287)
    assert (10.0 >= data.y.max() >= 9)
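# An analogous sanity check for another fetcher (illustrative): SimLex-999 ships
# 999 word pairs scored on a 0-10 scale.
def test_SimLex999_fetcher():
    data = fetch_SimLex999()
    assert (len(data.y) == len(data.X) == 999)
    assert (10.0 >= data.y.max() >= 9)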
    'CBOW': 'fetch_CBOW()'
}

# w_glove = fetch_GloVe(corpus="wiki-6B", dim=300)
# w_PDC = fetch_PDC()
# w_HDC = fetch_HDC()
# w_LexVec = fetch_LexVec(which="wikipedia+newscrawl-W")
# w_conceptnet_numberbatch = fetch_conceptnet_numberbatch()
# w_w2v = fetch_SG_GoogleNews()
# w_fastText = fetch_FastText()
# load_embedding(path, format='word2vec', normalize=True, lower=False, clean_words=False)

# Define tasks
tasks = {
    "MTurk": fetch_MTurk(),
    "MEN": fetch_MEN(),
    "WS353": fetch_WS353(),
    "RG65": fetch_RG65(),
    "RW": fetch_RW(),
    "SIMLEX999": fetch_SimLex999(),
    "TR9856": fetch_TR9856()
}

result = np.zeros((7, 7))

# Print sample data
# for name, data in iteritems(tasks):
#     print("Sample data from {}: pair \"{}\" and \"{}\" is assigned score {}".format(name, data.X[0][0], data.X[0][1], data.y[0]))

# Calculate results using helper function
i = 0
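# One way the evaluation loop could continue (hypothetical: the surrounding script
# is not shown). It assumes `embeddings` is the dict whose closing brace appears
# above, with its string values replaced by actual loaded Embedding objects, and
# fills `result` with one row per embedding and one column per task.
for emb_name, w in iteritems(embeddings):
    for j, (name, data) in enumerate(iteritems(tasks)):
        result[i, j] = evaluate_similarity(w, data.X, data.y)
        print("Spearman correlation of {} on {}: {}".format(emb_name, name, result[i, j]))
    i += 1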