def main(words_path, vectors_path, weight_path, fpc_name, test_name):
    """Evaluate sentence similarity on one test dataset using one fpc file.

    Loads the pickled words, vectors and weight4ind objects, creates a
    parameter object with ``rmpc`` set to 1, and calls
    ``eval.sim_evaluate_one`` with ``sim_algo.weighted_average_sim_rmpc``.

    Args:
        words_path: Path to the pickled words file.
        vectors_path: Path to the pickled vectors file.
        weight_path: Path to the pickled weight4ind file.
        fpc_name: Name of the fpc file, passed through to the evaluator.
        test_name: Name of the test dataset, passed through to the evaluator.

    Returns:
        The ``(pearson, mse)`` pair produced by ``eval.sim_evaluate_one``.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only pass paths to trusted, locally generated pickles.
    print("loading words file...")
    with open(words_path, 'rb') as words_file:
        words = pickle.load(words_file)
    print("loading vectors file...")
    with open(vectors_path, 'rb') as vectors_file:
        vectors = pickle.load(vectors_file)
    print("loading weight4ind file...")
    with open(weight_path, 'rb') as weight_file:
        weight4ind = pickle.load(weight_file)

    rmpc = 1
    # Bind the instance to a distinct local name: assigning to `params` inside
    # this function would make the name local for the whole body, so
    # `params.params()` would raise UnboundLocalError.
    sim_params = params.params()
    sim_params.rmpc = rmpc

    print("calculating sentence similarity scores, use fpc file: {}.".format(fpc_name))
    pearson, mse = eval.sim_evaluate_one(
        vectors,
        words,
        weight4ind,
        sim_algo.weighted_average_sim_rmpc,
        sim_params,
        fpc_name,
        test_name,
    )
    return pearson, mse
# Load the preprocessed words, vectors and weight4ind pickles.
# NOTE(review): words_path, vectors_path, weight_path and params are not
# defined in this fragment; they must be provided by the enclosing scope.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load trusted, locally generated pickles.
print("loading words file...")
with open(words_path, 'rb') as words_file:
    words = pickle.load(words_file)
print("loading vectors file...")
with open(vectors_path, 'rb') as vectors_file:
    vectors = pickle.load(vectors_file)
print("loading weight4ind file...")
with open(weight_path, 'rb') as weight_file:
    weight4ind = pickle.load(weight_file)

# Using a list of datasets to generate the corresponding fpc files.
dataset_dir = "../data/"
dataset_list = [f for f in listdir(dataset_dir) if isfile(join(dataset_dir, f))]
for dataset_file in dataset_list:
    print("preparing the first principle component based on {}.".format(str(dataset_file)))
    eval.prepare_first_pc(vectors, words, weight4ind, sim_algo.get_first_pc, params, dataset_file)

test_dataset = 'sicktest'  # name of the test dataset
pearson_list = []
mse_list = []

# Using a list of fpc files to evaluate on datasets.
# The directory is listed only after the preparation loop above has run.
fpc_dir = "../first_principle_component/"
fpc_list = [f for f in listdir(fpc_dir) if isfile(join(fpc_dir, f))]
# Built after fpc_list is assigned; referencing it earlier raises NameError.
index = list(fpc_list)

for fpc_file in fpc_list:
    print("calculating sentence similarity scores, use fpc file: {}.".format(fpc_file))
    pearson, mse = eval.sim_evaluate_one(
        vectors,
        words,
        weight4ind,
        sim_algo.weighted_average_sim_rmpc,
        params,
        fpc_file,
        test_dataset,
    )
    # pearson_list and mse_list stay aligned with the entries of `index`.
    pearson_list.append(pearson)
    mse_list.append(mse)

if __name__ == '__main__':
    plac.call(main)
# Results of every run, keyed by (wordfile, weightfile, weightpara, rmpc).
# parr4para holds the first value returned by the evaluator, sarr4para the
# second.
parr4para, sarr4para = {}, {}

for wordfile in wordfiles:
    words, We = data_io.getWordmap(wordfile)

    for weightpara in weightparas:
        # Word weights depend only on the weight parameter, so they are
        # computed once per weightpara rather than once per rmpc value.
        word2weight = data_io.getWordWeight(weightfile, weightpara)
        weight4ind = data_io.getWeight(words, word2weight)

        for rmpc in rmpcs:
            paras = (wordfile, weightfile, weightpara, rmpc)

            print('word vectors loaded from %s' % wordfile)
            print('word weights computed from %s using parameter a=%f' % (weightfile, weightpara))
            params.rmpc = rmpc
            print('remove the first %d principal components' % rmpc)

            # Evaluate just one example dataset.
            # To evaluate all datasets instead (they need to be obtained from
            # John Wieting, https://github.com/jwieting/iclr2016), call:
            #   eval.sim_evaluate_all(We, words, weight4ind,
            #                         sim_algo.weighted_average_sim_rmpc, params)
            parr, sarr = eval.sim_evaluate_one(
                We,
                words,
                weight4ind,
                sim_algo.weighted_average_sim_rmpc,
                params,
            )

            parr4para[paras] = parr
            sarr4para[paras] = sarr

# Settings for saving the results.
save_result = False  # set to True to save
result_file = 'result/sim_sif.result'
# Each entry is a [comment, values] pair; per the original note, this list
# needs to align with the following loop.
comment4para = [
    ['word vector files', wordfiles],
    ['weight parameters', weightparas],
    ['remove principal component or not', rmpcs],
]