# NOTE(review): T1 and T2 are not used in the visible part of this script;
# their meaning (thresholds? tolerances?) should be confirmed where they are read.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the LAST command-line argument.  Without any
    # argument sys.argv[-1] would be the script path itself, so fail early
    # with a clear message instead of a confusing copyfile error.
    if len(sys.argv) < 2:
        sys.exit("usage: %s SAVE_PATH" % sys.argv[0])
    save_path = sys.argv[-1]

    # Keep a copy of the settings used for this run next to its outputs.
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")

    # load
    # Alternative data sources kept for reference:
    #lhs, rel, rhs = batch.load_labeled_entities(io.open(sys.argv[1],'r'))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open(sys.argv[2],'r'))
    #lhs, rel, rhs = batch.load_labeled_entities(io.open("../data/prescription-sparse2-train.txt"))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open("../data/prescription-sparse2-valid.txt"))
    #lhs_s, rel_s, rhs_s = batch.load_labeled_entities(io.open("../data/prescription-sparse2-test.txt"))

    # Each split is read inside a `with` block so the file handle is closed
    # as soon as the triples have been loaded.
    with io.open("../data/yago-sparse-entity-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-valid.txt") as f_in:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-test.txt") as f_in:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(f_in)

    # left hand side dictionaries, both character and entity
    chardict, charcount = batch.build_char_dictionary(lhs)
    # presumably the +1 reserves one extra index (padding/unknown) - TODO confirm
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # build dictionary for relations
# NOTE(review): T1 and T2 are not used in the visible part of this script;
# their meaning (thresholds? tolerances?) should be confirmed where they are read.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the LAST command-line argument.  Without any
    # argument sys.argv[-1] would be the script path itself, so fail early
    # with a clear message instead of a confusing copyfile error.
    if len(sys.argv) < 2:
        sys.exit("usage: %s SAVE_PATH" % sys.argv[0])
    save_path = sys.argv[-1]

    # Keep a copy of the settings used for this run next to its outputs.
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")

    # load
    # Alternative data sources kept for reference
    # ("sparse 2" is by a different train,valid,test ratio):
    #lhs, rel, rhs = batch.load_labeled_entities(io.open(sys.argv[1],'r'))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open(sys.argv[2],'r'))
    #lhs_s, rel_s, rhs_s = batch.load_labeled_entities(io.open("../data/prescription-freq-test.txt"))
    #lhs, rel, rhs = batch.load_labeled_entities(io.open("../data/prescription-sparse2-train.txt"))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open("../data/prescription-sparse2-valid.txt"))
    #lhs_s, rel_s, rhs_s = batch.load_labeled_entities(io.open("../data/prescription-sparse2-test.txt"))

    # Each split is read inside a `with` block so the file handle is closed
    # as soon as the triples have been loaded.
    with io.open("../data/yago-sparse-entity-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-valid.txt") as f_in:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-test.txt") as f_in:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(f_in)

    # left hand side dictionaries, both character and entity
    chardict, charcount = batch.build_char_dictionary(lhs)
    # presumably the +1 reserves one extra index (padding/unknown) - TODO confirm
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # build dictionary for relations
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    batch.save_dictionary(rel_dict, rel_count, '%s/rel_dict.pkl' % save_path)
import time
import cPickle as pkl
import io
from collections import OrderedDict

import batch
from settings import N_BATCH, N_EPOCH, DISPF, SAVEF, VALF, WDIM
from model_nn import charLM  # TODO change model
from model_nn import load_params_shared  # TODO change model

model = "_nn"  # or "" or "_nn" where nn is hybrid # TODO change model

if __name__ == "__main__":
    # First command-line argument: the frequency cut-off that names the
    # "rare" test file.  Parsed as an integer so malformed input is rejected
    # up front and the value has a numeric type (matching the sibling
    # evaluation script, which also uses int(sys.argv[1])).
    max_freq = int(sys.argv[1])

    # Rebuild the dictionaries from the training split; the vocabulary sizes
    # below are what the model constructor is given.
    # sparse 2 is by different train,valid,test ratio
    with io.open("../data/prescription-sparse2-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)

    chardict, charcount = batch.build_char_dictionary(lhs)
    # presumably the +1 reserves one extra index (padding/unknown) - TODO confirm
    n_char = len(chardict) + 1
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    n_rel = len(rel_dict)
    rhs_dict, rhs_count = batch.build_entity_dictionary(rhs)
    n_rhs = len(rhs_dict)

    # Test triples restricted to rare left-hand-side entities; the handle is
    # closed as soon as the triples have been loaded.
    rare_test_path = "../data/prescription-sparse2-rare-{}-test.txt".format(max_freq)
    with io.open(rare_test_path) as f_in:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(f_in)
    test_iter = batch.Batch(lhs_s, rel_s, rhs_s, batch_size=N_BATCH)

    # Build the model and load the stored parameters for the selected variant.
    m = charLM(n_char, n_lhs + 1, n_rel, n_rhs)  # emb_dim = WDIM by default
    m.param = load_params_shared("temp{}/best_model.npz".format(model))
import time
import cPickle as pkl
import io
from collections import OrderedDict

import batch
from settings import N_BATCH, N_EPOCH, DISPF, SAVEF, VALF, WDIM
from model_transe import charLM  # TODO change model
from model_transe import load_params_shared  # TODO change model

model = "_tr"  # or "" or "_nn" where nn is hybrid # TODO change model

if __name__ == "__main__":
    # First command-line argument: the frequency cut-off that names the
    # "rare" test file.
    max_freq = int(sys.argv[1])

    # Rebuild the dictionaries from the training split; the vocabulary sizes
    # below are what the model constructor is given.
    # sparse 2 is by different train,valid,test ratio
    with io.open("../data/prescription-sparse2-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)

    chardict, charcount = batch.build_char_dictionary(lhs)
    # presumably the +1 reserves one extra index (padding/unknown) - TODO confirm
    n_char = len(chardict) + 1
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    n_rel = len(rel_dict)
    rhs_dict, rhs_count = batch.build_entity_dictionary(rhs)
    n_rhs = len(rhs_dict)

    # Test triples restricted to rare left-hand-side entities; the handle is
    # closed as soon as the triples have been loaded.
    rare_test_path = "../data/prescription-sparse2-rare-{}-test.txt".format(max_freq)
    with io.open(rare_test_path) as f_in:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(f_in)
    test_iter = batch.Batch(lhs_s, rel_s, rhs_s, batch_size=N_BATCH)

    # Build the model and load the stored parameters for the selected variant.
    m = charLM(n_char, n_lhs + 1, n_rel, n_rhs)  # emb_dim = WDIM by default
    m.param = load_params_shared("temp{}/best_model.npz".format(model))
# NOTE(review): T1 and T2 are not used in the visible part of this script;
# their meaning (thresholds? tolerances?) should be confirmed where they are read.
T1 = 0.01
T2 = 0.001

if __name__ == '__main__':
    # The output directory is the LAST command-line argument.  Without any
    # argument sys.argv[-1] would be the script path itself, so fail early
    # with a clear message instead of a confusing copyfile error.
    if len(sys.argv) < 2:
        sys.exit("usage: %s SAVE_PATH" % sys.argv[0])
    save_path = sys.argv[-1]

    # Keep a copy of the settings used for this run next to its outputs.
    shutil.copyfile('settings.py', '%s/settings.txt' % save_path)

    print("Preparing data...")

    # load
    # Alternative data sources kept for reference:
    #lhs, rel, rhs = batch.load_labeled_entities(io.open(sys.argv[1],'r'))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open(sys.argv[2],'r'))
    #lhs, rel, rhs = batch.load_labeled_entities(io.open("../data/prescription-sparse2-train.txt"))
    #lhs_v, rel_v, rhs_v = batch.load_labeled_entities(io.open("../data/prescription-sparse2-valid.txt"))
    #lhs_s, rel_s, rhs_s = batch.load_labeled_entities(io.open("../data/prescription-sparse2-test.txt"))

    # Each split is read inside a `with` block so the file handle is closed
    # as soon as the triples have been loaded.
    with io.open("../data/yago-sparse-entity-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-valid.txt") as f_in:
        lhs_v, rel_v, rhs_v = batch.load_labeled_entities(f_in)
    with io.open("../data/yago-sparse-entity-test.txt") as f_in:
        lhs_s, rel_s, rhs_s = batch.load_labeled_entities(f_in)

    # left hand side dictionaries, both character and entity
    chardict, charcount = batch.build_char_dictionary(lhs)
    # presumably the +1 reserves one extra index (padding/unknown) - TODO confirm
    n_char = len(chardict) + 1
    batch.save_dictionary(chardict, charcount, '%s/dict.pkl' % save_path)

    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)
    n_lhs = len(lhs_dict)
    batch.save_dictionary(lhs_dict, lhs_count, '%s/lhs_dict.pkl' % save_path)

    # build dictionary for relations
    rel_dict, rel_count = batch.build_entity_dictionary(rel)
    batch.save_dictionary(rel_dict, rel_count, '%s/rel_dict.pkl' % save_path)
# input: training set, validation set, test set
# output: the triples from the test set AND the validation set whose left hand
# side entity appears no more than max_freq times in the training set (or does
# not appear there at all), written to a single "rare" test file.
import sys
import io
import numpy as np
import batch


def _rare_rows(path, lhs_count, max_freq):
    """Return tab-separated lines for the rare-lhs triples stored in `path`.

    A triple is kept when its left hand side entity is missing from
    `lhs_count` or its count there is <= `max_freq`.  Each returned line has
    the form "lhs\\trel\\trhs\\n".  The input file is closed before returning.
    """
    with io.open(path) as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)
    rows = []
    for head, relation, tail in zip(lhs, rel, rhs):
        if head not in lhs_count or lhs_count[head] <= max_freq:
            rows.append("{}\t{}\t{}\n".format(head, relation, tail))
    return rows


if __name__ == "__main__":
    max_freq = 2  # selecting lhs that appear <= max_freq times in the training set

    # Entity frequencies are counted on the training split only.
    with io.open("../data/prescription-sparse2-train.txt") as f_in:
        lhs, rel, rhs = batch.load_labeled_entities(f_in)
    lhs_dict, lhs_count = batch.build_entity_dictionary(lhs)

    # Test rows first, then validation rows, same order as they are read.
    buf = _rare_rows("../data/prescription-sparse2-test.txt", lhs_count, max_freq)
    buf.extend(_rare_rows("../data/prescription-sparse2-valid.txt", lhs_count, max_freq))

    # NOTE(review): input is read with io.open but output uses the builtin
    # open; confirm this round-trips non-ASCII entities on the Python version
    # in use.
    with open("../data/prescription-sparse2-rare-{}-test.txt".format(max_freq), "w") as f_out:
        f_out.writelines(buf)
# input: training set, validation set, test set # output: part of test set such that each left hand side entity appear no more # than FREQ times in the training set. import sys import io import numpy as np import batch # split the data into training set, validation set and testing set if __name__ == "__main__": max_freq = 2 # selecting lhs that appear <= max_freq times in the training set lhs, rel, rhs = batch.load_labeled_entities( io.open("../data/prescription-sparse2-train.txt")) lhs_dict, lhs_count = batch.build_entity_dictionary(lhs) lhs, rel, rhs = batch.load_labeled_entities( io.open("../data/prescription-sparse2-test.txt")) buf = [] for i in range(len(lhs)): if lhs[i] not in lhs_count or lhs_count[lhs[i]] <= max_freq: buf.append("{}\t{}\t{}\n".format(lhs[i], rel[i], rhs[i])) lhs, rel, rhs = batch.load_labeled_entities( io.open("../data/prescription-sparse2-valid.txt")) for i in range(len(lhs)): if lhs[i] not in lhs_count or lhs_count[lhs[i]] <= max_freq: buf.append("{}\t{}\t{}\n".format(lhs[i], rel[i], rhs[i])) with open("../data/prescription-sparse2-rare-{}-test.txt".format(max_freq), "w") as f_out: