def load_model(checkpoint_file):
    """Load a trained embedding model from a torch checkpoint.

    Parameters
    ----------
    checkpoint_file : str
        Path to a checkpoint saved with keys 'dataset', 'dim', 'distfn'
        and 'model' (a state dict).

    Returns
    -------
    The model re-initialized from the checkpoint's dataset with the saved
    weights loaded.

    Raises
    ------
    ValueError
        If ``checkpoint_file`` is None.
    """
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if checkpoint_file is None:
        raise ValueError('checkpoint_file must not be None')
    checkpoint = th.load(checkpoint_file)
    # Rebuild the vocabulary from the dataset the model was trained on.
    tsv_file = checkpoint['dataset']
    idx, objects, enames = slurp(tsv_file)
    dim = checkpoint['dim']
    distfn = checkpoint['distfn']
    # Minimal options namespace required by the dataset initializer; negs and
    # dset are not used when only loading a model, so placeholders suffice.
    opt_temp = Namespace()
    opt_temp.dim = dim
    opt_temp.distfn = distfn
    opt_temp.negs = 50  # doesn't matter for loading
    opt_temp.dset = 'test.tsv'  # doesn't matter for loading
    model, data, model_name, _ = model_class.SNGraphDataset.initialize(
        distfn, opt_temp, idx, objects, enames)
    model.load_state_dict(checkpoint['model'])
    return model
def build_graph(dataset, directed=False):
    """Build a networkx graph from a TSV edge-list dataset.

    Parameters
    ----------
    dataset : str
        Path to the TSV file understood by ``slurp``.
    directed : bool, optional
        When True build a ``nx.DiGraph``; otherwise an undirected ``nx.Graph``.

    Returns
    -------
    tuple
        ``(G, enames_inv, enames)`` — the graph, the id -> name mapping, and
        a copy of the name -> id mapping.
    """
    G = nx.DiGraph() if directed else nx.Graph()
    idx, objects, enames = slurp(dataset)
    # Invert name -> id into id -> name with a comprehension (idiomatic
    # replacement for the manual loop).
    enames_inv = {v: k for k, v in enames.items()}
    # Only the first two columns (subject, object) define edges; edges are
    # inserted as (object, subject) to keep the original orientation.
    pairs = idx.numpy()[:, :2]
    G.add_edges_from((row[1], row[0]) for row in pairs)
    return G, enames_inv, dict(enames)
parser.add_argument('-negs', help='Number of negatives', type=int, default=20)
parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
opt = parser.parse_args()

th.set_default_tensor_type('torch.DoubleTensor')

# Logging verbosity follows the -debug flag.
if opt.debug:
    log_level = logging.DEBUG
else:
    log_level = logging.INFO
log = logging.getLogger('poincare-nips17')
logging.basicConfig(level=log_level, format='%(message)s')

idx, objects = slurp(opt.dset)

# create adjacency list for evaluation
adjacency = ddict(set)
for i in range(len(idx)):
    s, o, _ = idx[i]
    adjacency[s].add(o)
adjacency = dict(adjacency)

# setup Riemannian gradients for distances
opt.retraction = rsgd.euclidean_retraction
if opt.distfn == 'poincare':
    distfn = model.PoincareDistance
    opt.rgrad = rsgd.poincare_grad
# BUG FIX: was `opt.distf`, an attribute that is never defined (the flag is
# `-distfn`, as used in the 'poincare' branch above) — the euclidean branch
# could never be selected.
elif opt.distfn == 'euclidean':
    distfn = model.EuclideanDistance
parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
opt = parser.parse_args()

th.set_default_tensor_type('torch.FloatTensor')

# Logging verbosity follows the -debug flag.
log_level = logging.DEBUG if opt.debug else logging.INFO
log = logging.getLogger('poincare-nips17')
logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)

# Dispatch on the dataset's file extension.
if opt.dset[-4:] == '.tsv':
    idx, objects = slurp(opt.dset)  # , fparse=parse_space)
elif opt.dset[-2:] == '.p':
    idx, objects = slurp_pickled_nx(opt.dset, opt.f)

# create adjacency list for evaluation, key is node id and values are adjacent nodes ids
adjacency = ddict(set)
for row in range(len(idx)):
    src, dst, _ = idx[row]
    adjacency[src].add(dst)
adjacency = dict(adjacency)

# setup Riemannian gradients for distances
opt.retraction = rsgd.euclidean_retraction
if opt.distfn == 'poincare':
    distfn = model.PoincareDistance
import pickle

val_filename = './package_renamed_wo_clique/functions_04182018_val'
duplicate_file = './package_renamed_wo_clique/functions_04182018_duplicate_train'
train_dset = './package_renamed_wo_clique/functions_04182018_train.tsv'

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Eval Poincare Embeddings')
    # parser.add_argument('-dir', help='directory', type=str)
    # parser.add_argument('-max_epoch', help='Maximum epoch', type=int)
    # parser.add_argument('-interval', help='Interval to evaluate', type=int)
    opt = parser.parse_args()
    # Hard-coded run configuration (the CLI flags above are disabled).
    opt.dir = '/lfs/hyperion/0/thaonguyen/poincare_embeddings/trained_model_0513/'
    opt.max_epoch = 575
    opt.interval = 25

    idx, _, _ = slurp(train_dset)
    G_train, enames_inv_train, enames_train = build_graph(train_dset)

    # defaultdict over an itertools counter: assigns a fresh consecutive id to
    # each validation name on first lookup.
    ecount = count()
    enames_val = defaultdict(ecount.__next__)
    enames_inv_val = dict()
    with open(val_filename, 'r') as fval:
        for raw in fval:
            token = output_last_token(raw.strip(), duplicate_file)
            # Skip validation entities that never appeared in training.
            if token not in enames_train:
                print("NOT FOUND IN TRAIN:", token)
            else:
                enames_inv_val[enames_val[token]] = token
    enames_val = dict(enames_val)
    # print(len(enames_val.values()), min(enames_val.values()), max(enames_val.values()))
print("reading edgelist: {}".format(training_edgelist))

th.set_default_tensor_type('torch.FloatTensor')
# th.manual_seed(opt.seed)
# th.set_default_tensor_type('torch.cuda.FloatTensor')

# Logging verbosity follows the -debug flag.
log_level = logging.DEBUG if opt.debug else logging.INFO
log = logging.getLogger('poincare-nips17')
logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)

# Symmetrized read: every edge is available in both directions.
idx, objects = slurp(training_edgelist, symmetrize=True, fparse=parse_tsv)

# create adjacency list for evaluation
adjacency = ddict(set)
for record in range(len(idx)):
    head, tail, _ = idx[record]
    adjacency[head].add(tail)
adjacency = dict(adjacency)

# setup Riemannian gradients for distances
opt.retraction = rsgd.euclidean_retraction
if opt.distfn == 'poincare':
    distfn = model.PoincareDistance
    opt.rgrad = rsgd.poincare_grad
elif opt.distfn == 'euclidean':
    distfn = model.EuclideanDistance
parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
# parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
opt = parser.parse_args()
opt.fout = opt.dirout + opt.fout

th.set_default_tensor_type('torch.FloatTensor')

# Logging setup is disabled in this script:
# if opt.debug:
#     log_level = logging.DEBUG
# else:
#     log_level = logging.INFO
# log = logging.getLogger('poincare-nips17')
# logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)

idx, objects, enames_train = slurp(opt.dset)
with open(opt.fout, 'w') as fout:
    fout.write('Trained on dataset:' + opt.dset + '\n')

# create adjacency list for evaluation (weighted: node -> {neighbor: weight})
adjacency = ddict(dict)
for pos in range(len(idx)):
    src, dst, wt = idx[pos]
    adjacency[src][dst] = wt
adjacency = dict(adjacency)

# setup Riemannian gradients for distances
opt.retraction = rsgd.euclidean_retraction
if opt.distfn == 'poincare':
    distfn = model.PoincareDistance
    opt.rgrad = rsgd.poincare_grad