Example #1
def load_model(checkpoint_file):
	"""Rebuild a trained model from a saved checkpoint.

	`slurp` and `model_class` come from the surrounding project; `th` is
	torch and `Namespace` is presumably argparse.Namespace.
	"""
	assert checkpoint_file is not None
	checkpoint = th.load(checkpoint_file)
	tsv_file = checkpoint['dataset']
	idx, objects, enames = slurp(tsv_file)
	dim = checkpoint['dim']
	distfn = checkpoint['distfn']
	# Build a throwaway options namespace; only dim and distfn matter here.
	# negs and dset are required fields but unused when just loading weights.
	opt_temp = Namespace()
	opt_temp.dim = dim
	opt_temp.distfn = distfn
	opt_temp.negs = 50
	opt_temp.dset = 'test.tsv'
	model, data, model_name, _ = model_class.SNGraphDataset.initialize(distfn, opt_temp, idx, objects, enames)
	model.load_state_dict(checkpoint['model'])
	return model
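A minimal usage sketch for the helper above (the checkpoint path below is hypothetical, and `th` is assumed to be torch):

	# hypothetical path to a checkpoint produced by the training script
	model = load_model('trained_model/model_checkpoint.pth')
	model.eval()
	# inspect the loaded parameters, e.g. the embedding table shapes
	for name, tensor in model.state_dict().items():
		print(name, tuple(tensor.shape))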
Example #2
def build_graph(dataset, directed=False):
	if directed:
		G = nx.DiGraph()
	else:
		G = nx.Graph()
	idx, objects, enames = slurp(dataset)
	# enames maps entity name -> integer id; invert it for id -> name lookups
	enames_inv = {v: k for k, v in enames.items()}

	idx = idx.numpy()[:, :2]  # keep only the two node-id columns, drop any weights
	for r in range(idx.shape[0]):
		row = idx[r, :]
		G.add_edge(row[1], row[0])
	return G, enames_inv, dict(enames)
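A short usage sketch for build_graph (the TSV path and entity name are hypothetical; as in Example #5, `enames` is assumed to map entity names to integer ids):

	# hypothetical transitive-closure TSV in the format slurp() expects
	G, enames_inv, enames = build_graph('wordnet/mammal_closure.tsv')
	print(G.number_of_nodes(), 'nodes,', G.number_of_edges(), 'edges')
	root = enames['mammal.n.01']  # hypothetical entity name present in the file
	hops = nx.shortest_path_length(G, source=root)  # {node id: graph distance}
	print(max(hops.values()), 'hops to the farthest reachable node')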
Example #3
    parser.add_argument('-negs', help='Number of negatives', type=int, default=20)
    parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
    parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
    parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
    parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
    parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
    opt = parser.parse_args()

    th.set_default_tensor_type('torch.DoubleTensor')
    if opt.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    log = logging.getLogger('poincare-nips17')
    logging.basicConfig(level=log_level, format='%(message)s')
    idx, objects = slurp(opt.dset)

    # create adjacency list for evaluation
    adjacency = ddict(set)
    for i in range(len(idx)):
        s, o, _ = idx[i]
        adjacency[s].add(o)
    adjacency = dict(adjacency)

    # setup Riemannian gradients for distances
    opt.retraction = rsgd.euclidean_retraction
    if opt.distfn == 'poincare':
        distfn = model.PoincareDistance
        opt.rgrad = rsgd.poincare_grad
    elif opt.distfn == 'euclidean':
        distfn = model.EuclideanDistance
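For context, `rsgd.poincare_grad` in the Nickel & Kiela (2017) setup rescales the Euclidean gradient by the inverse of the Poincaré-ball metric; a minimal sketch of that idea (an illustration, not the repository's implementation):

    import torch as th

    def poincare_grad_sketch(p, d_p):
        # The Poincare metric is conformal to the Euclidean one with factor
        # 4 / (1 - ||p||^2)^2, so the Riemannian gradient is the Euclidean
        # gradient d_p scaled by (1 - ||p||^2)^2 / 4.
        p_sqnorm = th.sum(p ** 2, dim=-1, keepdim=True)
        return d_p * ((1 - p_sqnorm) ** 2 / 4)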
Example #4
    parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
    parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
    parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
    parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
    parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
    opt = parser.parse_args()

    th.set_default_tensor_type('torch.FloatTensor')
    if opt.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    log = logging.getLogger('poincare-nips17')
    logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)
    if opt.dset.endswith('.tsv'):
        idx, objects = slurp(opt.dset) #, fparse=parse_space)
    elif opt.dset.endswith('.p'):
        idx, objects = slurp_pickled_nx(opt.dset, opt.f)


    # create adjacency list for evaluation, key is node id and values are adjacent nodes ids
    adjacency = ddict(set)
    for i in range(len(idx)):
        s, o, _ = idx[i]
        adjacency[s].add(o)
    adjacency = dict(adjacency)

    # setup Riemannian gradients for distances
    opt.retraction = rsgd.euclidean_retraction
    if opt.distfn == 'poincare':
        distfn = model.PoincareDistance
Example #5
import pickle

val_filename = './package_renamed_wo_clique/functions_04182018_val'
duplicate_file = './package_renamed_wo_clique/functions_04182018_duplicate_train'
train_dset = './package_renamed_wo_clique/functions_04182018_train.tsv'

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Eval Poincare Embeddings')
    #parser.add_argument('-dir', help='directory', type=str)
    #parser.add_argument('-max_epoch', help='Maximum epoch', type=int)
    #parser.add_argument('-interval', help='Interval to evaluate', type=int)
    opt = parser.parse_args()
    opt.dir = '/lfs/hyperion/0/thaonguyen/poincare_embeddings/trained_model_0513/'
    opt.max_epoch = 575
    opt.interval = 25
    idx, _, _ = slurp(train_dset)
    G_train, enames_inv_train, enames_train = build_graph(train_dset)
    ecount = count()
    enames_val = defaultdict(ecount.__next__)
    enames_inv_val = dict()
    with open(val_filename, 'r') as fval:
        for line in fval:
            last_token = output_last_token(line.strip(), duplicate_file)
            if last_token not in enames_train:
                print("NOT FOUND IN TRAIN:", last_token)
                continue
            else:
                enames_inv_val[enames_val[last_token]] = last_token

    enames_val = dict(enames_val)
    #print(len(enames_val.values()), min(enames_val.values()), max(enames_val.values()))
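For reference, the `defaultdict(ecount.__next__)` idiom above hands out a fresh integer id the first time each key is looked up, which is how entity names get mapped to contiguous ids:

    from collections import defaultdict
    from itertools import count

    ids = defaultdict(count().__next__)
    print(ids['foo'], ids['bar'], ids['foo'])  # -> 0 1 0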
Example #6
    print("reading edgelist: {}".format(training_edgelist))

    th.set_default_tensor_type('torch.FloatTensor')
    # th.manual_seed(opt.seed)
    # th.set_default_tensor_type('torch.cuda.FloatTensor')

    if opt.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    log = logging.getLogger('poincare-nips17')
    logging.basicConfig(level=log_level,
                        format='%(message)s',
                        stream=sys.stdout)
    idx, objects = slurp(training_edgelist, symmetrize=True, fparse=parse_tsv)

    # create adjacency list for evaluation
    adjacency = ddict(set)
    for i in range(len(idx)):
        s, o, _ = idx[i]
        adjacency[s].add(o)
    adjacency = dict(adjacency)

    # setup Riemannian gradients for distances
    opt.retraction = rsgd.euclidean_retraction
    if opt.distfn == 'poincare':
        distfn = model.PoincareDistance
        opt.rgrad = rsgd.poincare_grad
    elif opt.distfn == 'euclidean':
        distfn = model.EuclideanDistance
Example #7
    parser.add_argument('-nproc', help='Number of processes', type=int, default=5)
    parser.add_argument('-ndproc', help='Number of data loading processes', type=int, default=2)
    parser.add_argument('-eval_each', help='Run evaluation each n-th epoch', type=int, default=10)
    parser.add_argument('-burnin', help='Duration of burn in', type=int, default=20)
    #parser.add_argument('-debug', help='Print debug output', action='store_true', default=False)
    opt = parser.parse_args()
    opt.fout = opt.dirout + opt.fout

    th.set_default_tensor_type('torch.FloatTensor')
    # if opt.debug:
    #     log_level = logging.DEBUG
    # else:
    #     log_level = logging.INFO
    # log = logging.getLogger('poincare-nips17')
    # logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)
    idx, objects, enames_train = slurp(opt.dset)
    with open(opt.fout, 'w') as fout:
        fout.write('Trained on dataset: ' + opt.dset + '\n')

    # create adjacency list for evaluation
    adjacency = ddict(dict)
    for i in range(len(idx)):
        s, o, w = idx[i]
        adjacency[s][o] = w
    adjacency = dict(adjacency)

    # setup Riemannian gradients for distances
    opt.retraction = rsgd.euclidean_retraction
    if opt.distfn == 'poincare':
        distfn = model.PoincareDistance
        opt.rgrad = rsgd.poincare_grad
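Putting the pieces above together, a full Riemannian SGD step on the Poincaré ball conceptually rescales the gradient and then projects the updated point back inside the open unit ball; a minimal sketch of the idea (an illustration, not the `rsgd` module's actual code):

    import torch as th

    def poincare_rsgd_step_sketch(p, d_p, lr, eps=1e-5):
        # Rescale the Euclidean gradient d_p by the inverse Poincare metric ...
        riemannian = d_p * ((1 - th.sum(p ** 2, dim=-1, keepdim=True)) ** 2 / 4)
        p = p - lr * riemannian
        # ... and pull any point that escaped the unit ball back just inside it.
        norm = th.norm(p, dim=-1, keepdim=True)
        return th.where(norm >= 1, p / norm * (1 - eps), p)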