def main(args):
    """Evaluate a saved model on the 'test' split and print averaged metrics.

    Loads the dataset, builds 'chunk-head' and 'chunk-tail' samplers over the
    'test' split, restores the model from ``args.model_path`` and runs
    ``test`` either in this process or in ``args.num_proc`` worker processes.
    In the multi-process case every worker puts one metrics mapping on a
    queue; the mappings are averaged over ``args.num_proc`` and printed.

    Args:
        args: Argument namespace. Read: ``no_eval_filter``,
            ``neg_deg_sample``, ``data_path``, ``dataset``, ``format``,
            ``batch_size``, ``neg_sample_size``, ``neg_chunk_size``,
            ``num_worker``, ``num_proc``, ``model_path``. Mutated in place:
            ``eval_filter``, ``pickle_graph``, ``train``, ``valid``, ``test``,
            ``batch_size_eval``, ``neg_sample_size_test``,
            ``neg_deg_sample_eval``, ``step``, ``max_step`` and, when
            negative, ``neg_sample_size`` / ``neg_chunk_size``.

    Raises:
        AssertionError: If degree-based negative sampling is requested while
            evaluation filtering is enabled.
    """
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample:
        assert not args.eval_filter, \
            "if negative sampling based on degree, we can't filter positive edges."

    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.batch_size_eval = args.batch_size

    logger = get_logger(args)
    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    eval_dataset = EvalDataset(dataset, args)
    args.neg_sample_size_test = args.neg_sample_size
    args.neg_deg_sample_eval = args.neg_deg_sample
    # A negative sample size means "use every node of the evaluation graph".
    if args.neg_sample_size < 0:
        args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
    # A negative chunk size falls back to the (possibly just updated) sample size.
    if args.neg_chunk_size < 0:
        args.neg_chunk_size = args.neg_sample_size

    num_workers = args.num_worker
    # For multiprocessing evaluation, we don't need to sample multiple batches
    # at a time in each process.
    if args.num_proc > 1:
        num_workers = 1

    def _make_sampler_pair(rank, ranks):
        """Create the (head, tail) test samplers for one rank, head first."""
        head = eval_dataset.create_sampler('test', args.batch_size,
                                           args.neg_sample_size,
                                           args.neg_chunk_size,
                                           args.eval_filter,
                                           mode='chunk-head',
                                           num_workers=num_workers,
                                           rank=rank, ranks=ranks)
        tail = eval_dataset.create_sampler('test', args.batch_size,
                                           args.neg_sample_size,
                                           args.neg_chunk_size,
                                           args.eval_filter,
                                           mode='chunk-tail',
                                           num_workers=num_workers,
                                           rank=rank, ranks=ranks)
        return head, tail

    if args.num_proc > 1:
        test_sampler_tails = []
        test_sampler_heads = []
        for i in range(args.num_proc):
            test_sampler_head, test_sampler_tail = _make_sampler_pair(i, args.num_proc)
            test_sampler_heads.append(test_sampler_head)
            test_sampler_tails.append(test_sampler_tail)
    else:
        test_sampler_head, test_sampler_tail = _make_sampler_pair(0, 1)

    # load model
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    ckpt_path = args.model_path
    model = load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path)

    if args.num_proc > 1:
        model.share_memory()

    # test
    args.step = 0
    args.max_step = 0
    start = time.time()
    if args.num_proc > 1:
        queue = mp.Queue(args.num_proc)
        procs = []
        for i in range(args.num_proc):
            proc = mp.Process(target=test,
                              args=(args, model,
                                    [test_sampler_heads[i], test_sampler_tails[i]],
                                    'Test', queue))
            procs.append(proc)
            proc.start()

        # Drain the queue BEFORE joining: a child that has put data on a
        # multiprocessing queue may not exit until that data is consumed, so
        # joining first can deadlock.
        total_metrics = {}
        for _ in range(args.num_proc):
            metrics = queue.get()
            for k, v in metrics.items():
                # .get() keeps this safe even if workers report different keys.
                total_metrics[k] = total_metrics.get(k, 0) + v / args.num_proc

        for proc in procs:
            proc.join()

        # Report the cross-worker average, not the last worker's raw metrics.
        for k, v in total_metrics.items():
            print('Test average {} at [{}/{}]: {}'.format(
                k, args.step, args.max_step, v))
    else:
        test(args, model, [test_sampler_head, test_sampler_tail])
    print('Test takes {:.3f} seconds'.format(time.time() - start))
def main(args):
    """Run test-split evaluation of a checkpointed model.

    Loads the dataset, creates one 'PBG-head' / 'PBG-tail' sampler pair per
    rank over the 'test' split, restores the model from ``args.model_path``
    and calls ``test`` — in ``args.num_proc`` child processes when
    ``args.num_proc > 1``, otherwise directly in this process.

    Args:
        args: Argument namespace; several attributes (``pickle_graph``,
            ``train``, ``valid``, ``test``, ``batch_size_eval``,
            ``neg_sample_size_test``, ``step``, ``max_step`` and possibly
            ``neg_sample_size``) are overwritten here.
    """
    # --- dataset -----------------------------------------------------------
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.batch_size_eval = args.batch_size

    logger = get_logger(args)
    # The regular negative sampler is used here because all positive edges
    # must be excluded.
    eval_dataset = EvalDataset(dataset, args)
    args.neg_sample_size_test = args.neg_sample_size
    if args.neg_sample_size < 0:
        # Negative size: fall back to the node count of the evaluation graph.
        args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()

    # --- samplers ----------------------------------------------------------
    # One [head, tail] pair per rank; a single-process run is simply rank 0
    # of 1. The head sampler is always created before the tail sampler.
    total_ranks = args.num_proc if args.num_proc > 1 else 1
    sampler_pairs = []
    for rank in range(total_ranks):
        head_sampler = eval_dataset.create_sampler(
            'test', args.batch_size, args.neg_sample_size,
            mode='PBG-head', num_workers=args.num_worker,
            rank=rank, ranks=total_ranks)
        tail_sampler = eval_dataset.create_sampler(
            'test', args.batch_size, args.neg_sample_size,
            mode='PBG-tail', num_workers=args.num_worker,
            rank=rank, ranks=total_ranks)
        sampler_pairs.append([head_sampler, tail_sampler])

    # --- model -------------------------------------------------------------
    model = load_model_from_checkpoint(logger, args, dataset.n_entities,
                                       dataset.n_relations, args.model_path)
    if args.num_proc > 1:
        model.share_memory()

    # --- evaluation --------------------------------------------------------
    args.step = 0
    args.max_step = 0
    if not args.num_proc > 1:
        test(args, model, sampler_pairs[0])
        return

    workers = []
    for rank in range(args.num_proc):
        worker = mp.Process(target=test,
                            args=(args, model, sampler_pairs[rank]))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
def main(args):
    """Evaluate a saved model on the 'test' split and print averaged metrics.

    Loads the dataset, builds 'chunk-head' and 'chunk-tail' samplers over the
    'test' split, restores the model from ``args.model_path`` and runs the
    evaluation. With ``args.num_proc > 1`` each worker process runs
    ``test_mp`` and puts a list of metric mappings on a queue; all entries
    from all workers are pooled and the mean of every metric is printed.
    Otherwise ``test`` is called directly in this process.

    Args:
        args: Argument namespace. Read: ``no_eval_filter``,
            ``neg_deg_sample_eval``, ``data_path``, ``dataset``, ``format``,
            ``data_files``, ``neg_sample_size_eval``, ``batch_size_eval``,
            ``num_proc``, ``model_path``. Mutated in place: ``eval_filter``,
            ``pickle_graph``, ``train``, ``valid``, ``test``,
            ``strict_rel_part``, ``soft_rel_part``, ``async_update``,
            ``batch_size_eval``, ``num_workers``, ``step``, ``max_step`` and,
            when negative, ``neg_sample_size_eval`` / ``neg_sample_size``.

    Raises:
        AssertionError: If degree-based negative sampling is requested while
            evaluation filtering is enabled.
    """
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, \
            "if negative sampling based on degree, we can't filter positive edges."

    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format, args.data_files)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.strict_rel_part = False
    args.soft_rel_part = False
    args.async_update = False

    logger = get_logger(args)
    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    eval_dataset = EvalDataset(dataset, args)

    # A negative sample size means "use every node of the evaluation graph".
    if args.neg_sample_size_eval < 0:
        args.neg_sample_size_eval = args.neg_sample_size = eval_dataset.g.number_of_nodes()
    args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval,
                                                     args.neg_sample_size_eval)

    args.num_workers = 8  # fix num_workers to 8

    def _make_sampler_pair(rank, ranks):
        """Create the (head, tail) test samplers for one rank, head first."""
        # neg_sample_size_eval is intentionally passed twice, as in the
        # positional signature used by the caller of create_sampler here.
        head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                           args.neg_sample_size_eval,
                                           args.neg_sample_size_eval,
                                           args.eval_filter,
                                           mode='chunk-head',
                                           num_workers=args.num_workers,
                                           rank=rank, ranks=ranks)
        tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                           args.neg_sample_size_eval,
                                           args.neg_sample_size_eval,
                                           args.eval_filter,
                                           mode='chunk-tail',
                                           num_workers=args.num_workers,
                                           rank=rank, ranks=ranks)
        return head, tail

    if args.num_proc > 1:
        test_sampler_tails = []
        test_sampler_heads = []
        for i in range(args.num_proc):
            test_sampler_head, test_sampler_tail = _make_sampler_pair(i, args.num_proc)
            test_sampler_heads.append(test_sampler_head)
            test_sampler_tails.append(test_sampler_tail)
    else:
        test_sampler_head, test_sampler_tail = _make_sampler_pair(0, 1)

    # load model
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    ckpt_path = args.model_path
    model = load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path)

    if args.num_proc > 1:
        model.share_memory()

    # test
    args.step = 0
    args.max_step = 0
    start = time.time()
    if args.num_proc > 1:
        queue = mp.Queue(args.num_proc)
        procs = []
        for i in range(args.num_proc):
            proc = mp.Process(target=test_mp,
                              args=(args, model,
                                    [test_sampler_heads[i], test_sampler_tails[i]],
                                    i, 'Test', queue))
            procs.append(proc)
            proc.start()

        # Pool the log entries of every worker. The queue is drained before
        # the workers are joined so no child blocks on unread queue data.
        logs = []
        for _ in range(args.num_proc):
            logs.extend(queue.get())

        # Mean of each metric over all pooled entries. Guarded so that an
        # empty result set does not raise before the workers are joined.
        metrics = {}
        if logs:
            for metric in logs[0].keys():
                metrics[metric] = sum(entry[metric] for entry in logs) / len(logs)
        for k, v in metrics.items():
            print('Test average {} at [{}/{}]: {}'.format(
                k, args.step, args.max_step, v))

        for proc in procs:
            proc.join()
    else:
        test(args, model, [test_sampler_head, test_sampler_tail])
    print('Test takes {:.3f} seconds'.format(time.time() - start))