Exemple #1
0
def main(args):
    """Evaluate a checkpointed model on the 'test' samplers of a dataset.

    The function loads the dataset, builds one head-mode and one tail-mode
    test sampler per rank, restores the model from ``args.model_path`` and
    runs ``test`` either in-process or in ``args.num_proc`` worker processes.
    In the multi-process case each worker is expected to put one metrics
    dict on a shared queue; the per-metric mean over all workers is printed.

    Args:
        args: Mutable namespace of options. Read here: ``no_eval_filter``,
            ``neg_deg_sample``, ``data_path``, ``dataset``, ``format``,
            ``batch_size``, ``neg_sample_size``, ``neg_chunk_size``,
            ``num_worker``, ``num_proc`` and ``model_path``. Written here:
            ``eval_filter``, ``pickle_graph``, ``train``, ``valid``,
            ``test``, ``batch_size_eval``, ``neg_sample_size_test``,
            ``neg_deg_sample_eval``, ``step``, ``max_step`` and, when they
            are negative on entry, ``neg_sample_size`` / ``neg_chunk_size``.

    Raises:
        AssertionError: if ``args.neg_deg_sample`` is set while evaluation
            filtering is enabled.
    """
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.batch_size_eval = args.batch_size

    logger = get_logger(args)
    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    eval_dataset = EvalDataset(dataset, args)

    args.neg_sample_size_test = args.neg_sample_size
    args.neg_deg_sample_eval = args.neg_deg_sample
    # A negative sample size is replaced by the number of nodes in the graph.
    if args.neg_sample_size < 0:
        args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
    # A negative chunk size falls back to the (possibly just updated) sample size.
    if args.neg_chunk_size < 0:
        args.neg_chunk_size = args.neg_sample_size

    multi_proc = args.num_proc > 1
    num_ranks = args.num_proc if multi_proc else 1
    num_workers = args.num_worker
    # for multiprocessing evaluation, we don't need to sample multiple batches at a time
    # in each process.
    if multi_proc:
        num_workers = 1

    # One [head, tail] sampler pair per rank; a single pair (rank 0 of 1)
    # when evaluation runs in-process.
    sampler_pairs = []
    for rank in range(num_ranks):
        sampler_pairs.append([
            eval_dataset.create_sampler(
                'test',
                args.batch_size,
                args.neg_sample_size,
                args.neg_chunk_size,
                args.eval_filter,
                mode=mode,
                num_workers=num_workers,
                rank=rank,
                ranks=num_ranks)
            for mode in ('chunk-head', 'chunk-tail')
        ])

    # load model
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    ckpt_path = args.model_path
    model = load_model_from_checkpoint(logger, args, n_entities, n_relations,
                                       ckpt_path)

    if multi_proc:
        model.share_memory()
    # test
    args.step = 0
    args.max_step = 0
    start = time.time()
    if multi_proc:
        queue = mp.Queue(args.num_proc)
        procs = []
        for pair in sampler_pairs:
            proc = mp.Process(target=test,
                              args=(args, model, pair, 'Test', queue))
            procs.append(proc)
            proc.start()

        # Drain the queue BEFORE joining: a child that has put items on a
        # multiprocessing queue may not terminate until they are consumed,
        # so joining first can deadlock.
        total_metrics = {}
        for _ in range(args.num_proc):
            metrics = queue.get()
            for k, v in metrics.items():
                total_metrics[k] = total_metrics.get(k, 0) + v / args.num_proc

        for proc in procs:
            proc.join()

        # Report the averaged metrics, not the last worker's raw values.
        for k, v in total_metrics.items():
            print('Test average {} at [{}/{}]: {}'.format(
                k, args.step, args.max_step, v))
    else:
        test(args, model, sampler_pairs[0])
    print('Test takes {:.3f} seconds'.format(time.time() - start))
Exemple #2
0
def main(args):
    """Run the 'test' evaluation of a checkpointed model.

    Loads the dataset, creates a 'PBG-head' and a 'PBG-tail' test sampler
    for every rank, restores the model from ``args.model_path`` and calls
    ``test`` either directly or once per worker process when
    ``args.num_proc`` is greater than one.

    Args:
        args: Mutable namespace of options. Several attributes
            (``pickle_graph``, ``train``, ``valid``, ``test``,
            ``batch_size_eval``, ``neg_sample_size_test``, ``step``,
            ``max_step`` and possibly ``neg_sample_size``) are overwritten.
    """
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.batch_size_eval = args.batch_size

    logger = get_logger(args)
    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    eval_dataset = EvalDataset(dataset, args)
    args.neg_sample_size_test = args.neg_sample_size
    if args.neg_sample_size < 0:
        # Negative means "use the number of nodes in the graph".
        node_count = eval_dataset.g.number_of_nodes()
        args.neg_sample_size_test = node_count
        args.neg_sample_size = node_count

    use_workers = args.num_proc > 1
    rank_total = args.num_proc if use_workers else 1

    def build_sampler(mode, rank):
        # All test samplers share everything except the mode and the rank.
        return eval_dataset.create_sampler(
            'test',
            args.batch_size,
            args.neg_sample_size,
            mode=mode,
            num_workers=args.num_worker,
            rank=rank,
            ranks=rank_total)

    # One [head, tail] pair per rank, head created first.
    sampler_pairs = []
    for rank in range(rank_total):
        head_sampler = build_sampler('PBG-head', rank)
        tail_sampler = build_sampler('PBG-tail', rank)
        sampler_pairs.append([head_sampler, tail_sampler])

    # load model
    model = load_model_from_checkpoint(logger, args, dataset.n_entities,
                                       dataset.n_relations, args.model_path)

    if use_workers:
        model.share_memory()
    # test
    args.step = 0
    args.max_step = 0

    if not use_workers:
        test(args, model, sampler_pairs[0])
        return

    workers = []
    for pair in sampler_pairs:
        worker = mp.Process(target=test, args=(args, model, pair))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
Exemple #3
0
def main(args):
    """Evaluate a checkpointed model on the 'test' samplers of a dataset.

    The function loads the dataset, builds one 'chunk-head' and one
    'chunk-tail' test sampler per rank, restores the model from
    ``args.model_path`` and evaluates it. With ``args.num_proc > 1`` the
    work is spread over worker processes running ``test_mp``; each worker
    is expected to put a list of per-sample metric dicts on a shared queue,
    and the mean of every metric over all collected entries is printed.

    Args:
        args: Mutable namespace of options. Read here: ``no_eval_filter``,
            ``neg_deg_sample_eval``, ``data_path``, ``dataset``, ``format``,
            ``data_files``, ``neg_sample_size_eval``, ``batch_size_eval``,
            ``num_proc`` and ``model_path``. Written here: ``eval_filter``,
            ``pickle_graph``, ``train``, ``valid``, ``test``,
            ``strict_rel_part``, ``soft_rel_part``, ``async_update``,
            ``batch_size_eval``, ``num_workers``, ``step``, ``max_step``
            and, when ``neg_sample_size_eval`` is negative on entry,
            ``neg_sample_size_eval`` / ``neg_sample_size``.

    Raises:
        AssertionError: if ``args.neg_deg_sample_eval`` is set while
            evaluation filtering is enabled.
    """
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format,
                          args.data_files)
    args.pickle_graph = False
    args.train = False
    args.valid = False
    args.test = True
    args.strict_rel_part = False
    args.soft_rel_part = False
    args.async_update = False

    logger = get_logger(args)
    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    eval_dataset = EvalDataset(dataset, args)

    # A negative sample size is replaced by the number of nodes in the graph.
    if args.neg_sample_size_eval < 0:
        args.neg_sample_size_eval = args.neg_sample_size = eval_dataset.g.number_of_nodes()
    args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval,
                                                     args.neg_sample_size_eval)

    args.num_workers = 8  # fix num_workers to 8

    multi_proc = args.num_proc > 1
    num_ranks = args.num_proc if multi_proc else 1

    # One [head, tail] sampler pair per rank; a single pair (rank 0 of 1)
    # when evaluation runs in-process. The sample size is passed twice,
    # exactly as before (second and third positional arguments).
    sampler_pairs = []
    for rank in range(num_ranks):
        sampler_pairs.append([
            eval_dataset.create_sampler(
                'test',
                args.batch_size_eval,
                args.neg_sample_size_eval,
                args.neg_sample_size_eval,
                args.eval_filter,
                mode=mode,
                num_workers=args.num_workers,
                rank=rank,
                ranks=num_ranks)
            for mode in ('chunk-head', 'chunk-tail')
        ])

    # load model
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    ckpt_path = args.model_path
    model = load_model_from_checkpoint(logger, args, n_entities, n_relations,
                                       ckpt_path)

    if multi_proc:
        model.share_memory()
    # test
    args.step = 0
    args.max_step = 0
    start = time.time()
    if multi_proc:
        queue = mp.Queue(args.num_proc)
        procs = []
        for rank, pair in enumerate(sampler_pairs):
            proc = mp.Process(target=test_mp,
                              args=(args, model, pair, rank, 'Test', queue))
            procs.append(proc)
            proc.start()

        # Collect every worker's log entries before joining, so no child is
        # left blocked on a queue that has not been drained.
        logs = []
        for _ in range(args.num_proc):
            logs.extend(queue.get())

        # Guard against workers that reported no entries at all; indexing
        # logs[0] would otherwise raise and leave the children unjoined.
        if logs:
            metrics = {
                metric: sum(log[metric] for log in logs) / len(logs)
                for metric in logs[0]
            }
            for k, v in metrics.items():
                print('Test average {} at [{}/{}]: {}'.format(
                    k, args.step, args.max_step, v))

        for proc in procs:
            proc.join()
    else:
        test(args, model, sampler_pairs[0])
    print('Test takes {:.3f} seconds'.format(time.time() - start))