Example #1
def run_program(args):
    kg = utils.load_kg(args.dataset)
    kg_mask = KGMask(kg)

    train_labels = utils.load_labels(args.dataset, 'train')
    test_labels = utils.load_labels(args.dataset, 'test')
    path_counts = utils.load_path_count(args.dataset)  # training path frequencies
    with open(args.infer_path_data, 'rb') as f:
        raw_paths = pickle.load(f)  # test paths with scores

    symbolic_model = create_symbolic_model(args, kg, train=False)
    program_exe = MetaProgramExecutor(symbolic_model, kg_mask, args)

    pred_labels = {}
    pbar = tqdm(total=len(test_labels))
    for uid in test_labels:
        program = create_heuristic_program(kg.metapaths, raw_paths[uid], path_counts[uid], args.sample_size)
        program_exe.execute(program, uid, train_labels[uid])
        paths = program_exe.collect_results(program)
        tmp = [(r[0][-1], np.mean(r[1][-1])) for r in paths]  # (last node of each path, mean of its final scores)
        tmp = sorted(tmp, key=lambda x: x[1], reverse=True)[:10]  # keep the top-10 by score
        pred_labels[uid] = [t[0] for t in tmp]
        pbar.update(1)

    msg = evaluate_with_insufficient_pred(pred_labels, test_labels)
    logger.info(msg)
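For context, a minimal invocation sketch. The flag names below are hypothetical; only the attribute names read in the function body above (and whatever extra fields create_symbolic_model expects) come from the code:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str)          # hypothetical flag names;
parser.add_argument('--infer_path_data', type=str)  # only the attribute names
parser.add_argument('--sample_size', type=int)      # come from run_program above
run_program(parser.parse_args())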
Example #2
def estimate_path_count(args):
    kg = utils.load_kg(args.dataset)
    num_mp = len(kg.metapaths)
    train_labels = utils.load_labels(args.dataset, 'train')
    counts = {}
    pbar = tqdm(total=len(train_labels))
    for uid in train_labels:
        counts[uid] = np.zeros(num_mp)
        for pid in train_labels[uid]:
            for mpid in range(num_mp):
                cnt = kg.count_paths_with_target(mpid, uid, pid, 50)
                counts[uid][mpid] += cnt
        counts[uid] = counts[uid] / len(train_labels[uid])  # average over the user's positive items
        pbar.update(1)
    utils.save_path_count(args.dataset, counts)
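The final division averages the per-metapath counts over the user's positive items; a toy check of that arithmetic:

import numpy as np

# Two positive items over two metapaths: counts [3, 1] and [5, 3]
# average to [4, 2], matching counts[uid] / len(train_labels[uid]) above.
counts = (np.array([3.0, 1.0]) + np.array([5.0, 3.0])) / 2
assert np.allclose(counts, [4.0, 2.0])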
Example #3
def main(args):

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    n_user, n_item, train_rec, eval_rec, test_rec = load_rating()
    n_entity, n_relation, kg = load_kg()

    kg_data = (kg[:, 0], kg[:, 1], kg[:, 2])
    rec_data = (train_rec[:, 0], train_rec[:, 1], train_rec[:, 2])
    rec_val = (eval_rec[:, 0], eval_rec[:, 1], eval_rec[:, 2])
    train_data_kg = TrainSet(kg_data)
    train_loader_kg = DataLoader(train_data_kg,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_train)
    train_data_rec = TrainSet(rec_data)
    eval_data_rec = TrainSet(rec_val)

    train_loader_rec = DataLoader(train_data_rec,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle_train)
    eval_loader_rec = DataLoader(eval_data_rec,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_test)
    model = MultiKR(n_user + 1,
                    n_item + 1,
                    n_entity + 1,
                    n_relation + 1,
                    n_layer=args.n_layer,
                    embed_dim=args.batch_size,  # note: embedding dim is tied to the batch size here
                    hidden_layers=args.hidden_layers,
                    dropouts=args.dropouts,
                    output_rec=args.output_rec)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=args.weight_decay,
                                 lr=args.lr)
    loss_function = nn.BCEWithLogitsLoss()
    epochs = args.epochs
    train_model(model, train_loader_rec, train_loader_kg, eval_loader_rec,
                optimizer, loss_function, epochs)
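TrainSet is not shown on this page. A minimal sketch consistent with how it is constructed here (a 3-tuple of equal-length columns); the field names are assumptions:

from torch.utils.data import Dataset

class TrainSet(Dataset):
    """Hypothetical minimal version: wraps three parallel columns."""

    def __init__(self, data):
        self.col_a, self.col_b, self.col_c = data  # e.g. (user, item, label)

    def __len__(self):
        return len(self.col_a)

    def __getitem__(self, idx):
        return self.col_a[idx], self.col_b[idx], self.col_c[idx]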
Example #4
def infer_paths(args):
    kg = utils.load_kg(args.dataset)
    model = create_symbolic_model(args, kg, train=False)

    train_labels = utils.load_labels(args.dataset, 'train')
    train_uids = list(train_labels.keys())
    kg_mask = KGMask(kg)

    predicts = {}
    pbar = tqdm(total=len(train_uids))
    for uid in train_uids:
        predicts[uid] = {}
        for mpid in range(len(kg.metapaths)):
            metapath = kg.metapaths[mpid]
            paths = model.infer_with_path(metapath, uid, kg_mask,
                                          excluded_pids=train_labels[uid],
                                          topk_paths=20)
            predicts[uid][mpid] = paths
        pbar.update(1)
    with open(args.infer_path_data, 'wb') as f:
        pickle.dump(predicts, f)
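The pickle written here is the file that Example #1 later reads back as raw_paths: a dict keyed by user id, mapping each metapath id to that user's top-20 inferred paths.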
Example #5
File: main.py Project: busesese/MKR
def main(args):
    n_user, n_item, train_rec, eval_rec, test_rec = load_rating()
    n_entity, n_relation, kg = load_kg()

    kg_data = (kg[:, 0], kg[:, 1], kg[:, 2])
    rec_data = (train_rec[:, 0], train_rec[:, 1], train_rec[:, 2])
    rec_val = (eval_rec[:, 0], eval_rec[:, 1], eval_rec[:, 2])

    train_data_kg = TrainSet(kg_data)
    train_loader_kg = DataLoader(train_data_kg,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_train)

    train_data_rec = TrainSet(rec_data)
    eval_data_rec = TrainSet(rec_val)

    train_loader_rec = DataLoader(train_data_rec,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle_train)
    eval_loader_rec = DataLoader(eval_data_rec,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_test)

    model = MultiKR(n_user + 1,
                    n_item + 1,
                    n_entity + 1,
                    n_relation + 1,
                    n_layer=args.n_layer,
                    embed_dim=args.batch_size,  # note: embedding dim is tied to the batch size here
                    hidden_layers=args.hidden_layers,
                    dropouts=args.dropouts,
                    output_rec=args.output_rec)
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=args.weight_decay,
                                 lr=args.lr)
    loss_function = nn.BCEWithLogitsLoss()
    epochs = args.epochs
    train_model(model, train_loader_rec, train_loader_kg, eval_loader_rec,
                optimizer, loss_function, epochs)
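Unlike Example #3, this variant never moves the model to a device, so training runs on CPU unless MultiKR or train_model handles device placement internally.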
Example #6
def main(args, model_path):
    print(os.getcwd())
    print("start training ...")
    print(model_path)
    start = time.time()

    ent_str2id, ent_id2str, rel_str2id, rel_id2str = load_kg()
    print("making vocab is done " + str(time.time() - start))
    n_ent, n_rel = len(ent_str2id), len(rel_str2id)

    model = ConvE(args, n_ent, n_rel)
    model.init()
    if args.multi_gpu:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(torch.load(model_path))
    model.cuda()
    print('cuda : ' + str(torch.cuda.is_available()) + ' count : ' +
          str(torch.cuda.device_count()))

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(sum(params))
    start = time.time()
    evalset = KG_EvalSet(dir + '/test_set.txt', args, n_ent)  # 'dir' is a module-level data directory (shadows the built-in)
    print("making evalset is done " + str(time.time() - start))
    evalloader = DataLoader(dataset=evalset,
                            num_workers=args.num_worker,
                            batch_size=args.batch_size,
                            shuffle=True)

    model.eval()
    with torch.no_grad():
        start = time.time()
        ranking_and_hits(model, args, evalloader, n_ent, ent_id2str,
                         rel_id2str)
        end = time.time()
        print('eval time used: {} minutes'.format((end - start) / 60))
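Note that wrapping the model in DataParallel before load_state_dict assumes the checkpoint was saved from a wrapped model, whose keys carry a 'module.' prefix. If the checkpoint came from a plain model (or vice versa), a common workaround is to realign the key prefixes before loading; a sketch for the wrapped-checkpoint-into-plain-model case:

state = torch.load(model_path)
# Strip the 'module.' prefix that DataParallel adds to every parameter key.
state = {k[len('module.'):] if k.startswith('module.') else k: v
         for k, v in state.items()}
model.load_state_dict(state)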
Example #7
def main(args, model_path):
    print(os.getcwd())
    print("start training ...")

    start = time.time()

    ent_str2id, ent_id2str, rel_str2id, rel_id2str = load_kg()
    print ("making vocab is done "+str(time.time()-start))
    n_ent, n_rel = len(ent_str2id), len(rel_str2id)


    model = ConvE(args, n_ent, n_rel)
    model.init()
    if args.multi_gpu:
        model = torch.nn.DataParallel(model)
    bce = torch.nn.BCELoss().cuda()
    model.cuda()
    print('cuda : ' + str(torch.cuda.is_available()) + ' count : ' +
          str(torch.cuda.device_count()))

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(sum(params))
    opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    start = time.time()
    dataset = KG_DataSet(dir + '/train_set.txt', args, n_ent)  # 'dir' is a module-level data directory
    print("making train dataset is done " + str(time.time() - start))
    start = time.time()
    evalset = KG_EvalSet(dir + '/test_set.txt', args, n_ent)
    print("making evalset is done " + str(time.time() - start))
    prev_loss = 1000
    patience = 0
    early_stop = False
    best_loss = 1000
    for epoch in range(args.epochs):
        print(epoch)
        epoch_loss = 0
        epoch_start = time.time()
        model.train()
        tot = 0.0
        dataloader = DataLoader(dataset=dataset, num_workers=args.num_worker, batch_size=args.batch_size, shuffle=True)
        evalloader = DataLoader(dataset=evalset, num_workers=args.num_worker, batch_size=args.batch_size, shuffle=True)
        n_train = len(dataset)

        for i, data in enumerate(dataloader):
            opt.zero_grad()
            start = time.time()
            head, rel, tail = data
            head = torch.LongTensor(head)
            rel = torch.LongTensor(rel)
            head = head.cuda()
            rel = rel.cuda()
            batch_size = head.size(0)
            e2_multi = tail.cuda()
            print ("e2_multi " + str(time.time()-start) + "\n")
            start = time.time()
            pred = model.forward(head, rel)
            loss = bce(pred, e2_multi)
            loss.backward()
            opt.step()
            batch_loss = torch.sum(loss)
            print ("step " + str(time.time()-start) + "\n")
            epoch_loss += batch_loss
            tot += head.size(0)
            print('\r{:>10} epoch {} progress {} loss: {}\n'.format('', epoch, tot / n_train, batch_loss), end='')
        epoch_loss /= len(dataloader)  # average loss over batches
        print('')
        end = time.time()
        time_used = end - epoch_start
        print('one epoch time: {} minutes'.format(time_used / 60))
        print('{} epochs'.format(epoch))
        print('epoch {} loss: {}'.format(epoch + 1, epoch_loss))
        # TODO: calculate valid loss and develop early stopping
        model.eval()
        with torch.no_grad():
            valid_loss = 0.0
            for i, data in enumerate(evalloader):
                head, rel, tail, tail_idx = data
                head = torch.LongTensor(head)
                rel = torch.LongTensor(rel)
                head = head.cuda()
                rel = rel.cuda()
                batch_size = head.size(0)

                e2_multi1 = tail.cuda()
                pred1 = model.forward(head, rel)
                loss1 = bce(pred1, e2_multi1)
                sum_loss = torch.sum(loss1).item()
                sum_loss /= batch_size
                valid_loss += sum_loss
            print ("valid loss : " + str(valid_loss))
            with open(os.getcwd() + '/log_file/log.txt', 'a') as f:
                f.write(str(epoch) + " epochs valid loss : " + str(valid_loss) + "\n")
        if valid_loss > prev_loss:
            patience += 1
            if patience > 2:
                early_stop = True
        else:
            patience = 0
        prev_loss = valid_loss
        if early_stop:
            print("{0} epochs Early stopping ...".format(epoch))
            break
        if valid_loss < best_loss:
            best_loss = valid_loss
            print('saving to {0}'.format(model_path))
            torch.save(model.state_dict(), model_path)

    model.eval()
    with torch.no_grad():
        start = time.time()
        ranking_and_hits(model, args, evalloader, n_ent, epoch)
        end = time.time()
        print('eval time used: {} minutes'.format((end - start) / 60))
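One caveat: the final ranking_and_hits call evaluates the last-epoch weights, while the best checkpoint (by validation loss) sits in model_path. Reloading it before the final evaluation is a small optional fix:

# Optional: evaluate the best saved checkpoint rather than the last-epoch weights.
model.load_state_dict(torch.load(model_path))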
Example #8
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
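# Optional additions (not in the original) for stricter reproducibility:
# torch.cuda.manual_seed_all(args.seed)        # seed every visible GPU
# torch.backends.cudnn.deterministic = True    # force deterministic cuDNN kernels
# torch.backends.cudnn.benchmark = False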

# Load sensor data
adj, features, labels, idx_train, idx_val, idx_test = load_data(
    path='./data/sensor/', dataset='sensor')

# Load kg
kg_adj, kg_features = load_kg(path='./data/sensor/', dataset='kg')

# Model and optimizer
if args.sparse:
    model = SpGAT(nfeat=features.shape[1],
                  nhid=args.hidden,
                  nclass=int(labels.max()) + 1,
                  dropout=args.dropout,
                  nheads=args.nb_heads,
                  alpha=args.alpha)
else:
    model = GAFT(nsize=features.shape[0],
                 kgsize=kg_features.shape[0],
                 nfeat=features.shape[1],
                 kgfeat=kg_features.shape[1],
                 nhid=args.hidden,