Example #1
0
def valid(epoch, data, conv_e, batch_size, log_decs):
    """Rank-based evaluation of a ConvE-style model.

    For every (s, r) query, ranks all candidate entities by score and
    records the 1-based rank of each true object, then logs mean rank
    (MR) and mean reciprocal rank (MRR) under the ``log_decs`` prefix.
    """
    kg_dataset = KnowledgeGraphDataset(data.x,
                                       data.y,
                                       e_to_index=data.e_to_index,
                                       r_to_index=data.r_to_index)
    loader = DataLoader(kg_dataset,
                        collate_fn=collate_valid,
                        batch_size=batch_size,
                        num_workers=4,
                        shuffle=True)

    conv_e.train(False)  # switch to evaluation mode
    all_ranks = []
    for s, r, os in tqdm(iter(loader)):
        s, r = Variable(s).cuda(), Variable(r).cuda()
        scores = conv_e.test(s, r)

        # Last batch may be smaller than batch_size.
        for row in range(min(batch_size, s.size()[0])):
            # Full sort of all entity scores for this query.
            _, ordered = scores[row].topk(scores.size()[1])
            for target in os[row]:
                # Position of the true object in the sorted list.
                _, pos = (ordered == target).max(dim=0)
                all_ranks.append(pos.data[0] + 1)

    rank_tensor = torch.FloatTensor(all_ranks)
    mr = rank_tensor.mean()
    mrr = (1 / rank_tensor).mean()

    logger.info(log_decs + ' MR: {:.3f}, MRR: {:.10f}'.format(mr, mrr))
    tensorboard_logger.log_value(log_decs + ' mr', mr, epoch + 1)
    tensorboard_logger.log_value(log_decs + ' mrr', mrr, epoch + 1)
Example #2
0
def train(epoch, train_loader, valid_loader, test_loader, log_desc='train_'):
    """Run one optimization epoch of the global ``model``.

    Logs the sample-weighted mean training loss, and every
    ``args.check_point`` epochs validates (picking the best threshold)
    and then tests with that threshold.
    """
    model.train()

    running_loss = 0.
    n_seen = 0.
    for i_batch, batch in enumerate(train_loader):
        graph, features, labels, vertices = batch
        n_in_batch = graph.size(0)

        if args.cuda:
            features, graph = features.cuda(), graph.cuda()
            labels, vertices = labels.cuda(), vertices.cuda()

        optimizer.zero_grad()
        output = model(features, vertices, graph)
        # These models emit per-vertex outputs; keep only the last vertex.
        if args.model in ("gcn", "gat"):
            output = output[:, -1, :]
        batch_loss = F.nll_loss(output, labels, class_weight)
        running_loss += n_in_batch * batch_loss.item()
        n_seen += n_in_batch
        batch_loss.backward()
        optimizer.step()
    logger.info("train loss in this epoch %f", running_loss / n_seen)
    tensorboard_logger.log_value('train_loss', running_loss / n_seen, epoch + 1)
    if (epoch + 1) % args.check_point == 0:
        logger.info("epoch %d, checkpoint!", epoch)
        best_thr = evaluate(epoch,
                            valid_loader,
                            return_best_thr=True,
                            log_desc='valid_')
        evaluate(epoch, test_loader, thr=best_thr, log_desc='test_')
Example #3
0
def loop_dataset(g_list,
                 epoch,
                 classifier,
                 sample_idxes,
                 optimizer=None,
                 bsize=cmd_args.batch_size):
    """Run ``classifier`` over ``sample_idxes`` in minibatches.

    Trains when ``optimizer`` is given, otherwise evaluates.  Returns a
    numpy array ``[avg_loss, avg_acc, auc]`` weighted by batch size.
    """
    # Eval mode rounds the batch count up so the trailing partial batch
    # is covered; train mode floors it, dropping the remainder.
    total_iters = (len(sample_idxes) + (bsize - 1) *
                   (optimizer is None)) // bsize  # noqa
    pbar = tqdm(range(total_iters), unit='batch')
    weighted_stats = []
    all_targets = []
    all_scores = []

    n_samples = 0
    for pos in pbar:
        batch_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]

        batch_graph = [g_list[i] for i in batch_idx]
        all_targets += [g_list[i].label for i in batch_idx]
        logits, loss, acc = classifier(batch_graph)
        all_scores.append(logits[:, 1].detach())  # for binary classification

        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss = loss.data.cpu().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))

        # Weight per-batch stats by batch size for a true per-sample mean.
        weighted_stats.append(np.array([loss, acc]) * len(batch_idx))

        n_samples += len(batch_idx)
    if optimizer is None:
        assert n_samples == len(sample_idxes)
    avg_loss = np.sum(np.array(weighted_stats), 0) / n_samples
    all_scores = torch.cat(all_scores).cpu().numpy()

    all_targets = np.array(all_targets)
    fpr, tpr, _ = metrics.roc_curve(all_targets, all_scores, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    tensorboard_logger.log_value('train_loss', avg_loss[0], epoch + 1)

    return np.concatenate((avg_loss, [auc]))
Example #4
0
def train(epoch, data, conv_e, criterion, optimizer, args):
    """One training epoch of ConvE with 1-N scoring and label smoothing.

    Builds a multi-hot target over all entities for each (s, r) pair,
    smooths it, and optimizes ``conv_e`` against ``criterion``.  Logs a
    0.9/0.1 exponential moving average of the loss.
    """
    loader = DataLoader(KnowledgeGraphDataset(data.x,
                                              data.y,
                                              e_to_index=data.e_to_index,
                                              r_to_index=data.r_to_index),
                        collate_fn=collate_train,
                        batch_size=args.batch_size,
                        num_workers=4,
                        shuffle=True)

    progress_bar = tqdm(iter(loader))
    moving_loss = 0

    conv_e.train(True)
    num_entities = len(data.e_to_index)
    y_multihot = torch.LongTensor(args.batch_size, num_entities)
    for s, r, os in progress_bar:
        s, r = Variable(s).cuda(), Variable(r).cuda()

        # Last batch can be smaller; reallocate the target buffer.
        if s.size()[0] != args.batch_size:
            y_multihot = torch.LongTensor(s.size()[0], num_entities)

        y_multihot.zero_()
        y_multihot = y_multihot.scatter_(1, os, 1)
        # Label smoothing: scale the hot entries and spread the rest.
        y_smooth = ((1 - args.label_smooth) * y_multihot.float()
                    + args.label_smooth / num_entities)

        targets = Variable(y_smooth, requires_grad=False).cuda()

        output = conv_e(s, r)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        conv_e.zero_grad()

        if moving_loss == 0:
            moving_loss = loss.data[0]
        else:
            moving_loss = moving_loss * 0.9 + loss.data[0] * 0.1

        progress_bar.set_description(
            'Epoch: {}; Loss: {:.5f}; Avg: {:.5f}'.format(
                epoch + 1, loss.data[0], moving_loss))

    logger.info('Epoch: {}; Loss: {:.5f}; Avg: {:.5f}'.format(
        epoch + 1, loss.data[0], moving_loss))
    tensorboard_logger.log_value('avg loss', moving_loss, epoch + 1)
    tensorboard_logger.log_value('loss', loss.data[0], epoch + 1)
Example #5
0
File: main.py Project: zfjsail/ASAP
    def run_epoch(self, loader, epoch):
        """Train ``self.model`` for one epoch over ``loader``.

        Returns the loss averaged per graph over the whole dataset, and
        logs the same value to tensorboard at ``epoch + 1``.
        """
        self.model.train()

        loss_sum = 0
        for batch_idx, data in enumerate(loader):
            self.optimizer.zero_grad()
            data = data.to(self.device)
            ground_truth = data.y.clone()
            prediction = self.model(data)
            batch_loss = F.nll_loss(prediction, ground_truth.view(-1))
            batch_loss.backward()
            # Weight by graph count so the epoch average is per-graph.
            loss_sum += batch_loss.item() * self.num_graphs(data)
            self.optimizer.step()
            if batch_idx % 20 == 0:
                logger.info("train batch %d", batch_idx)
        tensorboard_logger.log_value('train_loss',
                                     loss_sum / len(loader.dataset),
                                     epoch + 1)
        return loss_sum / len(loader.dataset)
Example #6
0
    def run_epoch(self, epoch, data, model, optimizer):
        """Run one pass of ``model`` over ``data``.

        Optimizes when ``optimizer`` is not None; otherwise just scores.
        Returns ``(avg_loss, avg_acc)`` weighted by batch length, and logs
        the average loss to tensorboard at ``epoch + 1``.
        """
        loss_total, acc_total, seen = 0., 0., 0
        for batch in tqdm(data, desc=str(epoch), unit='b'):
            cur_len, gs, hs, ys = batch
            gs, hs, ys = map(self.to_cuda, [gs, hs, ys])
            loss, acc, _ = model(gs, hs, ys)
            # Weight per-batch means by batch length for a true average.
            loss_total += loss.item() * cur_len
            acc_total += acc.item() * cur_len
            seen += cur_len
            if optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        avg_loss, avg_acc = loss_total / seen, acc_total / seen
        tensorboard_logger.log_value('train_loss', avg_loss, epoch + 1)

        return avg_loss, avg_acc
Example #7
0
File: main.py Project: zfjsail/ASAP
    def evaluate(self, loader, epoch, thr=None, return_best_thr=False):
        """Evaluate ``self.model`` on ``loader`` (binary classification).

        Args:
            loader: iterable of batches; each batch exposes labels as
                ``.y`` and moves via ``.to(device)``.
            epoch: epoch index; metrics are logged at ``epoch + 1``.
            thr: optional decision threshold on the positive-class score;
                when given, overrides the argmax predictions.
            return_best_thr: when True, additionally search the
                F1-maximizing threshold on the precision-recall curve.

        Returns:
            ``([prec, rec, f1, auc], mean_loss, best_thr_or_None)``
        """
        self.model.eval()

        correct = 0
        total = 0.
        loss, prec, rec, f1 = 0., 0., 0., 0.
        y_true, y_pred, y_score = [], [], []
        for d_i, data in enumerate(loader):
            data = data.to(self.device)
            bs = data.y.size(0)

            # No gradients needed for evaluation.
            with torch.no_grad():
                # pred = self.model(data).max(1)[1]
                out = self.model(data)
                pred = out.max(1)[1]

            # Summed (not mean) loss so dividing by `total` later yields a
            # per-sample average across unequal batch sizes.
            loss += F.nll_loss(out, data.y, reduction='sum').item()

            y_true += data.y.data.tolist()
            y_pred += out.max(1)[1].data.tolist()
            # Positive-class score -- assumes a 2-class output (column 1).
            y_score += out[:, 1].data.tolist()
            total += bs

            correct += pred.eq(data.y.view(-1)).sum().item()
            if d_i % 50 == 0:
                logger.info("eval batch %d", d_i)

        # Re-threshold predictions when an explicit cutoff is supplied.
        if thr is not None:
            logger.info("using threshold %.4f", thr)
            y_score = np.array(y_score)
            y_pred = np.zeros_like(y_score)
            y_pred[y_score > thr] = 1

        prec, rec, f1, _ = precision_recall_fscore_support(y_true,
                                                           y_pred,
                                                           average="binary")
        auc = roc_auc_score(y_true, y_score)
        logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                    loss / total, auc, prec, rec, f1)

        if return_best_thr:
            log_desc = "valid_"
        else:
            log_desc = "test_"
        tensorboard_logger.log_value(log_desc + 'loss', loss / total,
                                     epoch + 1)
        tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
        tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

        if return_best_thr:
            # F1 at each PR-curve point; drop the last entry because
            # `thrs` has one fewer element than `precs`/`recs`.
            precs, recs, thrs = precision_recall_curve(y_true, y_score)
            f1s = 2 * precs * recs / (precs + recs)
            f1s = f1s[:-1]
            # Filter out points where prec + rec == 0 produced NaN.
            thrs = thrs[~np.isnan(f1s)]
            f1s = f1s[~np.isnan(f1s)]
            best_thr = thrs[np.argmax(f1s)]
            logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
            return [prec, rec, f1, auc], loss / len(loader.dataset), best_thr
        else:
            return [prec, rec, f1, auc], loss / len(loader.dataset), None
Example #8
0
    def evaluate(self, epoch, data, model, thr=None, return_best_thr=False):
        """Evaluate ``model`` on ``data`` (binary classification).

        Args:
            epoch: epoch index, used for progress display and logging.
            data: iterable of batches of the form (cur_len, gs, hs, ys).
            model: callable returning (loss, acc, out) for a batch.
            thr: optional decision threshold on the positive-class score;
                when given, overrides the argmax predictions.
            return_best_thr: when True, additionally search the
                F1-maximizing threshold on the precision-recall curve.

        Returns:
            ``(mean_loss, [prec, rec, f1, auc], best_thr_or_None)``
        """
        model.eval()
        total = 0.
        prec, rec, f1 = 0., 0., 0.
        y_true, y_pred, y_score = [], [], []
        losses, accs, n_samples = [], [], 0
        for batch in tqdm(data, desc=str(epoch), unit='b'):
            cur_len, gs, hs, ys = batch
            # print("cur len", cur_len)
            # print("gs", len(gs), "hs", len(hs), "ys", len(ys))
            gs, hs, ys = map(self.to_cuda, [gs, hs, ys])
            loss, acc, out = model(gs, hs, ys)
            # Weight the batch-mean loss by batch length so the later
            # division by n_samples yields a per-sample average.
            losses.append(loss.data.item() * cur_len)
            # accs.append(acc*cur_len)
            n_samples += cur_len

            y_true += ys.data.tolist()
            y_pred += out.max(1)[1].data.tolist()
            # Positive-class score -- assumes a 2-class output (column 1).
            y_score += out[:, 1].data.tolist()
            total += cur_len

        # Re-threshold predictions when an explicit cutoff is supplied.
        if thr is not None:
            logger.info("using threshold %.4f", thr)
            y_score = np.array(y_score)
            y_pred = np.zeros_like(y_score)
            y_pred[y_score > thr] = 1

        prec, rec, f1, _ = precision_recall_fscore_support(y_true,
                                                           y_pred,
                                                           average="binary")
        auc = roc_auc_score(y_true, y_score)
        logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                    sum(losses) / n_samples, auc, prec, rec, f1)

        # avg_loss, avg_acc = sum(losses) / n_samples, sum(accs) / n_samples
        # return avg_loss.item(), avg_acc.item()

        # loss_ret = (sum(losses) / n_samples).data.item()
        loss_ret = sum(losses) / n_samples

        if return_best_thr:
            log_desc = "valid_"
        else:
            log_desc = "test_"

        tensorboard_logger.log_value(log_desc + 'loss', loss_ret, epoch + 1)
        tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
        tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

        if return_best_thr:
            # F1 at each PR-curve point; drop the last entry because
            # `thrs` has one fewer element than `precs`/`recs`.
            precs, recs, thrs = precision_recall_curve(y_true, y_score)
            f1s = 2 * precs * recs / (precs + recs)
            f1s = f1s[:-1]
            # Filter out points where prec + rec == 0 produced NaN.
            thrs = thrs[~np.isnan(f1s)]
            f1s = f1s[~np.isnan(f1s)]
            best_thr = thrs[np.argmax(f1s)]
            logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
            return loss_ret, [prec, rec, f1, auc], best_thr
        else:
            return loss_ret, [prec, rec, f1, auc], None
Example #9
0
def evaluate(epoch,
             loader,
             thr=None,
             return_best_thr=False,
             log_desc='valid_'):
    """Evaluate the global ``model`` on ``loader`` (binary classification).

    Logs loss/AUC/precision/recall/F1 under the ``log_desc`` prefix.
    When ``return_best_thr`` is True, returns the threshold maximizing F1
    on the precision-recall curve; otherwise returns None.
    """
    model.eval()
    n_total = 0.
    loss_sum, prec, rec, f1 = 0., 0., 0., 0.
    y_true, y_pred, y_score = [], [], []
    for batch_idx, batch in enumerate(loader):
        graph, features, labels, vertices = batch
        n_batch = graph.size(0)

        if args.cuda:
            features, graph = features.cuda(), graph.cuda()
            labels, vertices = labels.cuda(), vertices.cuda()

        output = model(features, vertices, graph)
        # These models emit per-vertex outputs; keep only the last vertex.
        if args.model in ("gcn", "gat"):
            output = output[:, -1, :]
        # Weight batch-mean loss by batch size for a per-sample average.
        loss_sum += n_batch * F.nll_loss(output, labels, class_weight).item()

        y_true += labels.data.tolist()
        y_pred += output.max(1)[1].data.tolist()
        y_score += output[:, 1].data.tolist()
        n_total += n_batch

    model.train()

    # Re-threshold predictions when an explicit cutoff is supplied.
    if thr is not None:
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true,
                                                       y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    logger.info("%sloss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                log_desc, loss_sum / n_total, auc, prec, rec, f1)

    tensorboard_logger.log_value(log_desc + 'loss', loss_sum / n_total, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'prec', prec, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'rec', rec, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if not return_best_thr:
        return None

    # F1 at each PR-curve point; `thrs` is one element shorter than
    # `precs`/`recs`, hence the [:-1] trim. NaNs (prec+rec == 0) dropped.
    precs, recs, thrs = precision_recall_curve(y_true, y_score)
    f1s = (2 * precs * recs / (precs + recs))[:-1]
    thrs = thrs[~np.isnan(f1s)]
    f1s = f1s[~np.isnan(f1s)]
    best_thr = thrs[np.argmax(f1s)]
    logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
    return best_thr
Example #10
0
def evaluate(g_list,
             epoch,
             classifier,
             sample_idxes,
             bsize=cmd_args.batch_size,
             thr=None,
             return_best_thr=False):
    """Evaluate ``classifier`` on the graphs selected by ``sample_idxes``.

    Args:
        g_list: list of graph objects, each exposing a ``.label``.
        epoch: epoch index; metrics are logged at ``epoch + 1``.
        classifier: callable mapping a list of graphs to (out, loss, acc).
        sample_idxes: indices into ``g_list`` to evaluate.
        bsize: minibatch size.
        thr: optional decision threshold on the positive-class score;
            when given, overrides the argmax predictions.
        return_best_thr: when True, additionally search the F1-maximizing
            threshold on the precision-recall curve.

    Returns:
        ``(mean_loss, [prec, rec, f1, auc], best_thr_or_None)``
    """
    # BUG FIX: the original computed
    #   (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
    # but `optimizer` is not defined in this function (the expression was
    # copied from the training loop), so it either raised NameError or
    # silently depended on a module-level global. Evaluation must always
    # cover the trailing partial batch, so use plain ceiling division.
    total_iters = (len(sample_idxes) + bsize - 1) // bsize
    pbar = tqdm(range(total_iters), unit='batch')

    total = 0
    y_true, y_pred, y_score = [], [], []
    losses = []

    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]

        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        out, loss, acc = classifier(batch_graph)

        loss = loss.data.cpu().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))

        # BUG FIX: weight each batch-mean loss by its batch size so the
        # final division by `total` is a true per-sample average (the
        # original summed per-batch means but divided by the sample
        # count, shrinking the reported loss by roughly a factor of
        # bsize, inconsistent with the other evaluate() in this file).
        losses.append(loss * len(selected_idx))

        y_true += targets
        y_pred += out.max(1)[1].data.tolist()
        # Positive-class score -- assumes a 2-class output (column 1).
        y_score += out[:, 1].data.tolist()

        total += len(selected_idx)

    # Re-threshold predictions when an explicit cutoff is supplied.
    if thr is not None:
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true,
                                                       y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    loss_ret = sum(losses) / total
    logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                loss_ret, auc, prec, rec, f1)

    if return_best_thr:
        log_desc = "valid_"
    else:
        log_desc = "test_"
    tensorboard_logger.log_value(log_desc + 'loss', loss_ret, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if return_best_thr:
        # F1 at each PR-curve point; `thrs` is one element shorter than
        # `precs`/`recs`, hence the [:-1] trim. NaNs are dropped.
        precs, recs, thrs = precision_recall_curve(y_true, y_score)
        f1s = 2 * precs * recs / (precs + recs)
        f1s = f1s[:-1]
        thrs = thrs[~np.isnan(f1s)]
        f1s = f1s[~np.isnan(f1s)]
        best_thr = thrs[np.argmax(f1s)]
        logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
        return loss_ret, [prec, rec, f1, auc], best_thr
    else:
        return loss_ret, [prec, rec, f1, auc], None
Example #11
0
# Main epoch loop: train, validate (picking the best F1 threshold), then
# evaluate on the test split with the best threshold so far.
# NOTE(review): `min_loss`, `best_thr`, `patience`, and `test` are not
# defined in this chunk -- presumably initialized above; verify.
last_epoch = -1

for epoch in range(args.epochs):
    model.train()
    losses_train = []
    for i, data in enumerate(train_loader):
        data = data.to(args.device)
        out = model(data)
        loss = F.nll_loss(out, data.y)
        if i % 10 == 0:
            print("Training loss:{}".format(loss.item()))
        loss.backward()
        losses_train.append(loss.item())
        optimizer.step()
        optimizer.zero_grad()
    tensorboard_logger.log_value('train_loss', np.mean(losses_train),
                                 epoch + 1)

    # Validation also returns this epoch's F1-maximizing threshold.
    val_metrics, val_loss, thr = test(model,
                                      epoch,
                                      val_loader,
                                      return_best_thr=True)
    print("Validation loss:{}\teval metrics:".format(val_loss), val_metrics)
    # NOTE(review): `best_thr` here is the threshold from the last
    # *improving* epoch (updated below), not this epoch's `thr`; on the
    # very first epoch it may be unbound -- confirm it is initialized
    # before this loop.
    test_acc, test_loss, _ = test(model, epoch, test_loader, thr=best_thr)
    print("Test performance:", test_acc)
    if val_loss < min_loss:
        torch.save(model.state_dict(), 'latest.pth')
        print("Model saved at epoch {}".format(epoch))
        min_loss = val_loss
        best_thr = thr
        patience = 0
        logger.info("**************BEST UNTIL NOW*****************")