def valid(epoch, data, conv_e, batch_size, log_decs):
    """Rank every (s, r, ?) query on the validation data and log MR / MRR."""
    dataset = KnowledgeGraphDataset(data.x, data.y,
                                    e_to_index=data.e_to_index,
                                    r_to_index=data.r_to_index)
    valid_set = DataLoader(dataset, collate_fn=collate_valid,
                           batch_size=batch_size, num_workers=4, shuffle=True)
    conv_e.train(False)  # switch to evaluation mode

    ranks = []
    for s, r, os in tqdm(iter(valid_set)):
        s, r = Variable(s).cuda(), Variable(r).cuda()
        output = conv_e.test(s, r)
        rows = min(batch_size, s.size()[0])
        for i in range(rows):
            # Order all candidate entities by score for this query.
            _, top_indices = output[i].topk(output.size()[1])
            for o in os[i]:
                # Index of the true object within the sorted candidates.
                _, rank = (top_indices == o).max(dim=0)
                ranks.append(rank.data[0] + 1)  # 1-based rank

    ranks_t = torch.FloatTensor(ranks)
    mr = ranks_t.mean()
    mrr = (1 / ranks_t).mean()
    logger.info(log_decs + ' MR: {:.3f}, MRR: {:.10f}'.format(mr, mrr))
    tensorboard_logger.log_value(log_decs + ' mr', mr, epoch + 1)
    tensorboard_logger.log_value(log_decs + ' mrr', mrr, epoch + 1)
def train(epoch, train_loader, valid_loader, test_loader, log_desc='train_'):
    """Run one training epoch; every `args.check_point` epochs, pick the best
    threshold on the validation set and evaluate the test set with it."""
    model.train()
    running_loss, seen = 0., 0.
    for i_batch, batch in enumerate(train_loader):
        graph, features, labels, vertices = batch
        bs = graph.size(0)
        if args.cuda:
            features, graph = features.cuda(), graph.cuda()
            labels, vertices = labels.cuda(), vertices.cuda()
        optimizer.zero_grad()
        output = model(features, vertices, graph)
        if args.model in ("gcn", "gat"):
            # These models emit per-vertex outputs; keep only the last vertex.
            output = output[:, -1, :]
        loss_train = F.nll_loss(output, labels, class_weight)
        running_loss += bs * loss_train.item()
        seen += bs
        loss_train.backward()
        optimizer.step()

    logger.info("train loss in this epoch %f", running_loss / seen)
    tensorboard_logger.log_value('train_loss', running_loss / seen, epoch + 1)

    if (epoch + 1) % args.check_point == 0:
        logger.info("epoch %d, checkpoint!", epoch)
        best_thr = evaluate(epoch, valid_loader, return_best_thr=True,
                            log_desc='valid_')
        evaluate(epoch, test_loader, thr=best_thr, log_desc='test_')
def loop_dataset(g_list, epoch, classifier, sample_idxes, optimizer=None, bsize=cmd_args.batch_size):
    """Iterate minibatches of graph indices; optimize iff `optimizer` is given.

    Returns a numpy array [avg_loss, avg_acc, auc].
    """
    # In training mode the trailing partial batch is dropped; in eval mode
    # (optimizer is None) ceiling division keeps it.
    total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize  # noqa
    pbar = tqdm(range(total_iters), unit='batch')

    per_batch_stats = []
    all_targets, all_scores = [], []
    n_samples = 0
    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]
        all_targets += [g_list[idx].label for idx in selected_idx]
        logits, loss, acc = classifier(batch_graph)
        # Positive-class scores, used below for binary-classification AUC.
        all_scores.append(logits[:, 1].detach())

        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss = loss.data.cpu().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
        per_batch_stats.append(np.array([loss, acc]) * len(selected_idx))
        n_samples += len(selected_idx)

    if optimizer is None:
        assert n_samples == len(sample_idxes)
    avg_loss = np.sum(np.array(per_batch_stats), 0) / n_samples

    all_scores = torch.cat(all_scores).cpu().numpy()
    all_targets = np.array(all_targets)
    fpr, tpr, _ = metrics.roc_curve(all_targets, all_scores, pos_label=1)
    auc = metrics.auc(fpr, tpr)

    tensorboard_logger.log_value('train_loss', avg_loss[0], epoch + 1)
    return np.concatenate((avg_loss, [auc]))
def train(epoch, data, conv_e, criterion, optimizer, args):
    """Train ConvE for one epoch with multi-hot, label-smoothed targets."""
    dataset = KnowledgeGraphDataset(data.x, data.y,
                                    e_to_index=data.e_to_index,
                                    r_to_index=data.r_to_index)
    train_set = DataLoader(dataset, collate_fn=collate_train,
                           batch_size=args.batch_size, num_workers=4,
                           shuffle=True)
    progress_bar = tqdm(iter(train_set))
    moving_loss = 0
    conv_e.train(True)

    n_entities = len(data.e_to_index)
    # Reusable multi-hot target buffer, one row per query.
    y_multihot = torch.LongTensor(args.batch_size, n_entities)
    for s, r, os in progress_bar:
        s, r = Variable(s).cuda(), Variable(r).cuda()
        if s.size()[0] != args.batch_size:
            # The final batch may be smaller; resize the buffer to match.
            y_multihot = torch.LongTensor(s.size()[0], n_entities)
        y_multihot.zero_()
        y_multihot = y_multihot.scatter_(1, os, 1)
        # Label smoothing: true objects get (1 - eps), eps spread uniformly.
        y_smooth = (1 - args.label_smooth) * y_multihot.float() \
            + args.label_smooth / n_entities
        targets = Variable(y_smooth, requires_grad=False).cuda()

        output = conv_e(s, r)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        conv_e.zero_grad()

        # Exponential moving average of the loss for display.
        if moving_loss == 0:
            moving_loss = loss.data[0]
        else:
            moving_loss = moving_loss * 0.9 + loss.data[0] * 0.1
        progress_bar.set_description(
            'Epoch: {}; Loss: {:.5f}; Avg: {:.5f}'.format(
                epoch + 1, loss.data[0], moving_loss))

    logger.info('Epoch: {}; Loss: {:.5f}; Avg: {:.5f}'.format(
        epoch + 1, loss.data[0], moving_loss))
    tensorboard_logger.log_value('avg loss', moving_loss, epoch + 1)
    tensorboard_logger.log_value('loss', loss.data[0], epoch + 1)
def run_epoch(self, loader, epoch):
    """Train self.model for one epoch over `loader`; return mean per-sample loss."""
    self.model.train()
    epoch_loss = 0
    for batch_idx, data in enumerate(loader):
        self.optimizer.zero_grad()
        data = data.to(self.device)
        ground_truth = data.y.clone()
        out = self.model(data)
        loss = F.nll_loss(out, ground_truth.view(-1))
        loss.backward()
        # Weight by graph count so the epoch mean is per-sample.
        epoch_loss += loss.item() * self.num_graphs(data)
        self.optimizer.step()
        if batch_idx % 20 == 0:
            logger.info("train batch %d", batch_idx)
    mean_loss = epoch_loss / len(loader.dataset)
    tensorboard_logger.log_value('train_loss', mean_loss, epoch + 1)
    return mean_loss
def run_epoch(self, epoch, data, model, optimizer):
    """One pass over `data`; optimizes iff `optimizer` is given.

    Returns (avg_loss, avg_acc), both weighted per-sample.
    """
    loss_sum, acc_sum, n_samples = 0.0, 0.0, 0
    for batch in tqdm(data, desc=str(epoch), unit='b'):
        cur_len, gs, hs, ys = batch
        gs, hs, ys = map(self.to_cuda, [gs, hs, ys])
        loss, acc, _ = model(gs, hs, ys)
        loss_sum += loss.item() * cur_len
        acc_sum += acc.item() * cur_len
        n_samples += cur_len
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    avg_loss = loss_sum / n_samples
    avg_acc = acc_sum / n_samples
    tensorboard_logger.log_value('train_loss', avg_loss, epoch + 1)
    return avg_loss, avg_acc
def evaluate(self, loader, epoch, thr=None, return_best_thr=False):
    """Evaluate self.model on `loader`; optionally apply / search a threshold.

    Returns ([prec, rec, f1, auc], mean_loss, best_thr_or_None).
    """
    self.model.eval()
    correct, total = 0, 0.
    loss, prec, rec, f1 = 0., 0., 0., 0.
    y_true, y_pred, y_score = [], [], []
    for d_i, data in enumerate(loader):
        data = data.to(self.device)
        bs = data.y.size(0)
        with torch.no_grad():
            out = self.model(data)
            pred = out.max(1)[1]
            loss += F.nll_loss(out, data.y, reduction='sum').item()
        y_true += data.y.data.tolist()
        y_pred += out.max(1)[1].data.tolist()
        y_score += out[:, 1].data.tolist()  # positive-class scores
        total += bs
        correct += pred.eq(data.y.view(-1)).sum().item()
        if d_i % 50 == 0:
            logger.info("eval batch %d", d_i)

    if thr is not None:
        # Re-derive hard predictions from scores with the given threshold.
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                loss / total, auc, prec, rec, f1)

    log_desc = "valid_" if return_best_thr else "test_"
    tensorboard_logger.log_value(log_desc + 'loss', loss / total, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if return_best_thr:
        # Sweep the PR curve for the F1-maximizing threshold.
        precs, recs, thrs = precision_recall_curve(y_true, y_score)
        f1s = 2 * precs * recs / (precs + recs)
        f1s = f1s[:-1]  # last PR point has no associated threshold
        thrs = thrs[~np.isnan(f1s)]
        f1s = f1s[~np.isnan(f1s)]
        best_thr = thrs[np.argmax(f1s)]
        logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
        return [prec, rec, f1, auc], loss / len(loader.dataset), best_thr
    else:
        return [prec, rec, f1, auc], loss / len(loader.dataset), None
def evaluate(self, epoch, data, model, thr=None, return_best_thr=False):
    """Evaluate `model` on `data`; optionally apply / search a threshold.

    Returns (mean_loss, [prec, rec, f1, auc], best_thr_or_None).
    """
    model.eval()
    total = 0.
    prec, rec, f1 = 0., 0., 0.
    y_true, y_pred, y_score = [], [], []
    losses, accs, n_samples = [], [], 0
    for batch in tqdm(data, desc=str(epoch), unit='b'):
        cur_len, gs, hs, ys = batch
        gs, hs, ys = map(self.to_cuda, [gs, hs, ys])
        loss, acc, out = model(gs, hs, ys)
        losses.append(loss.data.item() * cur_len)
        n_samples += cur_len
        y_true += ys.data.tolist()
        y_pred += out.max(1)[1].data.tolist()
        y_score += out[:, 1].data.tolist()  # positive-class scores
        total += cur_len

    if thr is not None:
        # Re-derive hard predictions from scores with the given threshold.
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                sum(losses) / n_samples, auc, prec, rec, f1)
    loss_ret = sum(losses) / n_samples

    log_desc = "valid_" if return_best_thr else "test_"
    tensorboard_logger.log_value(log_desc + 'loss', loss_ret, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if return_best_thr:
        # Sweep the PR curve for the F1-maximizing threshold.
        precs, recs, thrs = precision_recall_curve(y_true, y_score)
        f1s = 2 * precs * recs / (precs + recs)
        f1s = f1s[:-1]  # last PR point has no associated threshold
        thrs = thrs[~np.isnan(f1s)]
        f1s = f1s[~np.isnan(f1s)]
        best_thr = thrs[np.argmax(f1s)]
        logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
        return loss_ret, [prec, rec, f1, auc], best_thr
    else:
        return loss_ret, [prec, rec, f1, auc], None
def evaluate(epoch, loader, thr=None, return_best_thr=False, log_desc='valid_'):
    """Evaluate the global `model` on `loader`; optionally apply / search a
    decision threshold. Returns the best threshold (or None)."""
    model.eval()
    total = 0.
    loss, prec, rec, f1 = 0., 0., 0., 0.
    y_true, y_pred, y_score = [], [], []
    for i_batch, batch in enumerate(loader):
        graph, features, labels, vertices = batch
        bs = graph.size(0)
        if args.cuda:
            features, graph = features.cuda(), graph.cuda()
            labels, vertices = labels.cuda(), vertices.cuda()
        output = model(features, vertices, graph)
        if args.model in ("gcn", "gat"):
            # These models emit per-vertex outputs; keep only the last vertex.
            output = output[:, -1, :]
        loss_batch = F.nll_loss(output, labels, class_weight)
        loss += bs * loss_batch.item()
        y_true += labels.data.tolist()
        y_pred += output.max(1)[1].data.tolist()
        y_score += output[:, 1].data.tolist()  # positive-class scores
        total += bs
    model.train()  # restore training mode after evaluation

    if thr is not None:
        # Re-derive hard predictions from scores with the given threshold.
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    logger.info("%sloss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                log_desc, loss / total, auc, prec, rec, f1)
    tensorboard_logger.log_value(log_desc + 'loss', loss / total, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'prec', prec, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'rec', rec, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if return_best_thr:
        # Sweep the PR curve for the F1-maximizing threshold.
        precs, recs, thrs = precision_recall_curve(y_true, y_score)
        f1s = 2 * precs * recs / (precs + recs)
        f1s = f1s[:-1]  # last PR point has no associated threshold
        thrs = thrs[~np.isnan(f1s)]
        f1s = f1s[~np.isnan(f1s)]
        best_thr = thrs[np.argmax(f1s)]
        logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
        return best_thr
    else:
        return None
def evaluate(g_list, epoch, classifier, sample_idxes, bsize=cmd_args.batch_size, thr=None, return_best_thr=False):
    """Evaluate `classifier` on the graphs selected by `sample_idxes`.

    Logs loss/AUC/Prec/Rec/F1 and, when `return_best_thr` is True, also sweeps
    the precision-recall curve for the F1-maximizing threshold.

    Returns (avg_loss, [prec, rec, f1, auc], best_thr_or_None).
    """
    # BUG FIX: the original computed
    #   (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
    # but `optimizer` is neither a parameter nor defined here (copy-paste
    # from loop_dataset's training loop), raising NameError. Evaluation must
    # cover every sample, so use plain ceiling division.
    total_iters = (len(sample_idxes) + bsize - 1) // bsize
    pbar = tqdm(range(total_iters), unit='batch')
    total = 0
    y_true, y_pred, y_score = [], [], []
    losses = []
    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize:(pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]
        targets = [g_list[idx].label for idx in selected_idx]
        out, loss, acc = classifier(batch_graph)
        loss = loss.data.cpu().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
        losses.append(loss)
        y_true += targets
        y_pred += out.max(1)[1].data.tolist()
        y_score += out[:, 1].data.tolist()  # positive-class scores
        total += len(selected_idx)

    if thr is not None:
        # Re-derive hard predictions from scores with the given threshold.
        logger.info("using threshold %.4f", thr)
        y_score = np.array(y_score)
        y_pred = np.zeros_like(y_score)
        y_pred[y_score > thr] = 1

    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred,
                                                       average="binary")
    auc = roc_auc_score(y_true, y_score)
    logger.info("loss: %.4f AUC: %.4f Prec: %.4f Rec: %.4f F1: %.4f",
                sum(losses) / total, auc, prec, rec, f1)
    loss_ret = sum(losses) / total

    log_desc = "valid_" if return_best_thr else "test_"
    tensorboard_logger.log_value(log_desc + 'loss', loss_ret, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'auc', auc, epoch + 1)
    tensorboard_logger.log_value(log_desc + 'f1', f1, epoch + 1)

    if return_best_thr:
        # Sweep the PR curve for the F1-maximizing threshold.
        precs, recs, thrs = precision_recall_curve(y_true, y_score)
        f1s = 2 * precs * recs / (precs + recs)
        f1s = f1s[:-1]  # last PR point has no associated threshold
        thrs = thrs[~np.isnan(f1s)]
        f1s = f1s[~np.isnan(f1s)]
        best_thr = thrs[np.argmax(f1s)]
        logger.info("best threshold=%4f, f1=%.4f", best_thr, np.max(f1s))
        return loss_ret, [prec, rec, f1, auc], best_thr
    else:
        return loss_ret, [prec, rec, f1, auc], None
# Main training loop: train each epoch, validate to find the best decision
# threshold, evaluate on test, and checkpoint whenever validation loss improves.
last_epoch = -1
min_loss = float('inf')  # BUG FIX: was read below before ever being assigned
best_thr = None          # BUG FIX: was read below before ever being assigned
for epoch in range(args.epochs):
    model.train()
    losses_train = []
    for i, data in enumerate(train_loader):
        data = data.to(args.device)
        out = model(data)
        loss = F.nll_loss(out, data.y)
        if i % 10 == 0:
            print("Training loss:{}".format(loss.item()))
        loss.backward()
        losses_train.append(loss.item())
        optimizer.step()
        optimizer.zero_grad()
    tensorboard_logger.log_value('train_loss', np.mean(losses_train), epoch + 1)

    val_metrics, val_loss, thr = test(model, epoch, val_loader,
                                      return_best_thr=True)
    print("Validation loss:{}\teval metrics:".format(val_loss), val_metrics)
    # BUG FIX: use the threshold from THIS epoch's validation pass; the
    # original passed `best_thr`, which is undefined on the first epoch
    # (NameError) and otherwise one checkpoint stale.
    test_acc, test_loss, _ = test(model, epoch, test_loader, thr=thr)
    print("Test performance:", test_acc)

    if val_loss < min_loss:
        torch.save(model.state_dict(), 'latest.pth')
        print("Model saved at epoch {}".format(epoch))
        min_loss = val_loss
        best_thr = thr
        patience = 0
        logger.info("**************BEST UNTIL NOW*****************")