import timeit
from collections import OrderedDict
from json import dumps

import torch
from torch import nn, utils, cuda, no_grad, cat, stack, mean, sigmoid, save

## Project-local names used below (cfg, plat, user, device, logger,
## logit2label, calculate_performance, calculate_performance_bin_sk,
## torchtext_batch2multilabel and the model classes) are assumed to be
## provided by the surrounding package.


def test_graph_classifier(model: GAT_Graph_Classifier, loss_func,
                          data_loader: torch.utils.data.dataloader.DataLoader):
    """Evaluates the GAT graph classifier; returns mean loss and outputs."""
    model.eval()
    preds = []
    trues = []
    losses = []
    ## Deactivate autograd; evaluation needs no gradients:
    with no_grad():
        for i, (graph_batch, label) in enumerate(data_loader):
            ## Store emb in a separate variable as self_loop removes emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            prediction = model(graph_batch, emb)
            loss = loss_func(prediction, label)
            preds.append(prediction.detach())
            trues.append(label.detach())
            losses.append(loss.detach())
    losses = torch.mean(torch.stack(losses))
    preds = torch.cat(preds)
    ## Converting raw scores to probabilities using Sigmoid:
    preds = torch.sigmoid(preds)
    ## Converting probabilities to class labels:
    preds = logit2label(preds.detach(), cls_thresh=0.5)
    trues = torch.cat(trues)
    result_dict = calculate_performance(trues, preds)
    test_output = {'preds': preds, 'trues': trues, 'result': result_dict}
    return losses, test_output
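## `logit2label` is project-local and not shown in this section. A minimal
## sketch of the thresholding it performs here (one threshold shared across
## classes, multi-hot output); the project's helper may differ:
def logit2label_sketch(probs, cls_thresh=0.5):
    """Binarize per-class probabilities into a multi-hot label tensor."""
    return (probs >= cls_thresh).long()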
def eval_graph_classifier(model: GAT_GCN_Classifier, G, X, loss_func,
                          data_loader: utils.data.dataloader.DataLoader,
                          n_classes=cfg['data']['num_classes'],
                          save_gcn_embs=False):
    """Evaluates the joint GAT+GCN classifier; returns mean loss and outputs."""
    model.eval()
    preds = []
    trues = []
    losses = []
    ## Deactivate autograd; evaluation needs no gradients:
    with no_grad():
        for i, (graph_batch, local_ids, label, global_ids,
                node_counts) in enumerate(data_loader):
            ## Store emb in a separate variable as self_loop removes emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                graph_batch = graph_batch.to(device)
                emb = emb.to(device)
                ## local_ids, node_counts and global_ids stay on the CPU.
                G = G.to(device)
                X = X.to(device)
                ## The loss below needs labels on the prediction's device:
                label = label.to(device)
            if save_gcn_embs:
                save(X, 'X_glove.pt')
            start_time = timeit.default_timer()
            prediction = model(graph_batch, emb, local_ids, node_counts,
                               global_ids, G, X, save_gcn_embs)
            test_time = timeit.default_timer() - start_time
            test_count = label.shape[0]
            logger.info(f"Test time per example: "
                        f"[{test_time / test_count} sec]")
            if prediction.dim() == 1:
                prediction = prediction.unsqueeze(1)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                prediction = prediction.to(device)
            loss = loss_func(prediction, label)
            preds.append(prediction.detach())
            trues.append(label.detach())
            losses.append(loss.detach())
    losses = mean(stack(losses))
    preds = cat(preds)
    ## Converting raw scores to probabilities using Sigmoid:
    preds = sigmoid(preds)
    ## Converting probabilities to class labels:
    preds = logit2label(preds.detach(), cls_thresh=0.5)
    trues = cat(trues)
    if n_classes == 1:
        result_dict = calculate_performance_bin_sk(trues, preds)
    else:
        result_dict = calculate_performance(trues, preds)
    test_output = {'preds': preds, 'trues': trues, 'result': result_dict}
    return losses, test_output
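## `calculate_performance` and `calculate_performance_bin_sk` are
## project-local and not shown here. A minimal sketch of the nested result
## dict the callers index into (result['f1']['macro']), built with
## scikit-learn; note the project's version appears to return tensors
## (callers invoke .item()), while this sketch returns plain floats:
def calculate_performance_sketch(trues, preds):
    """Macro / weighted multi-label scores from 0/1 tensors."""
    from sklearn.metrics import f1_score, precision_score, recall_score
    t, p = trues.cpu().numpy(), preds.cpu().numpy()
    return {
        'f1': {'macro': f1_score(t, p, average='macro'),
               'weighted': f1_score(t, p, average='weighted')},
        'precision': {'macro': precision_score(t, p, average='macro')},
        'recall': {'macro': recall_score(t, p, average='macro')},
    }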
def train_graph_classifier(
        model: GAT_Graph_Classifier,
        data_loader: torch.utils.data.dataloader.DataLoader,
        loss_func: torch.nn.modules.loss.BCEWithLogitsLoss,
        optimizer, epochs: int = 5,
        eval_data_loader: torch.utils.data.dataloader.DataLoader = None):
    """Trains the GAT graph classifier, evaluating after every epoch."""
    train_epoch_losses = []
    train_epoch_dict = OrderedDict()
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        preds = []
        trues = []
        for i, (graph_batch, label) in enumerate(data_loader):
            ## Store emb in a separate variable as self_loop removes emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            prediction = model(graph_batch, emb)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
            preds.append(prediction.detach())
            trues.append(label.detach())
        epoch_loss /= (i + 1)
        losses, test_output = test_graph_classifier(
            model, loss_func=loss_func, data_loader=eval_data_loader)
        logger.info(
            f"Epoch {epoch}, Train loss {epoch_loss}, Eval loss {losses},"
            f" Macro F1 {test_output['result']['f1']['macro'].item()}")
        train_epoch_losses.append(epoch_loss)
        preds = torch.cat(preds)
        ## Converting raw scores to probabilities using Sigmoid:
        preds = torch.sigmoid(preds)
        ## Converting probabilities to class labels:
        preds = logit2label(preds.detach(), cls_thresh=0.5)
        trues = torch.cat(trues)
        result_dict = calculate_performance(trues, preds)
        train_epoch_dict[epoch] = {
            'preds': preds,
            'trues': trues,
            'result': result_dict
        }
    return train_epoch_losses, train_epoch_dict
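## The (graph_batch, label) pairs above come from a DGL-style collate
## function passed to the DataLoader. A minimal sketch, assuming each
## sample is a (DGLGraph, label-tensor) pair; the project's version may
## differ:
def collate_graphs_sketch(samples):
    """Merge (graph, label) pairs into one batched DGLGraph + label tensor."""
    import dgl
    graphs, labels = map(list, zip(*samples))
    return dgl.batch(graphs), torch.stack(labels)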
def train_graph_gcn_classifier(
        model, G, X,
        data_loader: utils.data.dataloader.DataLoader,
        loss_func: nn.modules.loss.BCEWithLogitsLoss,
        optimizer, epochs: int = 5,
        eval_data_loader: utils.data.dataloader.DataLoader = None,
        test_data_loader: utils.data.dataloader.DataLoader = None,
        n_classes=cfg['data']['num_classes']):
    """Trains the joint GAT+GCN classifier, validating and testing each epoch."""
    logger.info("Started training...")
    train_epoch_losses = []
    train_epoch_dict = OrderedDict()
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        preds = []
        trues = []
        for i, (graph_batch, local_ids, label, global_ids,
                node_counts) in enumerate(data_loader):
            ## Store emb in a separate variable as self_loop removes emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                graph_batch = graph_batch.to(device)
                emb = emb.to(device)
                ## local_ids, node_counts and global_ids stay on the CPU.
                G = G.to(device)
                X = X.to(device)
                ## The loss below needs labels on the prediction's device:
                label = label.to(device)
            start_time = timeit.default_timer()
            prediction = model(graph_batch, emb, local_ids, node_counts,
                               global_ids, G, X)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                prediction = prediction.to(device)
            if prediction.dim() == 1:
                prediction = prediction.unsqueeze(1)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_time = timeit.default_timer() - start_time
            train_count = label.shape[0]
            logger.info(f"Training time per example: "
                        f"[{train_time / train_count} sec]")
            logger.info(f"Iteration {i}, loss: {loss.detach().item()}")
            epoch_loss += loss.detach().item()
            preds.append(prediction.detach())
            trues.append(label.detach())
        epoch_loss /= (i + 1)
        val_losses, val_output = eval_graph_classifier(
            model, G, X, loss_func=loss_func, data_loader=eval_data_loader)
        logger.info(f'val_output: \n{dumps(val_output["result"], indent=4)}')
        test_losses, test_output = eval_graph_classifier(
            model, G, X, loss_func=loss_func, data_loader=test_data_loader)
        logger.info(f'test_output: \n{dumps(test_output["result"], indent=4)}')
        logger.info(
            f"Epoch {epoch}, Train loss {epoch_loss}, val loss "
            f"{val_losses}, test loss {test_losses}, Val Macro F1 "
            f"{val_output['result']['f1']['macro'].item()}, Test Macro F1"
            f" {test_output['result']['f1']['macro'].item()}")
        train_epoch_losses.append(epoch_loss)
        preds = cat(preds)
        ## Converting raw scores to probabilities using Sigmoid:
        preds = sigmoid(preds)
        ## Converting probabilities to class labels:
        preds = logit2label(preds.detach(), cls_thresh=0.5)
        trues = cat(trues)
        if n_classes == 1:
            result_dict = calculate_performance_bin_sk(trues, preds)
        else:
            result_dict = calculate_performance(trues, preds)
        train_epoch_dict[epoch] = {
            'preds': preds,
            'trues': trues,
            'result': result_dict
        }
    return train_epoch_losses, train_epoch_dict
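## Why the unsqueeze(1) above: BCEWithLogitsLoss requires predictions and
## targets of matching shape, so 1-D logits in the binary (n_classes == 1)
## case are lifted to (batch, 1). A self-contained illustration; the names
## here are ours, not the project's:
def _bce_shape_demo():
    logits = torch.randn(8)                        # 1-D model output
    targets = torch.randint(0, 2, (8, 1)).float()  # (batch, 1) multi-hot
    return nn.BCEWithLogitsLoss()(logits.unsqueeze(1), targets)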
def predict_with_label(model, iterator, criterion=None, metric=True):
    """Predicts on a labelled iterator and optionally calculates performance.

    Args:
        model: trained classifier, run in eval mode.
        iterator: torchtext iterator yielding batches with .text and .ids.
        criterion: loss function; defaults to BCEWithLogitsLoss.
        metric: if True, compute performance metrics over all batches.

    Returns:
        Mean loss per batch and a dict of predictions, labels, ids, losses
        and results.
    """
    # initialize every epoch
    epoch_loss = 0
    if criterion is None:
        criterion = nn.BCEWithLogitsLoss()
    preds_trues = {
        'preds': [],
        'trues': [],
        'ids': [],
        'losses': [],
        'results': []
    }
    # deactivating dropout layers
    model.eval()
    # deactivates autograd
    with no_grad():
        for i, batch in enumerate(iterator):
            # retrieve text and no. of words
            text, text_lengths = batch.text
            # convert to 1d tensor
            predictions = model(text, text_lengths).squeeze()
            # compute loss
            batch_labels = torchtext_batch2multilabel(batch)
            preds_trues['preds'].append(predictions)
            preds_trues['trues'].append(batch_labels)
            preds_trues['ids'].append(batch.ids)
            loss = criterion(predictions, batch_labels)
            # keep track of the loss
            epoch_loss += loss.item()
            ## Per-batch losses stay a plain Python list of floats:
            preds_trues['losses'].append(loss.item())
    preds_trues['preds'] = cat(preds_trues['preds'])
    preds_trues['trues'] = cat(preds_trues['trues'])
    preds_trues['ids'] = cat(preds_trues['ids'])
    if metric:
        ## Converting raw scores to probabilities using Sigmoid:
        preds = sigmoid(preds_trues['preds'])
        ## Converting probabilities to class labels:
        preds = logit2label(preds.detach(), cls_thresh=0.5)
        result_dict = calculate_performance(preds_trues['trues'], preds)
        preds_trues['results'].append(result_dict)
    return epoch_loss / len(iterator), preds_trues
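## `torchtext_batch2multilabel` is project-local and not shown here. A
## plausible minimal sketch, assuming the batch exposes one binary field
## per class; the actual field names are unknown:
def torchtext_batch2multilabel_sketch(batch, label_fields):
    """Stack per-class binary fields into a (batch, n_classes) float tensor."""
    return stack([getattr(batch, f).float() for f in label_fields], dim=1)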