Example #1
def test_graph_classifier(model: GAT_Graph_Classifier, loss_func,
                          data_loader: torch.utils.data.dataloader.DataLoader):
    model.eval()
    preds = []
    trues = []
    losses = []
    for iter, (graph_batch, label) in enumerate(data_loader):
        ## Read emb into a separate variable, as adding self-loops removes the emb info:
        emb = graph_batch.ndata['emb']
        # graph_batch = dgl.add_self_loop(graph_batch)
        prediction = model(graph_batch, emb)
        loss = loss_func(prediction, label)
        preds.append(prediction.detach())
        trues.append(label.detach())
        losses.append(loss.detach())
    losses = torch.mean(torch.stack(losses))
    preds = torch.cat(preds)

    ## Converting raw scores to probabilities using Sigmoid:
    preds = torch.sigmoid(preds)

    ## Converting probabilities to class labels:
    preds = logit2label(preds.detach(), cls_thresh=0.5)
    trues = torch.cat(trues)
    result_dict = calculate_performance(trues, preds)
    test_output = {'preds': preds, 'trues': trues, 'result': result_dict}
    # logger.info(dumps(result_dict, indent=4))

    return losses, test_output
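
## logit2label is used above but not defined in this snippet; a minimal,
## runnable sketch of the thresholding it appears to perform (an assumption
## inferred from its call sites: per-class probabilities -> multi-hot labels):
import torch

def logit2label_sketch(probs: torch.Tensor, cls_thresh: float = 0.5) -> torch.Tensor:
    """Hypothetical stand-in: threshold each class probability independently."""
    return (probs >= cls_thresh).long()

## e.g. sigmoid([[2.0, -1.0]]) ~= [[0.88, 0.27]] -> labels [[1, 0]]
print(logit2label_sketch(torch.sigmoid(torch.tensor([[2.0, -1.0]]))))

Example #2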
def eval_graph_classifier(model: GAT_GCN_Classifier,
                          G,
                          X,
                          loss_func,
                          data_loader: utils.data.dataloader.DataLoader,
                          n_classes=cfg['data']['num_classes'],
                          save_gcn_embs=False):
    model.eval()
    preds = []
    trues = []
    losses = []
    for iter, (graph_batch, local_ids, label, global_ids,
               node_counts) in enumerate(data_loader):
        ## Read emb into a separate variable, as adding self-loops removes the emb info:
        emb = graph_batch.ndata['emb']
        # graph_batch = dgl.add_self_loop(graph_batch)
        if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
            graph_batch = graph_batch.to(device)
            emb = emb.to(device)
            label = label.to(device)  # keep the target on the same device as prediction
            # local_ids = local_ids.to(device)
            # node_counts = node_counts.to(device)
            # global_ids = global_ids.to(device)
            G = G.to(device)
            X = X.to(device)
        if save_gcn_embs:
            save(X, 'X_glove.pt')
        start_time = timeit.default_timer()
        prediction = model(graph_batch, emb, local_ids, node_counts,
                           global_ids, G, X, save_gcn_embs)
        test_time = timeit.default_timer() - start_time
        test_count = label.shape[0]
        logger.info(f"Test time per example: [{test_time / test_count} sec]")
        if prediction.dim() == 1:
            prediction = prediction.unsqueeze(1)
        if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
            prediction = prediction.to(device)
        loss = loss_func(prediction, label)
        preds.append(prediction.detach())
        trues.append(label.detach())
        losses.append(loss.detach())
    losses = mean(stack(losses))
    preds = cat(preds)

    ## Converting raw scores to probabilities using Sigmoid:
    preds = sigmoid(preds)

    ## Converting probabilities to class labels:
    preds = logit2label(preds.detach(), cls_thresh=0.5)
    trues = cat(trues)
    if n_classes == 1:
        result_dict = calculate_performance_bin_sk(trues, preds)
    else:
        result_dict = calculate_performance(trues, preds)
    test_output = {'preds': preds, 'trues': trues, 'result': result_dict}
    # logger.info(dumps(result_dict, indent=4))

    return losses, test_output
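
## calculate_performance_bin_sk is referenced above but not shown; a hedged
## sketch of what such a binary-metrics helper could look like with
## scikit-learn (hypothetical implementation, not the repo's actual code):
import torch
from sklearn import metrics

def calculate_performance_bin_sk_sketch(trues: torch.Tensor, preds: torch.Tensor) -> dict:
    """Assumed result layout, mirroring the result_dict['f1']['macro'] access above."""
    t = trues.cpu().numpy().ravel()
    p = preds.cpu().numpy().ravel()
    return {
        'accuracy': metrics.accuracy_score(t, p),
        'f1': {'macro': metrics.f1_score(t, p, average='macro')},
    }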
Example #3
def train_graph_classifier(
        model: GAT_Graph_Classifier,
        data_loader: torch.utils.data.dataloader.DataLoader,
        loss_func: torch.nn.modules.loss.BCEWithLogitsLoss,
        optimizer,
        epochs: int = 5,
        eval_data_loader: torch.utils.data.dataloader.DataLoader = None):
    train_epoch_losses = []
    train_epoch_dict = OrderedDict()
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        preds = []
        trues = []
        for iter, (graph_batch, label) in enumerate(data_loader):
            ## Read emb into a separate variable, as adding self-loops removes the emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            prediction = model(graph_batch, emb)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
            preds.append(prediction.detach())
            trues.append(label.detach())
        epoch_loss /= (iter + 1)
        losses, test_output = test_graph_classifier(
            model, loss_func=loss_func, data_loader=eval_data_loader)
        logger.info(
            f"Epoch {epoch}, Train loss {epoch_loss}, Eval loss {losses},"
            f" Macro F1 {test_output['result']['f1']['macro'].item()}")
        # logger.info(dumps(test_output['result'], indent=4))
        train_epoch_losses.append(epoch_loss)
        preds = torch.cat(preds)

        ## Converting raw scores to probabilities using Sigmoid:
        preds = torch.sigmoid(preds)

        ## Converting probabilities to class labels:
        preds = logit2label(preds.detach(), cls_thresh=0.5)
        trues = torch.cat(trues)
        result_dict = calculate_performance(trues, preds)
        # logger.info(dumps(result_dict, indent=4))
        train_epoch_dict[epoch] = {
            'preds': preds,
            'trues': trues,
            'result': result_dict
        }
        # logger.info(f'Epoch {epoch} result: \n{result_dict}')

    return train_epoch_losses, train_epoch_dict
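
## Hedged usage sketch (assumed objects, not from the repo): wiring the
## trainer above to an optimizer. `model`, `train_loader` and `val_loader`
## are hypothetical, pre-built objects.
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# loss_func = torch.nn.BCEWithLogitsLoss()
# train_losses, train_dict = train_graph_classifier(
#     model, train_loader, loss_func, optimizer, epochs=5,
#     eval_data_loader=val_loader)

Example #4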
def train_graph_classifier(
        model,
        G,
        X,
        data_loader: utils.data.dataloader.DataLoader,
        loss_func: nn.modules.loss.BCEWithLogitsLoss,
        optimizer,
        epochs: int = 5,
        eval_data_loader: utils.data.dataloader.DataLoader = None,
        test_data_loader: utils.data.dataloader.DataLoader = None,
        n_classes=cfg['data']['num_classes']):
    logger.info("Started training...")
    train_epoch_losses = []
    train_epoch_dict = OrderedDict()
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        preds = []
        trues = []
        for iter, (graph_batch, local_ids, label, global_ids,
                   node_counts) in enumerate(data_loader):
            ## Read emb into a separate variable, as adding self-loops removes the emb info:
            emb = graph_batch.ndata['emb']
            # graph_batch = dgl.add_self_loop(graph_batch)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                graph_batch = graph_batch.to(device)
                emb = emb.to(device)
                label = label.to(device)  # keep the target on the same device as prediction
                # local_ids = local_ids.to(device)
                # node_counts = node_counts.to(device)
                # global_ids = global_ids.to(device)
                G = G.to(device)
                X = X.to(device)
            start_time = timeit.default_timer()
            prediction = model(graph_batch, emb, local_ids, node_counts,
                               global_ids, G, X)
            # if epoch == 30:
            #     from evaluations import get_freq_disjoint_token_vecs, plot
            #     glove_vecs = get_freq_disjoint_token_vecs(S_vocab, T_vocab, X)
            #     plot(glove_vecs)
            if cfg['model']['use_cuda'][plat][user] and cuda.is_available():
                prediction = prediction.to(device)
            if prediction.dim() == 1:
                prediction = prediction.unsqueeze(1)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_time = timeit.default_timer() - start_time
            train_count = label.shape[0]
            logger.info(
                f"Training time per example: [{train_time / train_count} sec]")
            logger.info(f"Iteration {iter}, loss: {loss.detach().item()}")
            epoch_loss += loss.detach().item()
            preds.append(prediction.detach())
            trues.append(label.detach())
        epoch_loss /= (iter + 1)
        val_losses, val_output = eval_graph_classifier(
            model, G, X, loss_func=loss_func, data_loader=eval_data_loader)
        logger.info(f'val_output: \n{dumps(val_output["result"], indent=4)}')
        test_losses, test_output = eval_graph_classifier(
            model, G, X, loss_func=loss_func, data_loader=test_data_loader)
        logger.info(f'test_output: \n{dumps(test_output["result"], indent=4)}')
        logger.info(
            f"Epoch {epoch}, Train loss {epoch_loss}, val loss "
            f"{val_losses}, test loss {test_losses}, Val Macro F1 "
            f"{val_output['result']['f1']['macro'].item()} Test Macro F1"
            f" {test_output['result']['f1']['macro'].item()}")
        # logger.info(f"Epoch {epoch}, Train loss {epoch_loss}, val loss "
        #             f"{val_losses}, Val Macro F1 {val_output['result']['f1']['macro'].item()}")
        train_epoch_losses.append(epoch_loss)
        preds = cat(preds)

        ## Converting raw scores to probabilities using Sigmoid:
        preds = sigmoid(preds)

        ## Converting probabilities to class labels:
        preds = logit2label(preds.detach(), cls_thresh=0.5)
        trues = cat(trues)
        if n_classes == 1:
            result_dict = calculate_performance_bin_sk(trues, preds)
        else:
            result_dict = calculate_performance(trues, preds)
        # logger.info(dumps(result_dict, indent=4))
        train_epoch_dict[epoch] = {
            'preds': preds,
            'trues': trues,
            'result': result_dict
        }
        # logger.info(f'Epoch {epoch} result: \n{result_dict}')

    return train_epoch_losses, train_epoch_dict
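
## The per-example timing pattern used in the loops above, isolated as a
## runnable sketch (the matmul stands in for the model's forward pass):
import timeit
import torch

batch_size = 64
start_time = timeit.default_timer()
_ = torch.randn(batch_size, 128) @ torch.randn(128, 4)  # stand-in forward pass
train_time = timeit.default_timer() - start_time
print(f"Training time per example: [{train_time / batch_size} sec]")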
Example #5
def predict_with_label(model, iterator, criterion=None, metric=True):
    """ Predicts and calculates performance. Labels mandatory

    Args:
        model:
        iterator:
        criterion:

    Returns:

    """
    # initialize every epoch
    epoch_loss = 0

    if criterion is None:
        criterion = nn.BCEWithLogitsLoss()

    preds_trues = {
        'preds': [],
        'trues': [],
        'ids': [],
        'losses': [],
        'results': []
    }

    # deactivating dropout layers
    model.eval()

    # deactivates autograd
    with no_grad():
        for i, batch in enumerate(iterator):
            # retrieve text and no. of words
            text, text_lengths = batch.text

            # convert to 1d tensor
            predictions = model(text, text_lengths).squeeze()

            # compute loss and accuracy
            batch_labels = torchtext_batch2multilabel(batch)
            preds_trues['preds'].append(predictions)
            preds_trues['trues'].append(batch_labels)
            preds_trues['ids'].append(batch.ids)
            loss = criterion(predictions, batch_labels)

            # keep track of the running loss; store the per-batch loss,
            # not the running total
            epoch_loss += loss.item()
            preds_trues['losses'].append(loss.item())
            # epoch_acc += acc.item()
            # epoch_acc += acc["accuracy"]["unnormalize"]
        if metric:
            ## Converting raw scores to probabilities using Sigmoid
            ## (over all batches, not just the last one):
            preds = sigmoid(cat(preds_trues['preds']))

            ## Converting probabilities to class labels:
            preds = logit2label(preds.detach(), cls_thresh=0.5)
            trues = cat(preds_trues['trues'])
            result_dict = calculate_performance(trues, preds)
            preds_trues['results'].append(result_dict)

        preds_trues['preds'] = cat(preds_trues['preds'])
        preds_trues['trues'] = cat(preds_trues['trues'])
        preds_trues['ids'] = cat(preds_trues['ids'])
        ## losses holds Python floats, so build a tensor rather than cat()
        ## (assumes `tensor` is imported from torch alongside cat/sigmoid):
        preds_trues['losses'] = tensor(preds_trues['losses'])

    return epoch_loss / len(iterator), preds_trues
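
## The unpacking `text, text_lengths = batch.text` above assumes a legacy
## torchtext Field built with include_lengths=True. Hedged usage sketch
## (`model` and `test_iterator` are hypothetical, pre-built objects):
# avg_loss, outputs = predict_with_label(model, test_iterator)
# logger.info(f"Avg test loss: {avg_loss}")
# if outputs['results']:
#     logger.info(outputs['results'][-1])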