def predict_texts_nn(nn_model,
                     train_loader,
                     title_name=None,
                     num_classes=2):
        """Run ``nn_model`` over ``train_loader`` and report evaluation metrics.

        For every batch, ``batch[0]`` is fed to the model and ``batch[1]`` is
        taken as the ground-truth labels. The predicted class is the argmax of
        the model output along dimension 1. The function prints the elapsed
        time, a classification report and the set of predicted labels, then
        draws the confusion matrix and the ROC/AUC plot.

        Args:
            nn_model: Callable mapping a batch of inputs to per-class scores.
            train_loader: Iterable of batches indexable as ``(inputs, labels)``.
            title_name: Text inserted into the plot titles.
            num_classes: Number of classes, forwarded to ``plot_roc_auc``.

        Returns:
            None. All results are printed or plotted.
        """
        # NOTE(review): the model is neither moved to ``device`` nor switched
        # to eval mode here; presumably the caller has done both -- confirm.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        answer = []
        prediction = []

        start = time.time()
        with torch.no_grad():
            for batch in train_loader:
                inputs = torch.as_tensor(batch[0]).to(device)
                # Labels are only collected for the metrics, so they never
                # need to visit the accelerator.
                labels = torch.as_tensor(batch[1])

                score = nn_model(inputs)
                _, pred = torch.max(score, 1)

                prediction.extend(pred.cpu().numpy())
                answer.extend(labels.cpu().numpy())
        end = time.time()
        print("time : ", end - start)

        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps the reported precision and recall.
        print(classification_report(answer, prediction))
        print("predicted label: ", set(prediction))

        # model evaluation
        plot_confusion_matrix_heatmap(answer, prediction,
                                      "confusion matrix {}".format(title_name))
        plot_roc_auc(answer, prediction, title_name, num_classes)
Esempio n. 2
0
    def predict_embedding(model, device, train_iter, title_name=None, num_classes=2):
        """Evaluate ``model`` on the batches of ``train_iter`` and report metrics.

        Each batch must expose ``.text`` and ``.label``. The predicted class is
        the argmax of the model output along dimension 1. The function prints
        the elapsed time, a classification report and the set of predicted
        labels, then draws the confusion matrix and the ROC/AUC plot.

        Args:
            model: Callable mapping a text tensor to per-class scores.
            device: Device the text tensor is moved to before the forward pass.
            train_iter: Iterable of batches with ``text`` and ``label`` attributes.
            title_name: Text inserted into the plot titles.
            num_classes: Number of classes, forwarded to ``plot_roc_auc``.

        Returns:
            None. All results are printed or plotted.
        """
        answer = []
        prediction = []

        start = time.time()
        with torch.no_grad():
            for data in train_iter:
                # A length-2 ``text`` is treated as a pair whose first element
                # is the token tensor; anything else is used directly and
                # transposed.
                # NOTE(review): presumably the pair is (tokens, lengths); a
                # plain tensor whose first dimension is 2 would also take
                # this branch -- confirm against the iterator setup.
                if len(data.text) == 2:
                    text_tensor = data.text[0].to(device)
                else:
                    text_tensor = data.text.T.to(device)

                score = model(text_tensor)
                _, pred = torch.max(score, 1)

                prediction.extend(pred.cpu().numpy())
                # Labels are only collected for the metrics, so they are
                # brought straight to host memory.
                answer.extend(data.label.cpu().numpy())
        end = time.time()
        print("time : ", end - start)

        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps the reported precision and recall.
        print(classification_report(answer, prediction))
        print("predicted label: ", set(prediction))

        # model evaluation
        plot_confusion_matrix_heatmap(answer, prediction, "confusion matrix {}".format(title_name))
        plot_roc_auc(answer, prediction, title_name, num_classes)
    def predict(model,
                device,
                batch_iter,
                token_name="distilbert",
                title_name=None,
                num_classes=2):
        """Evaluate a length-aware ``model`` on ``batch_iter`` and report metrics.

        Each batch must expose ``.label`` and a ``.text`` whose element 0 is
        the token tensor and element 1 is passed to the model as the text
        lengths. The predicted class is the argmax of the model output along
        dimension 1. The function prints the elapsed time, a classification
        report and the set of predicted labels, then draws the confusion
        matrix and the ROC/AUC plot.

        Args:
            model: Callable taking ``(text, text_lengths)`` and returning
                per-class scores.
            device: Device the inputs are moved to before the forward pass.
            batch_iter: Iterable of batches with ``text`` and ``label``.
            token_name: When equal to ``"distilbert"`` the token tensor is used
                as is; for any other value it is transposed first.
            title_name: Text inserted into the plot titles.
            num_classes: Number of classes, forwarded to ``plot_roc_auc``.

        Returns:
            None. All results are printed or plotted.
        """
        answer = []
        prediction = []
        keep_layout = token_name == "distilbert"

        start = time.time()
        with torch.no_grad():
            for batch in batch_iter:
                tokens = batch.text[0]
                if not keep_layout:
                    tokens = tokens.T
                text = tokens.to(device)
                text_lengths = batch.text[1].to(device)

                score = model(text, text_lengths)
                _, pred = torch.max(score, 1)

                prediction.extend(pred.cpu().numpy())
                # Labels are only collected for the metrics, so they are
                # brought straight to host memory.
                answer.extend(batch.label.cpu().numpy())
        end = time.time()
        print("time : ", end - start)

        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps the reported precision and recall.
        print(classification_report(answer, prediction))
        print("predicted label: ", set(prediction))

        # model evaluation
        plot_confusion_matrix_heatmap(answer, prediction,
                                      "confusion matrix {}".format(title_name))
        plot_roc_auc(answer, prediction, title_name, num_classes)
    def predict_texts_logreg(logreg, test_text, test_label, title_name=None):
        """Evaluate the classifier ``logreg`` on inferred text vectors.

        Every item of ``test_text`` is turned into a feature vector with the
        module-level ``model.infer_vector`` and ``test_label`` is encoded with
        the module-level ``le.transform``. The function prints a
        classification report and the set of predicted labels, draws the
        confusion matrix and then attempts the ROC/AUC plot.

        Args:
            logreg: Fitted estimator exposing ``predict``.
            test_text: Iterable of documents accepted by ``model.infer_vector``.
            test_label: Raw labels aligned with ``test_text``.
            title_name: Text inserted into the plot titles.

        Returns:
            None. All results are printed or plotted.
        """
        # Iterate the documents directly rather than indexing by position, so
        # the vectors stay aligned with np.asarray(test_label) even when the
        # container does not use 0..n-1 integer keys.
        test_x = np.asarray([model.infer_vector(doc) for doc in test_text])
        test_y = le.transform(np.asarray(test_label))
        preds = logreg.predict(test_x)

        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps the reported precision and recall.
        print(classification_report(test_y, preds))
        print("predicted label: ", set(preds))

        # plot confusion matrix
        plot_confusion_matrix_heatmap(test_y, preds,
                                      "confusion matrix {}".format(title_name))

        # The ROC plot stays best-effort, but the failure is reported rather
        # than silently discarded, and KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        try:
            plot_roc_auc(test_y, preds, title_name)
        except Exception as exc:
            print("ROC/AUC plot skipped: ", exc)