def accuracy(config):
    """Evaluate a trained MoodPredictionModel on an 80/20 chronological split.

    Loads the dataset from ``config.data_path``, rebuilds the model from the
    checkpoint at ``config.saved_model``, and prints the fraction of
    predictions that fall within ``config.corr_thres`` of the targets for
    both the train and the test portion.
    """
    data = pd.read_csv(config.data_path)
    inputs, targets = dataloader.process(data, window_size=5)

    device = torch.device(config.device)

    # Same 80/20 chronological split as used during training.
    n = len(targets)
    split = round(0.8 * n)
    train_inputs = inputs[:split].to(device)
    train_targets = targets[:split].to(device)
    test_inputs = inputs[split:].to(device)
    test_targets = targets[split:].to(device)

    print("Initializing LSTM model...")
    model = MoodPredictionModel(config.input_length, config.input_dim,
                                config.num_hidden,
                                config.num_layers).to(device)

    print("Loading model parameters from trained model")
    # map_location lets a checkpoint saved on GPU load on a CPU-only host
    # (and vice versa), matching the device the user asked for.
    model.load_state_dict(torch.load(config.saved_model,
                                     map_location=device))
    model.eval()
    print(model)

    # Pure inference: disable autograd to avoid building a gradient graph.
    with torch.no_grad():
        train_out = model(train_inputs).squeeze()
        test_out = model(test_inputs).squeeze()

    # A prediction counts as correct when it lies within corr_thres of the
    # target (regression scored as thresholded accuracy).
    correct_train = (abs(train_out - train_targets) <
                     config.corr_thres).sum().item()
    train_accuracy = correct_train / train_out.size(0)

    correct_test = (abs(test_out - test_targets) <
                    config.corr_thres).sum().item()
    test_accuracy = correct_test / test_out.size(0)

    print(f"Accuracy on training set: {train_accuracy*100}%")
    print(f"Accuracy on testing set: {test_accuracy*100}%")
# Beispiel #2 (0)
def classify_coqa(args, device):
    """Evaluate the ELMo question classifier on the CoQA portion of the
    specificity-QA dev set, reporting accuracy split by label source."""
    args.config.batch_size = 1
    config = args.config

    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)

    with open('data/specificity_qa_dataset/dev.pickle', 'rb') as f:
        data = pickle.load(f)

    class_map = ['overview', 'conceptual']
    # (label_source, was_correct) -> count
    tallies = {('hand', True): 0, ('hand', False): 0,
               ('rules', True): 0, ('rules', False): 0}

    with torch.no_grad():
        model.eval()
        for i, instance in enumerate(data):
            if instance['dataset'] == 'quac':
                continue
            if i % 100 == 0:
                print("%d / %d" % (i, len(data)))
            for para in instance['paragraphs']:
                for qa in para['qas']:
                    mode = qa['high_low_mode']
                    # NOTE(review): rule-labelled questions are skipped here,
                    # so the 'rules' tallies below can never move off zero —
                    # same behavior as the original if/elif chain.
                    if mode == 'rules':
                        continue
                    if qa['high_low'] not in ('overview', 'conceptual'):
                        continue
                    _, preds, _ = model({
                        'question': [process(qa['question'])],
                        'class': torch.LongTensor([0])
                    })
                    if mode in ('hand', 'rules'):
                        hit = class_map[preds.item()] == qa['high_low']
                        tallies[(mode, hit)] += 1

        correct_hand = tallies[('hand', True)]
        incorrect_hand = tallies[('hand', False)]
        correct_rule = tallies[('rules', True)]
        incorrect_rule = tallies[('rules', False)]
        print("%d / %d correct for hand" %
              (correct_hand, correct_hand + incorrect_hand))
        print("%d / %d correct for rules" %
              (correct_rule, correct_rule + incorrect_rule))
        print("%d / %d total correct" %
              (correct_rule + correct_hand,
               correct_rule + incorrect_rule + correct_hand + incorrect_hand))
# Beispiel #3 (0)
def classify_final(args, device):
    """Score how often automatic specificity labels agree with the crowd
    'high'/'low' tags, separately for gold and generated questions."""
    args.config.batch_size = 1
    config = args.config

    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)

    correct_class = {'gold': 0, 'gen': 0}
    questions_so_far = {'gold': {}, 'gen': {}}
    total = {'gold': 0, 'gen': 0}

    # Labels treated as "high-level" questions.
    high_labels = ('overview', 'causal', 'instrumental', 'judgemental')

    with torch.no_grad():
        model.eval()
        with open(
                'doc2qa/final/final_crowd/results/question_more_relevant.csv',
                'r') as f:
            reader = csv.reader(f)
            for row_idx, row in enumerate(reader):
                # Leftover debug marker from the original — kept verbatim.
                if row[8][:3] == 'Zen':
                    print("yolo")
                    continue
                # Skip the CSV header and golden check rows.
                if row_idx == 0 or row[2] == 'golden':
                    continue
                source = row[9]  # presumably 'gold' or 'gen' — matches the dicts above
                for ques, tag in zip((row[12], row[13]), (row[16], row[17])):
                    if ques in questions_so_far[source]:
                        continue
                    total[source] += 1
                    auto_label = labeller(ques)
                    if auto_label == 'none':
                        # Rule-based labeller abstained; fall back to the
                        # neural classifier.
                        _, preds, _ = model({
                            'question': [process(ques)],
                            'class': torch.LongTensor([0])
                        })
                        auto_label = ('overview' if preds.item() == 0
                                      else 'conceptual')
                    questions_so_far[source][ques] = 1
                    if auto_label in high_labels and tag == 'high':
                        print(auto_label)
                        correct_class[source] += 1
                    elif auto_label == 'conceptual' and tag == 'low':
                        correct_class[source] += 1

            print("Gold correct class = %d / %d" %
                  (correct_class['gold'], total['gold']))
            print("Gen correct class = %d / %d" %
                  (correct_class['gen'], total['gen']))
# Beispiel #4 (0)
def classify(args, device):
    """Fill in missing 'high_low' labels in the dev set with the classifier.

    Questions whose mode is 'idk' get labelled: blank questions default to
    'overview' (mode left untouched, as in the original), everything else is
    classified by the model and re-tagged as mode 'classifier'.  The updated
    dataset is written back to the same pickle file.
    """
    args.config.batch_size = 1
    config = args.config

    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)

    with open('data/specificity_qa_dataset/dev.pickle', 'rb') as f:
        data = pickle.load(f)

    with torch.no_grad():
        model.eval()
        for idx, instance in enumerate(data):
            if instance['dataset'] in ('quac', 'coqa'):
                continue
            # Original guarded this with `i % 1 == 0`, which is always true.
            print("%d / %d" % (idx, len(data)))
            for para in instance['paragraphs']:
                for qa in para['qas']:
                    if qa['high_low_mode'] != 'idk':
                        continue
                    if len(qa['question'].strip()) == 0:
                        # Blank questions cannot be classified; default them.
                        qa['high_low'] = 'overview'
                    else:
                        _, preds, _ = model({
                            'question': [process(qa['question'])],
                            'class': torch.LongTensor([0])
                        })
                        qa['high_low_mode'] = 'classifier'
                        qa['high_low'] = ('overview' if preds.item() == 0
                                          else 'conceptual')

    with open('data/specificity_qa_dataset/dev.pickle', 'wb') as f:
        pickle.dump(data, f)
# Beispiel #5 (0)
                        type=int,
                        default=50,
                        metavar='N',
                        help='input batch size for testing (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.00001,
                        metavar='LR',
                        help='learning rate (default: 0.00001)')
    args = parser.parse_args()  # load parameters

    torch.manual_seed(20)  # generate random seeds for shuffle dataset
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # get data from folders and txt files
    trainData, trainGT, trainLabel, testData, testGT, testLabel = process(
        class_list, rote)
    print("------------Data Load Finished !!------------")

    # create my dataset by override torch.utils.data.DataSet
    train_dataset = MyDataLoader(trainData, trainGT, trainLabel)
    test_dataset = MyDataLoader(testData, testGT, testLabel)

    # dataloader to pytorch network
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.test_batch,
                             shuffle=True)
    eval_loader = DataLoader(dataset=test_dataset, shuffle=False)
# Beispiel #6 (0)
import pandas as pd
import torch

import dataloader

if __name__ == "__main__":
    # Baseline: assume mood stays the same as the previous step, and measure
    # how often consecutive (rounded) targets actually agree.
    frame = pd.read_csv("./dataset_mood_smartphone.csv")
    _, targets = dataloader.process(frame)

    targets = torch.round(targets)

    # 80/20 chronological split.
    split = round(.8 * len(targets))
    train_targets = targets[:split]
    test_targets = targets[split:]

    # Element i is True when target i+1 repeats target i.
    train_match = train_targets[:-1] == train_targets[1:]
    test_match = test_targets[:-1] == test_targets[1:]

    train_accuracy = sum(train_match) / len(train_match)
    test_accuracy = sum(test_match) / len(test_match)

    print(f"Accuracy on training set: {train_accuracy}")
    print(f"Accuracy on testing set: {test_accuracy}")
# Beispiel #7 (0)
import torch
import pandas as pd
from sklearn import svm

import dataloader

if __name__ == "__main__":
    data = pd.read_csv("./dataset_mood_smartphone.csv")
    inputs, targets = dataloader.process(data, window_size=2)

    inputs = inputs.mean(dim=1)
    targets = torch.round(targets)

    n = len(targets)
    train_inputs, train_targets = inputs[:round(.8 * n)], targets[:round(.8 *
                                                                         n)]
    test_inputs, test_targets = inputs[round(.8 * n):], targets[round(.8 * n):]

    clf = svm.SVC()
    clf.fit(train_inputs, train_targets)

    train_out = clf.predict(train_inputs)
    test_out = clf.predict(test_inputs)

    train_diff = torch.tensor(train_out) == train_targets
    test_diff = torch.tensor(test_out) == test_targets

    train_accuracy = sum(train_diff) / len(train_diff)
    test_accuracy = sum(test_diff) / len(test_diff)

    print(f"Accuracy on training set: {train_accuracy}")