Example #1
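All of the snippets below appear to come from the same PyTorch character-tagging project and rely on module-level imports and project helpers that the snippets themselves do not show. A minimal sketch of that assumed context (every name here is inferred from the call sites, not confirmed by the source):

# Assumed module-level context for the examples below; all names are
# inferred from how the snippets use them.
import argparse
import json

import torch
import torch.nn as nn
import torch.optim as optim

# Project-local names assumed to be defined elsewhere in the repository:
# VOCAB      - the character vocabulary string the model indexes into
# MyModel0   - the Bi-LSTM tagger: MyModel0(vocab_size, embed_dim, hidden_size)
# MyDataset  - loads the train/val/test dictionaries saved as .pth files
# pred_to_dict, truth_to_dict, calc_accuracy, compare_truth, color_print,
# train, validate, preprocess - helper functions the snippets call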
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device", default="cpu")
    parser.add_argument("-i", "--hidden-size", type=int, default=256)
    parser.add_argument("-o", "--outputFolder", type=str)

    args = parser.parse_args()
    args.device = torch.device(args.device)

    model = MyModel0(len(VOCAB), 16, args.hidden_size).to(args.device)
    dataset = MyDataset(None, args.device, test_path="/input/data/test_dict2.pth")
    # Normalize tabs to spaces in the first test entry (only the first key
    # is cleaned here).
    k = next(iter(dataset.test_dict))
    dataset.test_dict[k] = dataset.test_dict[k].replace("\t", " ")

    model.load_state_dict(torch.load("/input/model.pth", map_location='cpu'))

    model.eval()
    with torch.no_grad():
        for key in dataset.test_dict.keys():
            text_tensor = dataset.get_test_data(key)

            oupt = model(text_tensor)
            prob = torch.nn.functional.softmax(oupt, dim=2)
            prob, pred = torch.max(prob, dim=2)

            prob = prob.squeeze().cpu().numpy()
            pred = pred.squeeze().cpu().numpy()

            real_text = dataset.test_dict[key]
            result = pred_to_dict(real_text, pred, prob)

            # Note: the output path does not include the key, so every
            # iteration overwrites the same result.json file.
            with open(args.outputFolder + "result.json", "w", encoding="utf-8") as json_opened:
                json.dump(result, json_opened, indent=4)

            print(key)
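If this function is exposed as a script entry point, a hypothetical invocation (the script name test.py is an assumption) would be:

python test.py --device cpu --hidden-size 256 --outputFolder results/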
Example #2
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device", default="cpu")
    parser.add_argument("-i", "--hidden-size", type=int, default=256)

    args = parser.parse_args()
    args.device = torch.device(args.device)

    model = MyModel0(len(VOCAB), 16, args.hidden_size).to(args.device)
    dataset = MyDataset(None, args.device, test_path="data/testdict.pth")

    # map_location guards against loading a GPU-saved checkpoint on a
    # CPU-only run.
    model.load_state_dict(torch.load("Bi-LSTM_model.pth",
                                     map_location=args.device))

    model.eval()
    with torch.no_grad():
        for k in dataset.test_dict.keys():
            text_tensor = dataset.get_test_data(k)

            oupt = model(text_tensor)
            prob = torch.nn.functional.softmax(oupt, dim=2)
            prob, pred = torch.max(prob, dim=2)

            prob = prob.squeeze().cpu().numpy()
            pred = pred.squeeze().cpu().numpy()

            real_text = dataset.test_dict[k]
            result = pred_to_dict(real_text, pred, prob)

            # Assumes the results/ directory already exists.
            with open("results/" + k + ".json", "w",
                      encoding="utf-8") as json_opened:
                json.dump(result, json_opened, indent=4)

            print(k)
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device", default="cpu")
    parser.add_argument("-b", "--batch_size", type=int, default=16)
    parser.add_argument("-e", "--max_epoch", type=int, default=1500)
    parser.add_argument("-v", "--val-at", type=int, default=100)
    parser.add_argument("-i", "--hidden-size", type=int, default=256)
    parser.add_argument("--val-size", type=int, default=76)

    args = parser.parse_args()
    args.device = torch.device(args.device)

    model = MyModel0(len(VOCAB), 16, args.hidden_size).to(args.device)

    dataset = MyDataset(
        "data/data_dict4.pth",
        args.device,
        val_size=args.val_size,
        test_path="data/test_dict.pth",
    )

    criterion = nn.CrossEntropyLoss(
        weight=torch.tensor([0.1, 1, 1.2, 0.8, 1.5], device=args.device))
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1000)
    # Note: scheduler.step() is never called in this function, so the StepLR
    # schedule currently has no effect on the learning rate.

    for i in range(args.max_epoch // args.val_at):
        train(
            model,
            dataset,
            criterion,
            optimizer,
            (i * args.val_at + 1, (i + 1) * args.val_at + 1),
            args.batch_size,
        )
        # validate(model, dataset)

    validate(model, dataset, batch_size=args.val_size)  # full validation split

    torch.save(model.state_dict(), "model.pth")

    model.eval()
    with torch.no_grad():
        for key in dataset.test_dict.keys():
            text_tensor = dataset.get_test_data(key)

            oupt = model(text_tensor)
            prob = torch.nn.functional.softmax(oupt, dim=2)
            prob, pred = torch.max(prob, dim=2)

            prob = prob.squeeze().cpu().numpy()
            pred = pred.squeeze().cpu().numpy()

            real_text = dataset.test_dict[key]
            result = pred_to_dict(real_text, pred, prob)

            with open("results/" + key + ".json", "w",
                      encoding="utf-8") as json_opened:
                json.dump(result, json_opened, indent=4)
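With the defaults above, max_epoch // val_at = 1500 // 100 = 15, so train() is called 15 times over the epoch ranges (1, 101), (101, 201), ..., (1401, 1501) before the final validation pass and the test-set inference loop.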
Example #4
def validate(model, dataset, batch_size=1, print_size=10):
    # Run the model on one validation batch, print the first print_size
    # predictions alongside the ground truth, then report average accuracies.
    model.eval()
    with torch.no_grad():
        keys, text, truth = dataset.get_val_data(batch_size=batch_size)

        oupt = model(text)
        prob = torch.nn.functional.softmax(oupt, dim=2)
        prob, pred = torch.max(prob, dim=2)

        prob = prob.cpu().numpy()
        pred = pred.cpu().numpy()

        class_acc = 0.0
        char_acc = 0.0

        for i, key in enumerate(keys):
            real_text, real_label = dataset.val_dict[key]
            result = pred_to_dict(real_text, pred[:, i], prob[:, i])
            ground_truth = truth_to_dict(real_text, real_label)

            class_acc_unit = calc_accuracy(result, ground_truth)
            char_acc_unit = compare_truth(result, ground_truth)
            class_acc += class_acc_unit
            char_acc += char_acc_unit

            if i < print_size:
                print("====== Val. number %d ======" % i)
                for k, v in result.items():
                    print(f"{k:>8}: {v}")
                print()

                for k, v in ground_truth.items():
                    print(f"{k:>8}: {v}")

                print("-ACCURACY(Class): %.2f" % class_acc_unit)
                print("-ACCURACY(Char) : %.2f" % char_acc_unit)
                print()

                color_print(real_text, pred[:, i])
                print("============================")
                print()

        print("=ACCURACY(Class): %.2f" % (class_acc * 100 / batch_size))
        print("=ACCURACY(Char) : %.2f" % (char_acc * 100 / batch_size))
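This appears to be the validate() that Example #3 calls after training: the last two lines divide the summed per-sample accuracies by batch_size and multiply by 100 to report batch-average percentages.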
Example #5
def validate(model, dataset, batch_size=1):
    model.eval()
    with torch.no_grad():
        keys, text, truth = dataset.get_val_data(batch_size=batch_size)

        oupt = model(text)
        prob = torch.nn.functional.softmax(oupt, dim=2)
        prob, pred = torch.max(prob, dim=2)

        prob = prob.cpu().numpy()
        pred = pred.cpu().numpy()

        for i, key in enumerate(keys):
            real_text, _ = dataset.val_dict[key]
            result = pred_to_dict(real_text, pred[:, i], prob[:, i])

            for k, v in result.items():
                print(f"{k:>8}: {v}")

            color_print(real_text, pred[:, i])
Example #6
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device", default="cpu")
    parser.add_argument("-i", "--hidden-size", type=int, default=256)
    parser.add_argument("--name", type=str, default="")

    # Parse the arguments once and derive the target key from --name.
    args = parser.parse_args()
    save_name = args.name.split(".")[0]  # strip the file extension
    args.device = torch.device(args.device)

    model = MyModel0(len(VOCAB), 16, args.hidden_size).to(args.device)
    dataset = MyDataset(None, args.device, test_path="C:\\Users\\lenovo\\Desktop\\Class\\Network\\Project\\task3\\data/test_dict.pth")

    model.load_state_dict(torch.load(
        "C:\\Users\\lenovo\\Desktop\\Class\\Network\\Project\\task3\\src\\model.pth",
        map_location=args.device))

    model.eval()
    with torch.no_grad():
        for key in dataset.test_dict.keys():
            if key != save_name:  # only process the sample named by --name
                continue
            text_tensor = dataset.get_test_data(key)

            oupt = model(text_tensor)
            prob = torch.nn.functional.softmax(oupt, dim=2)
            prob, pred = torch.max(prob, dim=2)

            prob = prob.squeeze().cpu().numpy()
            pred = pred.squeeze().cpu().numpy()

            real_text = dataset.test_dict[key]
            result = pred_to_dict(real_text, pred, prob)

            with open("results/" + key + ".json", "w", encoding="utf-8") as json_opened:
                json.dump(result, json_opened, indent=4)

            print(key)
Example #7
def inference(text):
    text[0] = preprocess(text[0])
    device = torch.device("cpu")
    hidden_size = 256
    model = MyModel0(len(VOCAB), 16, hidden_size).to(device)
    model.load_state_dict(
        torch.load("model.pth", map_location=torch.device('cpu')))

    #text = ["shubham bisht, something happens"]
    # Encode each character as its index in VOCAB; str.find returns -1 for
    # characters that are not in the vocabulary.
    text_tensor = torch.zeros(len(text[0]), 1, dtype=torch.long)
    text_tensor[:, 0] = torch.LongTensor([VOCAB.find(c) for c in text[0].upper()])
    #print(text_tensor)
    inp = text_tensor.to(device)

    oupt = model(inp)
    prob = torch.nn.functional.softmax(oupt, dim=2)
    prob, pred = torch.max(prob, dim=2)

    color_print(text[0], pred)
    result = pred_to_dict(text[0], pred, prob)  # avoid shadowing the json module
    print("\n###########################\n")
    print(result)
    return result
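A minimal usage sketch, reusing the sample string from the commented-out line inside the function:

result = inference(["shubham bisht, something happens"])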
Example #8
def main():
    # read the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device", default="cpu")
    parser.add_argument("-b", "--batch_size", type=int, default=10)
    parser.add_argument("-e", "--max_epoch", type=int, default=2500)
    parser.add_argument("-v", "--val-at", type=int, default=100)
    parser.add_argument("-i", "--hidden-size", type=int, default=256)
    parser.add_argument("--val-size", type=int, default=76)

    args = parser.parse_args()
    args.device = torch.device(args.device)

    # build model
    model = MyModel0(len(VOCAB), 16, args.hidden_size).to(args.device)

    # prepare the dataset by initializing MyDataset class
    dataset = MyDataset(
        "data/data_dict4.pth",
        args.device,
        val_size=args.val_size,
        test_path="data/test_dict.pth",
    )

    # Use CrossEntropy as loss function
    criterion = nn.CrossEntropyLoss(
        weight=torch.tensor([0.1, 1, 1.2, 0.8, 1.5], device=args.device))
    # Adam optimizer gave the best performance in this case
    optimizer = optim.Adam(model.parameters())
    # Learning-rate scheduler kept for future use; scheduler.step() is never
    # called in this function, so it currently has no effect.
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1000)

    for i in range(args.max_epoch // args.val_at):
        train(
            model,
            dataset,
            criterion,
            optimizer,
            (i * args.val_at + 1, (i + 1) * args.val_at + 1),
            args.batch_size,
        )
        # validate(model, dataset)

    # validate(model, dataset, batch_size=10)

    torch.save(model.state_dict(), "Bi-LSTM_model.pth")

    model.eval()
    with torch.no_grad():
        for key in dataset.test_dict.keys():
            text_tensor = dataset.get_test_data(key)

            oupt = model(text_tensor)
            prob = torch.nn.functional.softmax(oupt, dim=2)
            prob, pred = torch.max(prob, dim=2)

            prob = prob.squeeze().cpu().numpy()
            pred = pred.squeeze().cpu().numpy()

            real_text = dataset.test_dict[key]
            result = pred_to_dict(real_text, pred, prob)

            with open("results/" + key + ".json", "w",
                      encoding="utf-8") as json_opened:
                json.dump(result, json_opened, indent=4)

            print(key)