Example #1
def main():
    # loading train data
    print(print_str.format("Loading Data"))
    train_data, train_label = load_data("train", "data/protocol/ASVspoof2017_train.trn.txt",
                                        mode="train", feature_type="cqcc")
    train_dataset = ASVDataSet(train_data, train_label, mode="train")
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=True)

    dev_data, dev_label, dev_wav_ids = load_data("dev", "data/protocol/ASVspoof2017_dev.trl.txt",
                                                 mode="test", feature_type="cqcc")
    dev_dataset = ASVDataSet(dev_data, dev_label, wav_ids=dev_wav_ids, mode="test")
    #
    # test_data, test_label, test_wav_ids = load_data("eval",
    #                                                 "data/protocol/ASVspoof2017_eval_v2_key.trl.txt", mode="test")
    # test_dataset = ASVDataSet(test_data, test_label, wav_ids=test_wav_ids, mode="test")

    model = DNNModel(input_dim, hidden_dim, output_dim)
    if use_cuda():
        model = model.cuda()
    cross_entropy = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=0.01)
    scheduler = MultiStepLR(optimizer, milestones=[3, 7], gamma=0.1)

    base_dev_acc = 0.5
    for epoch in range(num_epochs):
        scheduler.step()  # note: with PyTorch >= 1.1 this should be called after the epoch's optimizer steps instead
        for i, tmp in enumerate(train_dataloader):
            data = Variable(tmp['data'])
            label = Variable(tmp['label'])
            if use_cuda():
                data, label = data.cuda(), label.cuda()

            optimizer.zero_grad()
            predict = model(data)
            loss = cross_entropy(predict, label.long().view(-1))

            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print('Epoch [%d/%d], Iter [%d/%d], Loss: %.4f,' % (
                    epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.item()), end="")

                # evaluate on the dev data (and, when enabled, the eval data)
                dev_accuracy, scores = get_test_accuracy(dev_dataset, model)
                test_accuracy = 0.0
                # test_accuracy = get_test_accuracy(test_dataset, model)
                print(" Dev Acc: %.2f Test Acc: %.2f" % (dev_accuracy, test_accuracy))

                if dev_accuracy > base_dev_acc:
                    base_dev_acc = dev_accuracy
                    with open("dev_score.txt", 'w', encoding="utf-8") as f:
                        for k, v in scores.items():
                            f.write("{} {}\n".format(k, v))
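
For reference, get_test_accuracy is not defined in this snippet. Below is a minimal sketch matching this call site, assuming each dataset item is a dict with 'data', 'label', and 'wav_id' keys (as in Example #3), that torch.nn.functional is imported as F, and that the per-utterance score is the softmax probability of class 1; the real helper may differ.

def get_test_accuracy(dataset, model):
    # sketch only: signature and return values inferred from the call site above
    model.eval()
    loader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)
    correct, total = 0, 0
    scores = {}
    with torch.no_grad():
        for tmp in loader:
            data = tmp['data']
            label = tmp['label'].long().view(-1)
            wav_id = tmp['wav_id'][0]
            if use_cuda():
                data, label = data.cuda(), label.cuda()
            predict = model(data)
            prob = F.softmax(predict, dim=1)
            scores[wav_id] = prob[:, 1].mean().item()  # mean class-1 score over frames
            _, predict_label = torch.max(predict, 1)
            correct += (predict_label == label).sum().item()
            total += label.size(0)
    model.train()
    return correct / total, scores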
Example #2
def main():
    prepare()
    print(print_str.format("Loading Data"))

    net = RNN(90, 256, 2, 2, 0.1)
    if use_cuda():
        net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    cross_entropy = nn.CrossEntropyLoss()

    if mode == "train":
        train_data, train_label, train_wav_ids, train_lengths = load_rnn_data(
            "train", train_protocol, mode=mode, feature_type=feature_type)
        train_dataset = ASVDataSet(train_data,
                                   train_label,
                                   wav_ids=train_wav_ids,
                                   mode=mode,
                                   lengths=train_lengths)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      num_workers=4,
                                      shuffle=True)

    # note: the training loop below assumes mode == "train", since
    # train_dataloader is only defined in that branch
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        total_loss = 0
        for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
            data = tmp['data']
            label = tmp['label']
            length = tmp['length']

            # trim the padded batch to the longest utterance it contains
            max_len = int(torch.max(length))
            data = data[:, :max_len, :]
            label = label[:, :max_len]

            # sort the batch by length, descending, as pack_padded_sequence expects
            sorted_length, indices = torch.sort(length.view(-1),
                                                dim=0,
                                                descending=True)
            sorted_length = sorted_length.long().numpy()

            data, label = data[indices], label[indices]

            data, label = Variable(data), Variable(label).view(-1)
            if use_cuda():
                data, label = data.cuda(), label.cuda()

            optimizer.zero_grad()
            outputs, out_length = net(data, sorted_length)
            loss = cross_entropy(outputs, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predict = torch.max(outputs, 1)
            correct += (predict.data == label.data).sum().item()
            total += label.size(0)

        print("Loss: {} \t Acc: {}".format(total_loss / len(train_dataloader),
                                           correct / total))
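
The RNN class itself is not shown. The descending length sort above only makes sense if the forward pass packs the padded batch; here is a minimal sketch under that assumption, mirroring the constructor call RNN(90, 256, 2, 2, 0.1) as (input_dim, hidden_dim, num_layers, num_classes, dropout); the argument order is a guess.

class RNN(nn.Module):
    # sketch only: reconstructs the interface used above,
    # net(data, sorted_length) -> (outputs, out_length); the actual model may differ
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes, dropout):
        super(RNN, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths):
        # pack_padded_sequence requires lengths sorted in descending order,
        # which is why each batch is sorted before calling net()
        packed = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
        out, _ = self.lstm(packed)
        out, out_length = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        out = self.fc(out)  # (batch, max_len, num_classes)
        # flatten to (batch * max_len, num_classes) so it lines up with the
        # flattened labels in the training loop
        return out.contiguous().view(-1, out.size(2)), out_length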
Example #3
def main():
    args = get_args()

    use_cuda = torch.cuda.is_available()

    if args.pkl is None or args.dt is None:
        raise ValueError("args.pkl and args.dt must not be None")

    pkl = torch.load(args.pkl)
    net = pkl['state_dict']  # the checkpoint stores the whole module under 'state_dict', not just weights
    print("model acc: {}".format(pkl['acc']))

    if use_cuda:
        net = net.cuda()

    if args.dt == "eval":
        protocol = "../data/protocol/ASVspoof2017_eval_v2_key.trl.txt"
    else:
        protocol = "../data/protocol/ASVspoof2017_dev.trl.txt"

    test_data, test_label, test_wav_ids = load_cnn_data(args.dt, protocol, mode="test", feature_type=args.ft)

    # tmp = np.concatenate(test_data, axis=0)
    # tmp_mean = np.mean(tmp, axis=0)
    # tmp_std = np.std(tmp, axis=0)

    # for i in range(len(test_data)):
    #     test_data[i] = (test_data[i] - tmp_mean) / tmp_std

    test_dataset = ASVDataSet(test_data, test_label, wav_ids=test_wav_ids, mode="test")
    test_dataloader = DataLoader(test_dataset, batch_size=1, num_workers=1, shuffle=False)

    scores = {}
    net.eval()
    for tmp in tqdm(test_dataloader):
        data = Variable(tmp['data'])
        wav_id = tmp['wav_id'][0]

        if use_cuda:
            data = data.cuda()
        predict = net(data)
        predict = F.softmax(predict, dim=1)
        score = predict.data.cpu().view(-1)[1].item()  # probability of class 1 after softmax
        scores[wav_id] = score

    save_dir = os.path.join("result", args.tm, args.ft)
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, args.dt+"_score.txt"), 'w', encoding='utf-8') as f:
        for k, v in scores.items():
            f.write("{} {}\n".format(k, v))
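
Both main() functions read their configuration from get_args, which is not part of the snippet. Below is a minimal argparse sketch reconstructed from the attributes they use; every flag name, type, and default here is an assumption.

def get_args():
    # sketch only: covers args.pkl, args.dt, args.ft, args.tm, args.sd,
    # args.mode, args.bs, args.ne and args.lr as used above
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--pkl", default=None, help="checkpoint file to score with")
    parser.add_argument("--dt", default=None, help="data to score: dev or eval")
    parser.add_argument("--ft", default="cqcc", help="feature type")
    parser.add_argument("--tm", default="dnn", help="model type: dnn / cnn / lcnn / vgg")
    parser.add_argument("--sd", default="result", help="root directory for checkpoints")
    parser.add_argument("--mode", default="train", help="train or final")
    parser.add_argument("--bs", type=int, default=32, help="batch size")
    parser.add_argument("--ne", type=int, default=10, help="number of epochs")
    parser.add_argument("--lr", type=float, default=0.01, help="learning rate")
    return parser.parse_args()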
def main():
    # do thing before training
    args = get_args()
    save_dir = os.path.join(args.sd, args.tm, args.ft)
    print(args)
    input("*****Please check the params and the save dir --> {} <--, press Enter to continue*****".format(save_dir))
    os.makedirs(save_dir, exist_ok=True)
    mode = args.mode
    batch_size = args.bs
    feature_type = args.ft
    num_epochs = args.ne

    # loading train data
    if mode == "train":
        train_data, train_label = load_data("train", train_protocol, mode=mode, feature_type=feature_type)

        # for i in range(len(train_data)):
        #     mean = np.mean(train_data[i], axis=0)
        #     std = np.std(train_data[i], axis=0)
        #     train_data[i] = (train_data[i] - mean) / std

        train_dataset = ASVDataSet(train_data, train_label, mode=mode)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=True)

        dev_data, dev_label = load_data("dev", dev_protocol, mode=mode, feature_type=feature_type)

        # for i in range(len(dev_data)):
        #     mean = np.mean(dev_data[i], axis=0)
        #     std = np.std(dev_data[i], axis=0)
        #     dev_data[i] = (dev_data[i] - mean) / std

        dev_dataset = ASVDataSet(dev_data, dev_label, mode=mode)
        dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size, num_workers=2, shuffle=False)
    elif mode == "final":
        train_data, train_label = load_data(["train", "dev"], final_protocol,
                                            mode=mode, feature_type=feature_type)
        train_data = np.array(train_data)
        mean = np.mean(train_data, axis=0)
        std = np.std(train_data, axis=0)
        train_data = (train_data - mean) / std
        train_dataset = ASVDataSet(train_data, train_label, mode="train")
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=True)

    # check "lcnn" before "cnn", since "cnn" is a substring of "lcnn"
    if "lcnn" in args.tm:
        model = LCNN(input_dim=77, num_classes=2)
    elif "dnn" in args.tm:
        model = DNN(990, 512, 2)  # input dim: 429 for mfcc/imfcc/cqt, 990 for cqcc
    elif "vgg" in args.tm:
        model = VGG(77, "VGG11")
    elif "cnn" in args.tm:
        model = CNN(77, 2, 0)
    else:
        raise ValueError("unknown model type: {}".format(args.tm))

    if use_cuda():
        model = model.cuda()
    print(model)
    cross_entropy = nn.CrossEntropyLoss()
    optimizer = optim.ASGD(params=model.parameters(), lr=args.lr, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, patience=0, verbose=True, factor=0.1, min_lr=1e-7)

    best_dev_accuracy = 0
    best_train_accuracy = 0
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        train_loss = 0
        model.train()

        for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
            data = Variable(tmp['data'])
            label = Variable(tmp['label']).view(-1)
            if use_cuda():
                data, label = data.cuda(), label.cuda()

            optimizer.zero_grad()
            predict = model(data)

            _, predict_label = torch.max(predict.data, 1)
            correct += (predict_label.cpu() == label.cpu().data).sum().item()
            total += label.size(0)

            loss = cross_entropy(predict, label.long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_accuracy = correct / total
        if mode == "final":
            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                    save_path=os.path.join(save_dir, "best_eval.pkl")
                )
            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                save_path=os.path.join(save_dir, "final_eval.pkl")
            )
            print("Epoch [%d/%d], Loss: %.4fe-4, Train Acc: %.2f%%" % (
                epoch+1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100))
            print(print_str.format("Best Acc: {}".format(best_train_accuracy)))

            scheduler.step(train_loss/total)

            if use_cuda():
                model.cuda()

        if mode == "train":
            dev_accuracy, dev_loss = get_test_accuracy(dev_dataloader, model, cross_entropy)

            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                save_path=os.path.join(save_dir, 'final_dev.pkl')
            )

            if dev_accuracy > best_dev_accuracy:
                best_dev_accuracy = dev_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                    save_path=os.path.join(save_dir, 'best_dev.pkl')
                )

            if use_cuda():
                model.cuda()

            print("Epoch [%d/%d], Train Loss: %.4fe-4, Train Acc: %.2f%%, Dev Loss: %.4fe-4, Dev Acc: %.2f%%" % (
                epoch + 1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100, 1e4 * dev_loss, dev_accuracy * 100
            ))
            print(print_str.format("Best Acc: {}".format(best_dev_accuracy)))
            scheduler.step(dev_loss)
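
save_checkpoint is also not shown. Given that the scoring script above simply does torch.load(args.pkl) and then reads pkl['state_dict'] and pkl['acc'], a one-line torch.save is sufficient; this sketch assumes nothing more is involved.

def save_checkpoint(state, save_path):
    # sketch only: the callers pass {'state_dict': model.cpu(), 'epoch': ..., 'acc': ...},
    # i.e. the whole module is pickled under 'state_dict', not just its weights
    torch.save(state, save_path)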