Example #1
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 25
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    # val_dataset =
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate)
    # val_loader =
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # val()
        test(model, test_loader, epoch)
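With reduction='none' the criterion returns one loss value per token, so the train loop is expected to mask padded positions before averaging. A minimal sketch of that masking step, assuming padded (batch, seq_len, vocab) logits and per-utterance target lengths; the names here are illustrative, not the snippet's actual train():

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(reduction='none')

def masked_loss(logits, targets, target_lengths):
    # logits: (batch, seq_len, vocab); targets: (batch, seq_len)
    batch, seq_len, vocab = logits.shape
    per_token = criterion(logits.reshape(-1, vocab), targets.reshape(-1))
    # zero out positions past each transcript's true length
    mask = (torch.arange(seq_len)[None, :] < target_lengths[:, None]).reshape(-1)
    return (per_token * mask).sum() / mask.sum()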
Example #2
def main():
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=256,
                    isAttended=True)
    #     print(model)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-5)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 18
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)

    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)  #, collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=True)  #, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False)  #, collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # scheduler.step()
        val(model, val_loader, criterion, epoch)
        # Test and Save results
        test_preds = test(model, test_loader)
        test_preds = test_preds.cpu().numpy()
        results = []
        for i in range(test_preds.shape[0]):
            result = ""
            for j in range(test_preds.shape[1]):
                # skip padding (0) and <sos> (33); stop at <eos> (34).
                # Index roles assumed from this snippet's LETTER_LIST layout.
                if test_preds[i, j] == 0 or test_preds[i, j] == 33:
                    continue
                if test_preds[i, j] == 34:
                    break
                result = result + index2letter[test_preds[i, j]]
            results.append(result)
        name = "Epoch_" + str(epoch) + "_LAS_submission.csv"
        ids = list(range(len(test_dataset)))
        ids.insert(0, 'Id')
        results.insert(0, 'Predicted')
        with open(name, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(zip(ids, results))
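The decode loop above hard-codes index 0 for padding, 33 for <sos> and 34 for <eos>. The same logic reads more clearly with named constants; the index values are carried over from the snippet and depend on its LETTER_LIST layout:

PAD_IDX, SOS_IDX, EOS_IDX = 0, 33, 34  # assumed LETTER_LIST layout

def decode_row(indices, index2letter):
    # convert one row of predicted indices to a string, stopping at <eos>
    chars = []
    for idx in indices:
        if idx in (PAD_IDX, SOS_IDX):
            continue
        if idx == EOS_IDX:
            break
        chars.append(index2letter[idx])
    return ''.join(chars)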
Example #3
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.SGD(model.parameters(), lr=1e-4, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.93)
    criterion = nn.CrossEntropyLoss(reduction='none')
    init_epoch = 0
    nepochs = 50
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train_val)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            collate_fn=collate_train_val)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    val_distances = []
    exp = 27
    # model.load_state_dict(torch.load('BestModel9.pth'))

    with open('stats_{}'.format(exp), 'w') as file:
        file.write('Experiment: {}\n'.format(exp))

    for epoch in range(init_epoch, nepochs):
        train(model, train_loader, criterion, optimizer, scheduler, epoch, exp)
        val_distances.append(val(model, val_loader, epoch, exp))
        if val_distances[-1] == min(val_distances):
            torch.save(model.state_dict(), 'BestModel{}.pth'.format(exp))

        if epoch % 3 == 0 or epoch == nepochs - 1:
            test(model, test_loader, exp)
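val() in this example returns a character-level Levenshtein distance, which drives the best-model checkpointing above. A minimal sketch of such a metric, assuming the python-Levenshtein package and a helper that yields (prediction, target) string pairs per batch; decode_batch is a hypothetical placeholder:

import torch
import Levenshtein  # pip install python-Levenshtein

def val(model, val_loader, epoch, exp):
    model.eval()
    total_dist, count = 0, 0
    with torch.no_grad():
        for batch in val_loader:
            # decode_batch: hypothetical greedy decode + index-to-letter mapping
            for pred, target in decode_batch(model, batch):
                total_dist += Levenshtein.distance(pred, target)
                count += 1
    return total_dist / count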
Example #4
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    learningRate = 0.001
    weightDecay = 5e-5
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learningRate,
                                 weight_decay=weightDecay)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 40
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    model.load_state_dict(torch.load('./new1.pth'))
    model.to(DEVICE)
    model.train()

    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        scheduler.step()

    model.eval()
    data_list = test(model, test_loader)

    save_to_csv(data_list)
    print('done')
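save_to_csv is not shown in this example. A minimal sketch, assuming data_list is a list of decoded transcript strings and the Id/Predicted submission format used in Example #2:

import csv

def save_to_csv(data_list, name='LAS_submission.csv'):
    with open(name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Id', 'Predicted'])
        writer.writerows(enumerate(data_list))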
Example #5
def main():
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=128,
                    value_size=128,
                    key_size=256,
                    isAttended=True)
    model.load_state_dict(torch.load('model3'))
    model.eval()
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    nepochs = 10
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    # drain the loader once as a smoke test; the training loop below re-iterates it
    for x in train_loader:
        pass

    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    for epoch in range(nepochs):
        print('==============', 'Epoch', epoch + 1, '================')
        train(model, train_loader, criterion, optimizer, epoch)
        val(model, val_loader, criterion, optimizer, epoch)
    torch.save(model.state_dict(), 'model3')

    load_model(model, test_loader)
Example #6
def main():
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=128,
                    value_size=128,
                    key_size=256,
                    is_attended=True)

    # cur_model_num = 6
    # model.load_state_dict(torch.load('model_{}'.format(cur_model_num)))

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction="sum")
    n_epochs = 30
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER2INDEX)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER2INDEX)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    for epoch in range(n_epochs):
        train(model, train_loader, criterion, optimizer, epoch)
        val(model, val_loader, criterion, epoch)

    # test(model, test_loader)

    torch.save(model.state_dict(), 'model_{}'.format(1))

    result_gen(test_loader, 1)
Example #7
def train_test():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction='none')  # reduction takes the string 'none', not None
    nepochs = 1
    batch_size = 64 if DEVICE == 'cuda' else 1
    
    speech_valid = np.load('dev.npy', allow_pickle=True, encoding='bytes')
    transcript_valid = np.load('./dev_transcripts.npy', allow_pickle=True,encoding='bytes')
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)
    
    val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    
    print("train() Test:")
    running_best = ['',0.]
    for epoch in range(nepochs):
        print("\t epoch", epoch)
        train(model, val_loader, criterion, optimizer, epoch, batch_size, 1.0)
        validate(model, val_loader)
    print("Runs!")
Example #8
# 'train_new.npy' assumed to mirror the dev_new.npy/test_new.npy names below
speech_train = np.load(os.path.join(args.data_path, 'train_new.npy'),
                       allow_pickle=True,
                       encoding='bytes')
transcript_train = np.load(os.path.join(args.data_path,
                                        'train_transcripts.npy'),
                           allow_pickle=True,
                           encoding='bytes')

speech_valid = np.load(os.path.join(args.data_path, 'dev_new.npy'),
                       allow_pickle=True,
                       encoding='bytes')
transcript_valid = np.load(os.path.join(args.data_path, 'dev_transcripts.npy'),
                           allow_pickle=True,
                           encoding='bytes')

speech_test = np.load(os.path.join(args.data_path, 'test_new.npy'),
                      allow_pickle=True,
                      encoding='bytes')

train_dataset = Speech2TextDataset(speech_train, text=transcript_train)
train_loader = DataLoader(train_dataset,
                          batch_size=args.batch_size,
                          shuffle=True,
                          collate_fn=collate_train,
                          num_workers=os.cpu_count())

vali_dataset = Speech2TextDataset(speech_valid, text=transcript_valid)
vali_loader = DataLoader(vali_dataset,
                         batch_size=args.batch_size,
                         shuffle=False,
                         collate_fn=collate_train,
                         num_workers=os.cpu_count())

test_dataset = Speech2TextDataset(speech_test)
test_loader = DataLoader(test_dataset,
                         batch_size=args.batch_size,
                         shuffle=False,
                         collate_fn=collate_test,  # kwargs assumed to mirror the loaders above
                         num_workers=os.cpu_count())
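The collate_train/collate_test functions these examples pass to DataLoader are defined elsewhere. A minimal sketch of the usual padding collate, assuming each dataset item is a (speech, text) pair of variable length; the return shapes are assumptions, and the train return order matches the unpacking in Example #9:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_train(batch):
    speech, text = zip(*batch)
    speech_lens = torch.LongTensor([len(s) for s in speech])
    text_lens = torch.LongTensor([len(t) for t in text])
    speech = pad_sequence([torch.as_tensor(s) for s in speech], batch_first=True)
    text = pad_sequence([torch.as_tensor(t) for t in text], batch_first=True)
    return speech, text, speech_lens, text_lens

def collate_test(batch):
    speech_lens = torch.LongTensor([len(s) for s in batch])
    speech = pad_sequence([torch.as_tensor(s) for s in batch], batch_first=True)
    return speech, speech_lens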
Example #9
    # decoder = Decoder(len(LETTER_LIST), hidden_dim=128, isAttended=False).to(DEVICE)
    # # must pass None for key when not attended
    # predictions = decoder(outputs.to(DEVICE), None, lens=lens, isTrain=False)
    # print(predictions.shape)

    print("Seq2Seq Test:")
    transcript_valid = np.load('./dev_transcripts.npy',
                               allow_pickle=True,
                               encoding='bytes')
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    speech_valid = np.load('dev.npy', allow_pickle=True, encoding='bytes')

    batch_size = 16
    valid_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
    result = collate_train([valid_dataset[0], valid_dataset[1]])

    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    # loop through loader
    for i, (x, y, x_len, y_len) in enumerate(valid_loader):
        print("Input shapes:", x.shape, x_len.shape, "\t", y.shape,
              y_len.shape)
        print()
        model = Seq2Seq(input_dim=40,
                        vocab_size=len(LETTER_LIST),
                        hidden_dim=128).to(DEVICE)
        out, attn, context = model.forward(x.to(DEVICE), x_len,
                                           y.to(DEVICE))  # remaining arguments assumed
        break  # one batch suffices for a shape check
Example #10
def main(action="train", model_version=-1, model_number=0, submission_name='submission.csv'):
    # _____-----**********-----__________-----**********-----_____ CHECK THIS *****-----__________-----**********-----__________-----*****
    isAttended = True
    # _____-----**********-----__________-----**********-----_____ CHECK THIS *****-----__________-----**********-----__________-----*****

    if action in ["load", "test", "validate"]:
        path = "./hw4p2_models/model_" + str(model_version) + "_" + str(model_number) + "/model.pt"
        print("Loading model from: {}".format(path))
        model = torch.load(path)
    else:
        print("Initializing NEW model version {}, model number {}".format(model_version, model_number))
        model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=512, value_size=128, key_size=128, isAttended=isAttended)
        # model 3: hidden_dim=256, value_size=128, key_size=128
        # model 4: hidden_dim=512, value_size=128, key_size=128 (helped - best so far)
        # model 5: hidden_dim=512, value_size=256, key_size=256 (not much gained)
        # model 6: hidden_dim=512, value_size=150, key_size=150 input_dropout before first LSTM [(7, 0.15), (10, 0.2)] (no help)
        # model 7: hidden_dim=512, value_size=128, key_size=128 conv1d k=5, pad=2, stride=1, accidental input_dropout of 0.2 later
        # model 8: hidden_dim=512, value_size=128, key_size=128 conv1d k=5, pad=2, stride=1
        # model 9: hidden_dim=512, value_size=128, key_size=128 locked dropout, batchnorm1d between pBLSTM layers
        # model 10: hidden_dim=512, value_size=128, key_size=128 locked dropout (up then down), batchnorm1d between pBLSTM layers, weight decay



    nepochs = 70
    batch_size = 64 if DEVICE == 'cuda' else 1
    num_workers = 4 if DEVICE == 'cuda' else 0

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    criterion = nn.CrossEntropyLoss(reduction='none')

    if action == "train":
        print("Start normal training...")
        learning_rate = 0.001
        mile_stones = [10,15,20,30] # [5,10,15] # [4,7,10,13,16,19,22,25] #
        gamma = 0.1 # changed from 0.3 after looking at models 4, 5
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-6) # TODO: tune
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=mile_stones, gamma=gamma)

        input_dropout = 0.
        # [(epoch, input_dropout_prob),]
        input_dropout_schedule = [(15, 0.1), (20, 0.15), (25, 0.2), (30, 0.1), (35, 0.)]

        train_dataset = Speech2TextDataset(speech_train, character_text_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train, num_workers=num_workers, pin_memory=True)

        val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train, num_workers=num_workers, pin_memory=True)

        running_best = ['', 1000.]
        please_learn = 1.
        for epoch in range(nepochs):
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")

            if input_dropout_schedule:
                if input_dropout_schedule[0][0] == epoch:
                    input_dropout = input_dropout_schedule[0][1]
                    input_dropout_schedule = input_dropout_schedule[1:]

            if (epoch+1)%5==0:
                please_learn -= 0.5/(40/5) # by epoch 40 need to be at 50%, reduce every 5 epochs
            model.train()

            loss, run_time = train(model, train_loader, criterion, optimizer, epoch, batch_size, please_learn, model_version, model_number, isAttended, input_dropout=input_dropout)
            
            # plot_grad_flow(model.named_parameters(), './hw4p2_models/model_' + str(model_version) + '_' + str(model_number) + '/gradients.png')
            
            Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion)

            # Update Me
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            print("Epoch", epoch, "Levenshtein_dist:", Levenshtein_dist, "Perplexity:", loss.item(), "Val Perplexity:", val_loss)
            print("\tTuning Status: Input Dropout = {}, Teacher Forcing = {}".format(input_dropout, please_learn))
            if running_best[1] > Levenshtein_dist:
                running_best[0] = 'Model_' + str(model_version) + '_' + str(model_number)
                running_best[1] = Levenshtein_dist
            print("\tTime for Epoch:", run_time)
            print("\tRunning Best:", *running_best)
            scheduler.step()
            
            save_state(Levenshtein_dist, running_best, model_version, model_number, model, optimizer, criterion, batch_size)
            model_number+=1
    
    elif action == "load":
        print("Start training loaded model...")
        learning_rate = 0.001
        mile_stones = [] # [3,8,13,18,23] # [5,10,15] # [4,7,10,13,16,19,22,25] #
        gamma = 0.1
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-6) # TODO: tune
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=mile_stones, gamma=gamma)

        input_dropout = 0. #0.2
        # [(epoch, input_dropout_prob),]
        input_dropout_schedule = []#[(7, 0.15), (10, 0.2)]

        train_dataset = Speech2TextDataset(speech_train, character_text_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train, num_workers=num_workers, pin_memory=True)

        val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train, num_workers=num_workers, pin_memory=True)

        running_best = get_running_best(model_version, model_number)
        # if model_number > 10:
        #     please_learn = 1.
        # else:
        #     please_learn = 1. - (model_number//5)*(0.5/8)
        please_learn = 0.8

        model_number += 1
        for epoch in range(model_number, nepochs):
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            
            if input_dropout_schedule:
                if input_dropout_schedule[0][0] == epoch:
                    input_dropout = input_dropout_schedule[0][1]
                    input_dropout_schedule = input_dropout_schedule[1:]

            if model_number > 10 and (model_number+1)%5==0:
                please_learn -= 0.5/(40/5)
            model.train()
            loss, run_time = train(model, train_loader, criterion, optimizer, epoch, batch_size, please_learn, model_version, model_number, isAttended, input_dropout=input_dropout)
            
            Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion)

            # Update Me
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            print("Epoch", epoch, "Levenshtein_dist:", Levenshtein_dist, "Perplexity:", loss.item(), "Val Perplexity:", val_loss)
            if running_best[1] > Levenshtein_dist:
                running_best[0] = 'Model_' + str(model_version) + '_' + str(model_number)
                running_best[1] = Levenshtein_dist
            print("\tTuning Status: Input Dropout = {}, Teacher Forcing = {}".format(input_dropout, please_learn))
            print("\tTime for Epoch:", run_time)
            print("\tRunning Best:", *running_best)
            scheduler.step()

            save_state(Levenshtein_dist, running_best, model_version, model_number, model, optimizer, criterion, batch_size)
            model_number+=1

    elif action == "test":
        print("Start prediction...")
        test_dataset = Speech2TextDataset(speech_test, None, False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

        n, time = test(model, test_loader, LETTER_LIST, random=False, sub_name=submission_name)

        print("{} Predictions COMPLETE in {}".format(n, time))
    elif action == "validate":
        print("Start Validation...")
        val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train, num_workers=num_workers, pin_memory=True)

        Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion) #, random=True)
        print("Levenshtein Distance:", Levenshtein_dist, "Validation Loss:", val_loss)
Example #11
def main():
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=param['hidden_dim'],
                    value_size=param['value_size'],
                    key_size=param['key_size'],
                    isAttended=True)
    optimizer = optim.Adam(model.parameters(), lr=param['lr'])

    ## Load from pretrained
    if param['resume']:
        checkPointPath = param['checkPointPath'] + '/epoch' + str(
            param['resume_from'])
        checkpoint = torch.load(checkPointPath)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(DEVICE)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.95)
    criterion = nn.CrossEntropyLoss(ignore_index=0).to(DEVICE)
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
        param['dataPath'])
    print("finished loading data")
    letter2index, index2letter = create_dictionaries(LETTER_LIST)
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)
    print("finished transforming data")

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    print("start training")
    start_epoch = param['resume_from'] + 1

    for epoch in range(start_epoch, start_epoch + param['nepochs']):
        train(model, train_loader, criterion, optimizer, epoch)
        path = param['checkPointPath'] + "/epoch" + str(epoch)
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, path)
        # val()
        scheduler.step()
        dist = validation(model, val_loader, index2letter)
        print("validation dist is: ", dist)
    test(model, test_loader, index2letter)
Example #12
# %%
# Load datasets
print("*** Load raw data ***")
speech_train, speech_dev, speech_test, transcript_train, transcript_dev = load_data(
    hyper['dataPath'])

# %%
# Preprocess transcript to char level index
print("*** Process transcript to char level index ***")
character_text_train = transform_letter_to_index(transcript_train)
character_text_dev = transform_letter_to_index(transcript_dev)

# %%
# Get dataloaders
print("*** Get data loaders ***")
train_dataset = Speech2TextDataset(speech_train, character_text_train)
dev_dataset = Speech2TextDataset(speech_dev, character_text_dev)
test_dataset = Speech2TextDataset(speech_test, None)
train_loader = DataLoader(train_dataset,
                          batch_size=hyper['batchSize'],
                          shuffle=True,
                          collate_fn=collate_train)  # 387
dev_loader = DataLoader(dev_dataset,
                        batch_size=hyper['batchSize'],
                        shuffle=False,
                        collate_fn=collate_train)  # 18
test_loader = DataLoader(test_dataset,
                         batch_size=hyper['batchSize'],
                         shuffle=False,
                         collate_fn=collate_test)  # 9
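transform_letter_to_index and LETTER_LIST appear in every example but are defined in the course starter code (note the signature varies: Example #12 passes only the transcripts). A minimal sketch of the usual implementation, assuming byte-string word transcripts (as loaded with encoding='bytes') and an assumed character vocabulary with <sos>/<eos> markers:

# assumed vocabulary; the real LETTER_LIST comes from the starter code
LETTER_LIST = ['<pad>'] + list("abcdefghijklmnopqrstuvwxyz' .-") + ['<sos>', '<eos>']

def transform_letter_to_index(transcripts, letter_list):
    # map each transcript (a sequence of byte-string words) to <sos> ... <eos> indices
    letter2index = {ch: i for i, ch in enumerate(letter_list)}
    out = []
    for words in transcripts:
        text = ' '.join(w.decode('utf-8') for w in words)
        out.append([letter2index['<sos>']] +
                   [letter2index[ch] for ch in text] +
                   [letter2index['<eos>']])
    return out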