def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction='none')  # was reduction=None; CrossEntropyLoss expects the string 'none'
    nepochs = 25
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    # val_dataset =
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate)
    # val_loader =
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # val()
        test(model, test_loader, epoch)
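# A minimal sketch of what transform_letter_to_index is assumed to do here:
# map each transcript (an array of byte-string words) to a flat list of
# character indices into LETTER_LIST, wrapped in <sos>/<eos>. The exact helper
# in this codebase may differ; the token names and decode step are assumptions.
def transform_letter_to_index_sketch(transcripts, letter_list):
    letter2index = {letter: i for i, letter in enumerate(letter_list)}
    indexed = []
    for transcript in transcripts:
        # join byte-string words with spaces, then index character by character
        text = ' '.join(word.decode('utf-8') for word in transcript)
        indices = [letter2index['<sos>']]
        indices.extend(letter2index[ch] for ch in text)
        indices.append(letter2index['<eos>'])
        indexed.append(indices)
    return indexed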
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=256, isAttended=True)
    # print(model)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-5)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    criterion = nn.CrossEntropyLoss(reduction='none')  # `reduce` is deprecated and reduction=None is invalid; 'none' keeps per-token losses
    nepochs = 18
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # , collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)  # , collate_fn=collate_train)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)  # , collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # scheduler.step()
        val(model, val_loader, criterion, epoch)

        # Test and save results
        test_preds = test(model, test_loader)
        test_preds = test_preds.cpu().numpy()
        results = []
        for i in range(test_preds.shape[0]):
            result = ""
            for j in range(test_preds.shape[1]):
                if test_preds[i, j] == 0 or test_preds[i, j] == 33:  # skip pad / <sos> (assumed indices)
                    continue
                if test_preds[i, j] == 34:  # stop at <eos> (assumed index)
                    break
                result = result + index2letter[test_preds[i, j]]
            results.append(result)

        name = "Epoch_" + str(epoch) + "_LAS_submission.csv"
        ids = list(range(len(test_dataset)))
        ids.insert(0, 'Id')
        results.insert(0, 'Predicted')
        with open(name, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(zip(ids, results))
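# The magic numbers in the decode loop above (0 = pad, 33 = <sos>, 34 = <eos>)
# are an assumption about this LETTER_LIST's layout. A create_dictionaries
# helper by this name is called in a later main(); a minimal implementation
# consistent with that call keeps the mapping in one place:
def create_dictionaries(letter_list):
    letter2index = {letter: i for i, letter in enumerate(letter_list)}
    index2letter = {i: letter for i, letter in enumerate(letter_list)}
    return letter2index, index2letter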
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.SGD(model.parameters(), lr=1e-4, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.93)
    criterion = nn.CrossEntropyLoss(reduction='none')
    init_epoch = 0
    nepochs = 50
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train_val)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train_val)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    val_distances = []
    exp = 27
    # model.load_state_dict(torch.load('BestModel9.pth'))
    with open('stats_{}'.format(exp), 'w') as file:
        file.write('Experiment: {}\n'.format(exp))

    for epoch in range(init_epoch, nepochs):
        train(model, train_loader, criterion, optimizer, scheduler, epoch, exp)
        val_distances.append(val(model, val_loader, epoch, exp))
        if val_distances[-1] == min(val_distances):
            torch.save(model.state_dict(), 'BestModel{}.pth'.format(exp))
        if epoch % 3 == 0 or epoch == nepochs - 1:
            test(model, test_loader, exp)
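# The best-model tracking above assumes val() returns a mean Levenshtein
# distance. A minimal scoring helper under that assumption, using the
# python-Levenshtein package; decoding predictions to strings is left to
# the caller, so the names here are illustrative:
import Levenshtein

def mean_levenshtein(hypotheses, references):
    # average character-level edit distance over a batch of decoded strings
    return sum(Levenshtein.distance(h, r)
               for h, r in zip(hypotheses, references)) / len(hypotheses)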
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    learningRate = 0.001
    weightDecay = 5e-5
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=weightDecay)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 40
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    model.train()
    model.load_state_dict(torch.load('./new1.pth'))
    model.to(DEVICE)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        scheduler.step()

    model.eval()
    data_list = test(model, test_loader)
    save_to_csv(data_list)
    print('done')
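# save_to_csv is not defined in this snippet; a minimal sketch matching the
# Kaggle-style (Id, Predicted) layout written out by hand in an earlier
# main() might look like this (the signature and filename are assumptions):
import csv

def save_to_csv(data_list, name='submission.csv'):
    with open(name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Id', 'Predicted'])
        writer.writerows(enumerate(data_list))  # rows of (index, decoded string)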
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128,
                    value_size=128, key_size=256, isAttended=True)
    model.load_state_dict(torch.load('model3'))  # resume from a saved checkpoint
    model.eval()
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    nepochs = 10
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    for x in train_loader:  # one sanity pass over the loader; does no work and can be removed
        pass
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    for epoch in range(nepochs):
        print('==============', 'Epoch', epoch + 1, '================')
        train(model, train_loader, criterion, optimizer, epoch)
        val(model, val_loader, criterion, optimizer, epoch)
        torch.save(model.state_dict(), 'model3')  # overwrite the checkpoint every epoch
    load_model(model, test_loader)
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128,
                    value_size=128, key_size=256, is_attended=True)
    # cur_model_num = 6
    # model.load_state_dict(torch.load('model_{}'.format(cur_model_num)))
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction="sum")
    n_epochs = 30
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data()
    character_text_train = transform_letter_to_index(transcript_train, LETTER2INDEX)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER2INDEX)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    for epoch in range(n_epochs):
        train(model, train_loader, criterion, optimizer, epoch)
        val(model, val_loader, criterion, epoch)
        # test(model, test_loader)
        torch.save(model.state_dict(), 'model_{}'.format(1))
        result_gen(test_loader, 1)
def train_test():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction='none')  # TODO: TA change reduction=None to 'none'
    nepochs = 1
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_valid = np.load('dev.npy', allow_pickle=True, encoding='bytes')
    transcript_valid = np.load('./dev_transcripts.npy', allow_pickle=True, encoding='bytes')
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)
    val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)

    print("train() Test:")
    running_best = ['', 0.]
    for epoch in range(nepochs):
        print("\t epoch", epoch)
        train(model, val_loader, criterion, optimizer, epoch, batch_size, 1.0)
        validate(model, val_loader)
    print("Runs!")
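# With reduction='none' the criterion returns one loss per token, which
# train() is then expected to mask by target length before reducing. A
# minimal sketch of that masking, assuming logits of shape (batch, seq, vocab)
# and integer targets of shape (batch, seq); the names are illustrative only:
import torch

def masked_ce_loss(criterion, logits, targets, target_lens):
    per_token = criterion(logits.permute(0, 2, 1), targets)  # (batch, seq)
    positions = torch.arange(targets.size(1), device=targets.device)
    mask = (positions.unsqueeze(0) < target_lens.unsqueeze(1)).float()
    return (per_token * mask).sum() / mask.sum()  # mean over real tokens only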
# print("Encoder-Decoder Not Attended Test:") # encoder = Encoder(input_dim=40, hidden_dim=128, isAttended=False) # , value_size=128,key_size=128) # outputs, lens = encoder.forward(x, x_lens) # print(outputs.shape) # print(lens) # decoder = Decoder(len(LETTER_LIST), hidden_dim=128, isAttended=False).to(DEVICE) # # must pass None for key when not attended # predictions = decoder(outputs.to(DEVICE), None, lens=lens, isTrain=False) # print(predictions.shape) print("Seq2Seq Test:") transcript_valid = np.load('./dev_transcripts.npy', allow_pickle=True, encoding='bytes') character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST) speech_valid = np.load('dev.npy', allow_pickle=True, encoding='bytes') batch_size = 16 valid_dataset = Speech2TextDataset(speech_valid, text=character_text_valid) result = collate_train([valid_dataset[0], valid_dataset[1]]) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train) # loop through loader for i, (x, y, x_len, y_len) in enumerate(valid_loader): print("Input shapes:", x.shape, x_len.shape, "\t", y.shape, y_len.shape)
def main(action="train", model_version=-1, model_number=0, submission_name='submission.csv'): # _____-----**********-----__________-----**********-----_____ CHECK THIS *****-----__________-----**********-----__________-----***** isAttended = True # _____-----**********-----__________-----**********-----_____ CHECK THIS *****-----__________-----**********-----__________-----***** if action in ["load", "test", "validate"]: path = "./hw4p2_models/model_" + str(model_version) + "_" + str(model_number) + "/model.pt" print("Loading model from: {}".format(path)) model = torch.load(path) else: print("Initializing NEW model version {}, model number {}".format(model_version, model_number)) model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=512, value_size=128, key_size=128, isAttended=isAttended) # model 3: hidden_dim=256, value_size=128, key_size=128 # model 4: hidden_dim=512, value_size=128, key_size=128 (helped - best so far) # model 5: hidden_dim=512, value_size=256, key_size=256 (not much gained) # model 6: hidden_dim=512, value_size=150, key_size=150 input_dropout before first LSTM [(7, 0.15), (10, 0.2)] (no help) # model 7: hidden_dim=512, value_size=128, key_size=128 conv1d k=5, pad=2, stride=1, accidental input_dropout of 0.2 later # model 8: hidden_dim=512, value_size=128, key_size=128 conv1d k=5, pad=2, stride=1 # model 9: hidden_dim=512, value_size=128, key_size=128 locked dropout, batchnorm1d between pBLSTM layers # model 10: hidden_dim=512, value_size=128, key_size=128 locked dropout (up then down), batchnorm1d between pBLSTM layers, weight decay nepochs = 70 batch_size = 64 if DEVICE == 'cuda' else 1 num_workers = 4 if DEVICE == 'cuda' else 0 speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data() character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST) character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST) criterion = nn.CrossEntropyLoss(reduction='none') # TODO: TA change reduction=None to 'none' if action == "train": print("Start normal training...") learning_rate = 0.001 mile_stones = [10,15,20,30] # [5,10,15] # [4,7,10,13,16,19,22,25] # gamma = 0.1 # changed from 0.3 after looking at models 4, 5 optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-6) # TODO: tune scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=mile_stones, gamma=gamma) input_dropout = 0. # [(epoch, input_dropout_prob),] input_dropout_schedule = [(15, 0.1), (20, 0.15), (25, 0.2), (30, 0.1), (35, 0.)] train_dataset = Speech2TextDataset(speech_train, character_text_train) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train, num_workers=num_workers, pin_memory=True) val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train, num_workers=num_workers, pin_memory=True) running_best = ['', 1000.] please_learn = 1. 
        for epoch in range(nepochs):
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            if input_dropout_schedule:
                if input_dropout_schedule[0][0] == epoch:
                    input_dropout = input_dropout_schedule[0][1]
                    input_dropout_schedule = input_dropout_schedule[1:]
            if (epoch + 1) % 5 == 0:
                please_learn -= 0.5 / (40 / 5)  # by epoch 40 need to be at 50%, reduce every 5 epochs
            model.train()
            loss, run_time = train(model, train_loader, criterion, optimizer, epoch, batch_size,
                                   please_learn, model_version, model_number, isAttended,
                                   input_dropout=input_dropout)
            # plot_grad_flow(model.named_parameters(), './hw4p2_models/model_' + str(model_version) + '_' + str(model_number) + '/gradients.png')
            Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion)  # Update Me
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            print("Epoch", epoch, "Levenshtein_dist:", Levenshtein_dist, "Perplexity:", loss.item(), "Val Perplexity:", val_loss)
            print("\tTuning Status: Input Dropout = {}, Teacher Forcing = {}".format(input_dropout, please_learn))
            if running_best[1] > Levenshtein_dist:
                running_best[0] = 'Model_' + str(model_version) + '_' + str(model_number)
                running_best[1] = Levenshtein_dist
            print("\tTime for Epoch:", run_time)
            print("\tRunning Best:", *running_best)
            scheduler.step()
            save_state(Levenshtein_dist, running_best, model_version, model_number, model, optimizer, criterion, batch_size)
            model_number += 1

    elif action == "load":
        print("Start training loaded model...")
        learning_rate = 0.001
        mile_stones = []  # [3,8,13,18,23] # [5,10,15] # [4,7,10,13,16,19,22,25]
        gamma = 0.1
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-6)  # TODO: tune
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=mile_stones, gamma=gamma)
        input_dropout = 0.  # 0.2
        # [(epoch, input_dropout_prob), ...]
        input_dropout_schedule = []  # [(7, 0.15), (10, 0.2)]
        criterion = nn.CrossEntropyLoss(reduction='none')  # TODO: TA change reduction=None to 'none'

        train_dataset = Speech2TextDataset(speech_train, character_text_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  collate_fn=collate_train, num_workers=num_workers, pin_memory=True)
        val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                collate_fn=collate_train, num_workers=num_workers, pin_memory=True)
        running_best = get_running_best(model_version, model_number)
        # if model_number > 10:
        #     please_learn = 1.
        # else:
        #     please_learn = 1. - (model_number // 5) * (0.5 / 8)
        please_learn = 0.8
        model_number += 1
        for epoch in range(model_number, nepochs):
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            if input_dropout_schedule:
                if input_dropout_schedule[0][0] == epoch:
                    input_dropout = input_dropout_schedule[0][1]
                    input_dropout_schedule = input_dropout_schedule[1:]
            if model_number > 10 and (model_number + 1) % 5 == 0:
                please_learn -= 0.5 / (40 / 5)
            model.train()
            loss, run_time = train(model, train_loader, criterion, optimizer, epoch, batch_size,
                                   please_learn, model_version, model_number, isAttended,
                                   input_dropout=input_dropout)
            Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion)  # Update Me
            print("--------------------------------------------------------------------------------------------------------------------------------------------------")
            print("Epoch", epoch, "Levenshtein_dist:", Levenshtein_dist, "Perplexity:", loss.item(), "Val Perplexity:", val_loss)
            if running_best[1] > Levenshtein_dist:
                running_best[0] = 'Model_' + str(model_version) + '_' + str(model_number)
                running_best[1] = Levenshtein_dist
            print("\tTuning Status: Input Dropout = {}, Teacher Forcing = {}".format(input_dropout, please_learn))
            print("\tTime for Epoch:", run_time)
            print("\tRunning Best:", *running_best)
            scheduler.step()
            save_state(Levenshtein_dist, running_best, model_version, model_number, model, optimizer, criterion, batch_size)
            model_number += 1

    elif action == "test":
        print("Start prediction...")
        test_dataset = Speech2TextDataset(speech_test, None, False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)
        n, time = test(model, test_loader, LETTER_LIST, random=False, sub_name=submission_name)
        print("{} Predictions COMPLETE in {}".format(n, time))

    elif action == "validate":
        print("Start Validation...")
        val_dataset = Speech2TextDataset(speech_valid, text=character_text_valid)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                collate_fn=collate_train, num_workers=num_workers, pin_memory=True)
        Levenshtein_dist, val_loss = validate(model, val_loader, LETTER_LIST, criterion)  # , random=True)
        print("Levenshtein Distance:", Levenshtein_dist, "Validation Loss:", val_loss)
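# The please_learn variable above is the teacher-forcing rate: subtracting
# 0.5 / (40 / 5) = 0.0625 on every 5th epoch anneals it from 1.0 down to 0.5
# by epoch 40 (8 steps of 0.0625). The same schedule as a standalone function,
# a sketch only, since the real loop mutates the rate in place:
def teacher_forcing_rate(epoch, start=1.0, floor=0.5, end_epoch=40, every=5):
    steps = min(epoch // every, end_epoch // every)  # completed decay steps
    return max(floor, start - steps * (start - floor) / (end_epoch // every))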
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=param['hidden_dim'],
                    value_size=param['value_size'], key_size=param['key_size'], isAttended=True)
    optimizer = optim.Adam(model.parameters(), lr=param['lr'])

    # Load from a pretrained checkpoint
    if param['resume']:
        checkPointPath = param['checkPointPath'] + '/epoch' + str(param['resume_from'])
        checkpoint = torch.load(checkPointPath)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # move the restored optimizer state tensors onto the training device
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(DEVICE)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.95)
    criterion = nn.CrossEntropyLoss(ignore_index=0).to(DEVICE)  # ignore index 0 (padding)
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(param['dataPath'])
    print("finished loading data")
    letter2index, index2letter = create_dictionaries(LETTER_LIST)
    character_text_train = transform_letter_to_index(transcript_train, LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid, LETTER_LIST)
    print("finished transforming data")

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_test)

    print("start training")
    start_epoch = param['resume_from'] + 1
    for epoch in range(start_epoch, start_epoch + param['nepochs']):
        train(model, train_loader, criterion, optimizer, epoch)
        path = param['checkPointPath'] + "/epoch" + str(epoch)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, path)
        # val()
        scheduler.step()
        dist = validation(model, val_loader, index2letter)
        print("validation dist is:", dist)

    test(model, test_loader, index2letter)
"savedCheckpoint": "./checkpoint/init_epoch11.txt", 'testPredCSVfn': './data/predicted_test.csv', 'devPredCSVfn': './data/predicted_dev.csv', 'testPredNpyfn': './data/predicted_test.npy' } # %% # Load datasets print("*** Load raw data ***") speech_train, speech_dev, speech_test, transcript_train, transcript_dev = load_data( hyper['dataPath']) # %% # Preprocess transcript to char level index print("*** Process transcript to char level index ***") character_text_train = transform_letter_to_index(transcript_train) character_text_dev = transform_letter_to_index(transcript_dev) # %% # Get dataloaders print("*** Get data loaders ***") train_dataset = Speech2TextDataset(speech_train, character_text_train) dev_dataset = Speech2TextDataset(speech_dev, character_text_dev) test_dataset = Speech2TextDataset(speech_test, None) train_loader = DataLoader(train_dataset, batch_size=hyper['batchSize'], shuffle=True, collate_fn=collate_train) # 387 dev_loader = DataLoader(dev_dataset, batch_size=hyper['batchSize'], shuffle=False,