def test(test, feature, model, hidden, layer, output, index2char, index2phone, phone_map, phone2index):
    """Decode the test set with a trained model and write 'id,phone_sequence' CSV rows.

    Args:
        test: path to a checkpoint file (dict holding a 'model' state dict).
        feature: feature type — 'mfcc' (39-dim), 'fbank' (69-dim) or 'all' (108-dim).
        model: architecture name — 'LSTM', 'BiLSTM' or 'C_RNN'.
        hidden: hidden-state size passed to the model constructor.
        layer: number of recurrent layers.
        output: path of the CSV file to create.
        index2char, index2phone, phone_map, phone2index: mapping tables forwarded
            to test_trim() for decoding class indices into a phone string.

    Raises:
        ValueError: if `feature` or `model` is not one of the supported names.
    """
    # Resolve the feature dimensionality up front; unknown names previously
    # fell through silently and crashed later with a NameError.
    feature_dims = {'mfcc': 39, 'fbank': 69, 'all': 108}
    if feature not in feature_dims:
        raise ValueError("unknown feature type: {}".format(feature))
    feature_dim = feature_dims[feature]

    test_set = Feature_Dataset(feature, 'test')

    if model == 'LSTM':
        test_model = LSTM(feature_dim, hidden, layer)
    elif model == 'BiLSTM':
        test_model = LSTM(feature_dim, hidden, layer, bi=True)
    elif model == 'C_RNN':
        group_size = 5  # fixed frame-grouping factor, matching training
        test_model = C_RNN(group_size, feature_dim, hidden, layer)
    else:
        raise ValueError("unknown model type: {}".format(model))

    checkpoint = torch.load(test)
    test_model.load_state_dict(checkpoint['model'])
    test_model.eval()  # inference mode (disables dropout etc.)
    if USE_CUDA:
        test_model = test_model.cuda()

    # Context manager guarantees the CSV is flushed/closed even if decoding fails.
    with open(output, 'w') as ans:
        ans.write('id,phone_sequence\n')
        for i in tqdm(range(len(test_set))):
            data = test_set[i]
            speaker = data[0]
            test_feature = Variable(data[1].float())
            test_hidden = test_model.init_hidden()
            # Argmax over the class dimension. Named 'prediction' (not 'output')
            # so the output-path parameter is no longer shadowed.
            prediction = torch.max(test_model(test_feature, test_hidden), 1)[1]
            result = test_trim(index2char, index2phone, phone_map, phone2index,
                               prediction.data.cpu().numpy())
            ans.write('{},{}\n'.format(speaker, result))
# Here we don't need to train, so the code is wrapped in torch.no_grad() DataObject = DataProcessing() for epoch in range( 300): # again, normally you would NOT do 300 epochs, it is toy data print("Beginning as a batch") StepsOfEpoch = 0 DataMethodObject = DataObject.FetchInputsAndLabels() for wav, label in DataMethodObject: then = time.time() StepsOfEpoch += 1 # Step 1. Remember that Pytorch accumulates gradients. # We need to clear them out before each instance models.zero_grad() models.init_hidden() # Also, we need to clear out the hidden state of the LSTM, # detaching it from its history on the last instance. output = models(torch.tensor(wav).float()) #print(output) #print(label) # Step 4. Compute the loss, gradients, and update the parameters by # calling optimizer.step() loss = loss_function(output, torch.tensor(label).float()) loss.backward() optimizer.step() now = time.time() print("Epoch:", epoch, "Step:", StepsOfEpoch, " of 2000 steps, Loss:", loss.detach().numpy(), "Time taken for forward and backward is ",
class Train():
    """Training harness for the 'Image Retrieval' LSTM model.

    Construction loads the dataset for the requested difficulty; calling the
    instance builds the model, runs training end-to-end, and saves both the
    trained model and a CSV with the training-loss curve and test score.
    """

    def __init__(self, difficulty):
        # Fixed project layout, resolved relative to the working directory.
        self.data_path = "../data"
        self.model_path = "../models"
        self.output_path = "../outputs"
        self.difficulty = difficulty
        # Unix timestamp makes saved-file names unique per run.
        self.timestamp = str(int(time.time()))
        self.model_name = "lstm_" + self.difficulty
        self.data = Data(difficulty=self.difficulty, data_path=self.data_path)
        # Calling the Data object returns the image features together with the
        # vocabulary tables (word<->index maps, vocab size, UNK/PAD indices).
        (self.img_features, self.w2i, self.i2w, self.nwords, self.UNK, self.PAD) = self.data()
        self.train = list(self.data.get_train_data())
        self.dev = list(self.data.get_validation_data())
        self.test = list(self.data.get_test_data())
        # NOTE(review): assumes 2048-dim image features and 10 candidate images
        # per observation — confirm against the Data class.
        self.image_feature_size = 2048
        self.output_vector_size = 10

    def __call__(self, number_of_iterations = 2, learning_rate = 0.005, embedding_size = 300, hidden_size=100, batch_size=100):
        """Build model/optimizer with the given hyperparameters, train, then save."""
        print("Starting 'Image Retrieval' in 'LSTM' mode with '" + self.difficulty + "' data")
        # Hyperparameters are encoded into the file names so different runs
        # do not overwrite each other.
        self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty"
        self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv"
        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = LSTM(self.nwords, self.embedding_size, self.image_feature_size, self.output_vector_size, self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()
        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch, self.preprocess, self.image_feature_size, self.output_vector_size)
        print(self.model)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.train_loss_values = []
        self.magic()       # training loop
        self.save_model()  # persist weights + report test score
        self.save_data()   # dump loss curve + test score to CSV

    def minibatch(self, data, batch_size = 50):
        """Yield successive batch_size-sized slices of data (last may be short)."""
        for i in range(0, len(data), batch_size):
            yield data[i:i+batch_size]

    def preprocess(self, batch):
        """Unpack a batch of (text, img_ids, correct_index) observations.

        Pads every text sequence to the longest one in the batch with self.PAD
        and also returns the original lengths so the LSTM can exclude padding.
        """
        correct_indexes = [observation[2] for observation in batch]
        img_ids = [observation[1] for observation in batch]
        text_features = [observation[0] for observation in batch]
        last_words = [len(dialog) for dialog in text_features]
        #Add Padding to max len of sentence in batch
        max_length = max(map(len, text_features))
        text_features = [txt + [self.PAD] * (max_length - len(txt)) for txt in text_features]
        #return in "stacked" format, added last_words for excluding padding effects on LSTM
        return text_features, img_ids, correct_indexes, last_words

    def magic(self):
        """Run the training loop for self.number_of_iterations epochs."""
        for ITER in range(self.number_of_iterations):
            random.shuffle(self.train)
            train_loss = 0.0
            start = time.time()
            iteration = 0
            for batch in self.minibatch(self.train, self.batch_size):
                # Reset gradients and the LSTM hidden state for each batch.
                self.model.zero_grad()
                self.optimizer.zero_grad()
                self.model.hidden = self.model.init_hidden()
                #Load data for model
                text_features, h5_ids, correct_index, last_words = self.preprocess(batch)
                lookup_text_tensor = Variable(torch.LongTensor([text_features])).squeeze()
                # Gather the candidate-image feature vectors for every observation.
                full_img_batch = np.empty([len(batch), self.output_vector_size, self.image_feature_size])
                for obs, img_ids in enumerate(h5_ids):
                    for index, h5_id in enumerate(img_ids):
                        full_img_batch[obs, index] = self.img_features[h5_id]
                full_img_batch = Variable(torch.from_numpy(full_img_batch).type(torch.FloatTensor))
                #Target
                target = Variable(torch.LongTensor([correct_index])).squeeze()
                #Vector for excluding padding effects
                last_words = Variable(torch.LongTensor(last_words))
                #Run model and calculate loss
                prediction = self.model(lookup_text_tensor, full_img_batch, last_words)
                loss = self.criterion(prediction, target)
                # .data[0] is the legacy (pre-0.4) PyTorch scalar accessor;
                # modern PyTorch would use loss.item().
                train_loss += loss.data[0]
                iteration += self.batch_size
                print(iteration)
                loss.backward()
                self.optimizer.step()
            print("ITERATION %r: train loss/sent=%.4f, time=%.2fs" % (ITER+1, train_loss/len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss/len(self.train))

    def save_model(self):
        """Save the whole model object and print its test-set score."""
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score", self.evaluate(self.test, self.batch_size))

    def plot(self):
        """Plot the recorded training-loss curve."""
        plt.plot(self.train_loss_values, label = "Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name + " - has loss with lr = %.4f, embedding size = %r" % (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        """Write the loss values and the final test score to the output CSV."""
        file = open(self.output_file_name, "w")
        file.write(", ".join(map(str, self.train_loss_values)))
        file.write("\n")
        file.write(str(self.evaluate(self.test, self.batch_size)))
        file.write("\n")
        file.close()
with open(args.seed_file, 'rb') as f: id_to_sheet = pickle.load(f) data = pickle.load(f) ### BOOTSTRAPPING # get seed sequence numpy_seed_sequence = data[args.seed_index][:, 130:] # convert to tensor + add batch dimension seed_sequence = torch.FloatTensor(numpy_seed_sequence).unsqueeze(0) print('-> INFERENCE') ### SAMPLING LOOP for n in range(args.n_samples): # reset RNN hidden states model.hidden = model.init_hidden() # feed sequence through RNN and get last output o = torch.exp( model.forward(seed_sequence, None, None, temperature=args.temperature)[0, -1, :]) # sample rhythm and chord rhythm = torch.multinomial(o[:13], 1)[0] chord = torch.multinomial(o[13:], 1)[0] if chord == 48 or rhythm == 12: #enforce consistent barlines rhythm = 12 chord = 48 # generate one-hot vector
cfg['data']['data_augmentation']) dataloader = DataLoader(dataset, batch_size=cfg['model']['batch_size'], shuffle=False) print('-> START TRAINING') if cfg['hyperparams']['optimiser'] == 'adam': optimiser = torch.optim.Adam(lstm_model.parameters(), lr=cfg['hyperparams']['learning_rate']) for batch_idx, batch_data in enumerate(dataloader): # zero grad model optimiser.zero_grad() # re-init hidden states lstm_model.hidden = lstm_model.init_hidden() # sort batch based on sequence length sort_batch(batch_data) # put batch on GPU batch_data = to_cuda(batch_data) # feed batch through model Y_output = lstm_model(batch_data[0], batch_data[2], cfg['hyperparams']['sequence_length']) Y_target = batch_data[1] Y_lenghts = batch_data[2] # calculate loss loss = ce_loss(Y_output, Y_target, Y_lenghts)
TestX, TestY, net, lossfunc, optimizer, num_epoch=10, clip=5, Finger=Finger) except KeyboardInterrupt: #save the model print("saving...") net.eval() pred, h = net( torch.from_numpy(TestX).float(), net.init_hidden(TestX.shape[0])) ##############################################TRAINED QUANTIZATION############################################################## elif trained_quantization: print( "Trained Quantization===================================================================" ) figure_name = "/Subject_" + str(Idx_subject) + "_Finger_" + str( Finger) + "_trained_quant" PATH_pre_trained = checkpoint_path + '/s' + str( Idx_subject) + '_f' + str(Finger) + '_trained_model' net.load_state_dict(torch.load(PATH_pre_trained)) k = 8 #initialize the quantiezed weights using the weights from the trained netwrok: net = compute_quantized_weights(net, k)
target = sampled_batch['target'].cuda() target = target.view(target.size(0), -1) # root pos root_p = sampled_batch['root_p'].cuda() # X X = sampled_batch['X'].cuda() if False: print('local_q:', local_q.size(), \ 'root_v:', root_v.size(), \ 'contact:', contact.size(), \ 'root_p_offset:', root_p_offset.size(), \ 'local_q_offset:', local_q_offset.size(), \ 'target:', target.size()) lstm.init_hidden(local_q.size(0)) h_list = [] pred_list = [] pred_list.append(X[:, 0]) # for t in range(opt['model']['seq_length'] - 1): for t in range(lafan_data_train.cur_seq_length - 1): # root pos if t == 0: root_p_t = root_p[:, t] local_q_t = local_q[:, t] local_q_t = local_q_t.view(local_q_t.size(0), -1) contact_t = contact[:, t] root_v_t = root_v[:, t] else: root_p_t = root_pred[0] local_q_t = local_q_pred[0]
def train(feature,label, epochs, model, layer, hidden, save,postfix, index2char, index2phone, phone_map, phone2index):
    """Train a phone-recognition model with per-utterance updates.

    The first 90% of the dataset (in order) is used for training and the
    remaining 10% for validation; every `save` epochs a checkpoint holding the
    model and optimizer state plus validation metrics is written under SAVE_DIR.

    Args:
        feature: 'mfcc' (39-dim) | 'fbank' (69-dim) | 'all' (108-dim).
        label: mapping from speaker id to its frame-level label sequence.
        epochs: total number of training epochs.
        model: 'LSTM' | 'C_RNN' | 'BiLSTM'.
        layer, hidden: layer count and hidden size for the model.
        save: checkpoint every `save` epochs.
        postfix: suffix appended to the checkpoint directory name.
        index2char, index2phone, phone_map, phone2index: mapping tables used by
            test_trim / trim_and_map to decode label sequences into phone strings.
    """
    dataset = Feature_Dataset(feature,'train')
    # 90/10 train/validation split over the dataset order.
    train_size = int(0.9*len(dataset))
    if feature == 'mfcc':
        feature_dim = 39
    elif feature == 'fbank':
        feature_dim = 69
    elif feature == 'all':
        feature_dim = 108
    print("Building model and optimizer...")
    if model == 'LSTM':
        train_model = LSTM(feature_dim,hidden,layer)
    elif model == 'C_RNN':
        group_size = 5
        train_model = C_RNN(group_size,feature_dim,hidden,layer)
    elif model == 'BiLSTM':
        train_model = LSTM(feature_dim, hidden, layer, bi = True)
    if USE_CUDA:
        train_model = train_model.cuda()
    optimizer = optim.Adam(train_model.parameters(), lr = 0.005)
    #optimizer = optim.SGD(train_model.parameters(),lr = 0.1)
    # NLLLoss pairs with a log-softmax output — presumably the model's final
    # layer applies log_softmax; confirm in the LSTM/C_RNN definitions.
    criterion = nn.NLLLoss()
    if USE_CUDA:
        criterion = criterion.cuda()
    for epoch in range(1,epochs+1):
        print("Epoch {}".format(epoch))
        epoch_loss = 0
        epoch_edit = 0
        # --- training pass: one utterance (speaker) at a time ---
        for i in tqdm(range(1,train_size+1)):
            data = dataset[i-1]
            speaker = data[0]
            train_model.zero_grad()
            input_hidden = train_model.init_hidden()
            train_feature = Variable(data[1].float())
            output = train_model(train_feature,input_hidden)
            # Decode argmax frame predictions and reference labels to phone
            # sequences for the edit-distance metric.
            output_seq = test_trim(index2char, index2phone, phone_map, phone2index, torch.max(output,1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index, [[int(l)] for l in label[speaker]])
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            loss = criterion(output,target)
            edit = editdistance.eval(output_seq,target_seq)
            # .data[0] is the legacy (pre-0.4) PyTorch scalar accessor
            # (loss.item() in modern versions); metrics are averaged on the fly.
            epoch_loss += loss.data[0]/train_size
            epoch_edit += edit/train_size
            loss.backward()
            optimizer.step()
        print("Negative log-likelihood: {}".format(epoch_loss))
        print("Edit distance: {} ".format(epoch_edit))
        # --- validation pass over the held-out 10% (no gradient updates) ---
        val_loss = 0
        val_edit = 0
        for i in tqdm(range(train_size+1,len(dataset)+1)):
            data = dataset[i-1]
            speaker = data[0]
            val_feature = Variable(data[1].float())
            output = train_model(val_feature,train_model.init_hidden())
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            val_loss += criterion(output,target).data[0]
            output_seq = test_trim(index2char,index2phone, phone_map, phone2index,torch.max(output,1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index,[[int(l)] for l in label[speaker]])
            val_edit += editdistance.eval(output_seq,target_seq)
        print("Validation loss: {}".format(val_loss/(len(dataset)-train_size)))
        print("Validation edit distance: {}".format(val_edit/(len(dataset)-train_size)))
        # Periodic checkpoint: model + optimizer state + validation metrics.
        if epoch%save == 0:
            directory = os.path.join(SAVE_DIR, feature, model, '{}-{}{}'.format(layer,hidden,postfix))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'model': train_model.state_dict(),
                'opt': optimizer.state_dict(),
                'val_loss': val_loss/(len(dataset)-train_size),
                'val_edit': val_edit/(len(dataset)-train_size),
            }, os.path.join(directory, '{}.tar'.format(epoch)))
    print("Finish training")
num_layers_bi=cfg['model']['num_layers_bi'], num_layers_lstm=cfg['model']['num_layers_lstm'], inference=True) model.load_state_dict(torch.load(args.model_file)) model.eval() print('-> READ DATA') dataset = MusicDataset(args.seed_file, cfg['hyperparams']['sequence_length'], False) ### BOOTSTRAPPING print('-> INFERENCE') # get seed sequence and reset states model.hidden_bi, model.hidden_lstm = model.init_hidden() X_in = torch.FloatTensor( dataset.process_sequence(dataset.data[args.seed_index])[0]).unsqueeze(0) # get bi-LSTM output (chord & rhythm processing) bi_output = model.get_bi_output(X_in) ### SAMPLING LOOP sampled_notes = [] # reset states model.hidden_bi, model.hidden_lstm = model.init_hidden() # begin with start token lstm_out = model.process_lstm_sequence(bi_output[:, 0:1, :],
VLoader = DataLoader(vset, batch_size=batch_size, shuffle=False, drop_last=True, num_workers=num_workers) model = LSTM(n_mels, batch_size, num_layers=n_layers) loss_function = nn.NLLLoss() optimizer = optim.Adam(model.parameters(), lr=l_rate) #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, verbose=True) """ stateD = torch.load("lstm_399.nn") model.load_state_dict(stateD['state_dict']) """ val_loss_list, val_accuracy_list, epoch_list = [], [], [] loss_function.to(device) model.to(device) model.hidden = model.init_hidden(device) #optimizer.load_state_dict(stateD['optim']) for epoch in tqdm(range(n_epochs), desc='Epoch'): train_running_loss, train_acc = 0.0, 0.0 model.train() for idx, (X, y) in enumerate(tqdm(TLoader, desc="Training")): X, y = X.to(device), y.to(device) model.zero_grad() out = model(X) loss = loss_function(out, y) loss.backward() optimizer.step() train_running_loss += loss.detach().item() train_acc += model.get_accuracy(out, y) if LOG and idx != 0 and idx % log_intervall == 0: