def train(args):
    common.make_dir(args.checkout_dir)
    # nnet
    nnet = RNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.min_batch,
                                   shuffle=True)
    test_dataset = THCHS30(root=args.data_dir, data_type='test')
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.min_batch,
                                  shuffle=True)

    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader, is_rnn=True)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(nnet,
                common.join_path(args.checkout_dir, 'rnn.{}.pkl'.format(epoch + 1)))
class Model():
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()
        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, inp, target, chunk_len=200):
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        for c in range(chunk_len):
            out, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(out, target[c])
        loss.backward()
        self.optimizer.step()
        return loss.item() / chunk_len  # loss.data[0] is deprecated

    def generate(self, prime_str, predict_len=100, temperature=0.8):
        predicted = prime_str
        hidden = self.decoder.init_hidden()
        prime_input = char_tensor(prime_str, self.decoder.gpu)

        # Use prime string to build up hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = self.decoder(prime_input[p], hidden)

        inp = prime_input[-1]
        for p in range(predict_len):
            out, hidden = self.decoder(inp, hidden)
            # sample from the network output as a multinomial distribution
            out_dist = out.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(out_dist, 1)[0]
            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char, self.decoder.gpu)
        return predicted

    def save(self):
        model_name = "char-rnn-gru.pt"
        if not os.path.exists("save"):
            os.mkdir("save")
        torch.save(self.decoder, "save/%s" % model_name)
        print("--------------> [Checkpoint] Save model into save/%s" % model_name)

    def load(self, model_path="save/char-rnn-gru.pt"):
        self.decoder = torch.load(model_path)
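# A minimal sketch of driving Model.train()/generate() above. It assumes the
# same char_tensor/all_characters helpers as the class; random_chunk() is a
# hypothetical data source yielding (input, target) character tensors, not a
# function from the snippet itself.
model = Model(input_size=len(all_characters), hidden_size=256,
              output_size=len(all_characters), n_layers=2, gpu=-1)
for step in range(2000):
    inp, target = random_chunk(chunk_len=200)  # hypothetical helper
    loss = model.train(inp, target, chunk_len=200)
    if step % 100 == 0:
        print("step %d, loss %.4f" % (step, loss))
        print(model.generate("Th", predict_len=80, temperature=0.8))
model.save()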
def load_model(args, train_len):
    model = RNN(args.emb_dim, args.hidden_dim)
    if torch.cuda.is_available():
        model.cuda()
    loss_fnc = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    return model, loss_fnc, optimizer
class Sampler:
    """Samples all the detections for a given video and query."""

    def __init__(self, input_size=600, hidden_size=256,
                 weights_path='models/best/model-epoch-last.pth',
                 num_descriptors=10):
        self.model = RNN(num_descriptors=num_descriptors,
                         hidden_size=hidden_size,
                         lstm_in_size=input_size)
        self.model.load_state_dict(torch.load(weights_path))
        self.num_descriptors = num_descriptors
        if torch.cuda.is_available():
            self.model.cuda()
        self.model.eval()

    def sample_video(self, query, video_name,
                     descriptors_path='extracted_descriptors_100',
                     print_sorted_files=False):
        self.model.eval()
        pattern = os.path.join(
            descriptors_path,
            'descriptors_top' + str(self.num_descriptors) + '_' +
            video_name + '_' + query + '_*')
        files = sorted(glob(pattern))
        if print_sorted_files:
            print(pattern)
            print(files)
        predictions = None
        for desc_file in files:
            descriptors = np.load(desc_file)
            descriptors = torch.from_numpy(descriptors).type(torch.FloatTensor) \
                .reshape((1, descriptors.shape[1], int(descriptors.shape[2] / 6), 6))
            if torch.cuda.is_available():
                descriptors = descriptors.cuda()
            preds = self.model(descriptors)
            if predictions is None:
                predictions = preds
            else:
                predictions = torch.cat((predictions, preds), 1)
        return predictions
def run():
    category_lines, all_categories, n_categories = init_cate_dict()
    rnn = RNN(n_letters, n_categories)
    rnn.cuda()
    train_set, test_set = get_data_set(category_lines)
    random.shuffle(train_set)
    for e in range(EPOCH):
        batch_train(rnn, train_set, all_categories)
        model_testing(rnn, test_set, all_categories)
    model_path = os.path.join(os.getcwd(), 'rnn3.pkl')
    torch.save(rnn, model_path)  # save the entire network
def main():
    prepare()
    print(print_str.format("Begin loading data"))
    net = RNN(90, 256, 2, 2, 0.1)
    if use_cuda():
        net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    cross_entropy = nn.CrossEntropyLoss()

    if mode == "train":
        train_data, train_label, train_wav_ids, train_lengths = load_rnn_data(
            "train", train_protocol, mode=mode, feature_type=feature_type)
        train_dataset = ASVDataSet(train_data, train_label, wav_ids=train_wav_ids,
                                   mode=mode, lengths=train_lengths)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      num_workers=4, shuffle=True)

        for epoch in range(num_epochs):
            correct = 0
            total = 0
            total_loss = 0
            for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
                data = tmp['data']
                label = tmp['label']
                length = tmp['length']

                # trim padding to the longest utterance in the batch
                max_len = int(torch.max(length))
                data = data[:, :max_len, :]
                label = label[:, :max_len]

                # sort by length (descending), as required for packed sequences
                sorted_length, indices = torch.sort(length.view(-1), dim=0,
                                                    descending=True)
                sorted_length = sorted_length.long().numpy()
                data, label = data[indices], label[indices]

                data, label = Variable(data), Variable(label).view(-1)
                if use_cuda():
                    data, label = data.cuda(), label.cuda()

                optimizer.zero_grad()
                outputs, out_length = net(data, sorted_length)
                loss = cross_entropy(outputs, label)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                _, predict = torch.max(outputs, 1)
                correct += (predict.data == label.data).sum().item()
                total += label.size(0)
            print("Loss: {} \t Acc: {}".format(total_loss / len(train_dataloader),
                                               correct / total))
def test_model(args):
    # Hyper-parameters
    sequence_length = args.seq_len
    input_size = args.input_size
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    num_classes = args.num_classes
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    dropout = args.dropout

    # Load back the best performing model
    rnn = RNN('LSTM', input_size, hidden_size, num_layers, num_classes, dropout)
    if args.cuda:
        rnn = rnn.cuda()
    rnn.load_state_dict(torch.load(args.model_path))

    # train_dataset = create_dataset('data/train/', timesteps=sequence_length)
    # train_loader = dataloader(train_dataset, batch_size=batch_size)
    test_dataset = create_dataset('data/test/', timesteps=sequence_length)
    test_loader = dataloader(test_dataset, batch_size=batch_size)

    print('-' * 50)
    # print('training accuracy = %.4f, test accuracy = %.4f' % (eval_model(rnn, train_loader), eval_model(rnn, test_loader)))
    # print('training accuracy = %.4f' % eval_model(rnn, train_loader))
    print('test accuracy = %.4f' % eval_model(rnn, test_loader))
    # print('test f1-score = %.4f' % get_f1score(rnn, test_loader))
    print_confusion_matrix(rnn, test_loader)
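# eval_model is called above but not defined in these snippets. A plausible
# minimal sketch, assuming a loader that yields (batch_X, batch_y) numpy pairs
# as in train_model below; this is not the project's actual implementation.
def eval_model(rnn, loader):
    rnn.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_X, batch_y in loader:
            points = torch.from_numpy(batch_X)
            labels = torch.from_numpy(batch_y)
            if next(rnn.parameters()).is_cuda:
                points, labels = points.cuda(), labels.cuda()
            outputs = rnn(points)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    rnn.train()
    return correct / total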
def run():
    start = time.time()
    category_lines, all_categories, n_categories = init_cate_dict()
    rnn = RNN(n_letters, n_categories)
    rnn.cuda()

    line_tensors, category_tensors = get_batch_train_data(category_lines,
                                                          all_categories, 100)
    line_tensors = line_tensors.cuda()
    category_tensors = category_tensors.cuda()

    for it in range(1, n_iters + 1):
        output, loss = train(rnn, category_tensors, line_tensors)
        # Print iteration number, progress, elapsed time and loss
        if it % print_every == 0:
            print('%d %d%% (%s) %.4f' % (it, it / n_iters * 100,
                                         time_since(start), loss))

    model_path = os.path.join(os.getcwd(), 'rnn1.pkl')
    torch.save(rnn, model_path)  # save the entire network
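# A minimal sketch of the time_since helper used above. It is assumed here,
# following the common PyTorch-tutorial pattern of formatting elapsed time
# as "Xm Ys"; the project's real helper may differ.
import math
import time

def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)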
class TrainModel():
    def __init__(self):
        self.model_2048 = RNN(rnn_size)

    def trainModel(self):
        trainDataset = DealDataset_enhanced(
            root=trainfilertoread,
            transform=transforms.Compose([transforms.ToTensor()]))
        train_loader = DataLoader(dataset=trainDataset, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=0)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model_2048.parameters(), lr=LR)

        for epoch in range(NUM_EPOCHS):
            for index, (board, direc) in enumerate(train_loader):
                board, direc = Variable(board), Variable(direc)
                if torch.cuda.is_available():
                    board, direc = board.cuda(), direc.cuda()
                    self.model_2048.cuda()
                board = board.view(-1, 4, 4)
                out = self.model_2048(board)
                loss = criterion(out, direc)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if index % 50 == 0:
                    out = self.model_2048(board)
                    pred = torch.max(out, 1)[1]
                    train_correct = (pred == direc).sum().item()
                    # accuracy is measured on the current training batch
                    print('Epoch: ', epoch,
                          '| train loss: %.4f' % loss.item(),
                          '| train accuracy: %.4f' % (train_correct / (BATCH_SIZE * 1.0)))
            torch.save(self.model_2048, 'rnn_model_' + str(epoch) + '.pkl')
        torch.save(self.model_2048, 'rnn_model_final.pkl')
dataloader_train = data.DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
dataloader_val = data.DataLoader(dataset_val, batch_size=1, shuffle=False, num_workers=4)
print(dataset.n_categories)
categories = dataset.all_categories

# Initialize the network. Hidden size: 1024.
# 57 is the length of the one-hot-encoded input at each timestep
model = RNN(57, 1024, dataset.n_categories)

# criterion = nn.NLLLoss()
criterion = nn.CrossEntropyLoss()

# comment out if not using a GPU
model = model.cuda()
criterion = criterion.cuda()

optimizer = torch.optim.SGD(model.parameters(), 0.005)  # momentum=0.9 (disabled)
n_epochs = 10
for i in range(n_epochs):
    train(i, dataloader_train, model, criterion, optimizer, categories, 'train')
    if i % 2 == 1:
        train(i, dataloader_val, model, criterion, optimizer, categories, 'val')
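# Sketch of the one-hot encoding implied by the input size of 57 above,
# assuming the usual name-classification vocabulary of the 52 ASCII letters
# plus " .,;'"; the dataset's real encoder may differ in detail.
import string
import torch

all_letters = string.ascii_letters + " .,;'"  # 52 + 5 = 57 symbols

def line_to_tensor(line):
    # shape: (seq_len, 1, 57) -- one one-hot row per character
    tensor = torch.zeros(len(line), 1, len(all_letters))
    for i, ch in enumerate(line):
        tensor[i][0][all_letters.find(ch)] = 1
    return tensor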
                                          labels, hidden)
            batch_losses.append(loss)

            if batch_i % show_every_n_batches == 0:
                print('Epoch: {:>4}/{:<4}  Loss: {}\n'.format(
                    epoch_i, n_epochs, np.average(batch_losses)))
                batch_losses = []

    return rnn


rnn = RNN(vocab_size, output_size, opt.embedding_dim, opt.hidden_dim,
          opt.n_layers, dropout=0.5)
if train_on_gpu:
    rnn.cuda()
optimizer = torch.optim.Adam(rnn.parameters(), lr=opt.learning_rate)
criterion = nn.CrossEntropyLoss()

# training the model
trained_rnn = train_rnn(rnn, opt.batch_size, optimizer, criterion,
                        opt.num_epochs, opt.show_every_n_batches)

# saving the trained model
save_model('./save/trained_rnn', trained_rnn)
print('Model Trained and Saved')
print("===> creating dataloaders ...") end = time.time() train_loader = ClassDataLoader(args.train_path,word_to_index,fasttext_word_to_index,char_to_index,pos_to_index,xpos_to_index,rel_to_index,args.batch_size,predict_flag=0,train=1) val_loader = ClassDataLoader(args.dev_path, word_to_index,fasttext_word_to_index,char_to_index,pos_to_index,xpos_to_index,rel_to_index,args.batch_size,predict_flag=0,train=0) print('===> dataloaders creatinng in: {t:.3f}s'.format(t=time.time()-end)) #create model print("===> creating rnn model ...") model = RNN(word_to_index,fasttext_word_to_index,char_to_index,args.cembedding_size,args.posembedding_size,args.char_hidden_size, args.wembedding_size, fasttext_embed, args.layers, args.hidden_size, args.dropout,args.var_dropout, args.mlp_arc_size, args.mlp_label_size,pos_to_index, xpos_to_index,rel_to_index,args.cuda, batch_first=True) print(model) if args.cuda: model.cuda() #optimizer and losses optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.param_group_dense), lr=args.lr, betas=(0.9, 0.9), eps=1e-12) optimizer_sparse = torch.optim.SparseAdam(filter(lambda p: p.requires_grad, model.param_group_sparse), lr=args.lr, betas=(0.9, 0.9), eps=1e-12) criterion_arc = nn.CrossEntropyLoss(ignore_index=-1) # ignore PADDED targets criterion_label = nn.CrossEntropyLoss(ignore_index=model.rel_to_index['__PADDING__']) # ignore PADDED targets def test(val_loader, model): # switch to evaluate mode model.eval() gold_arcs = np.array([]) pred_arcs = np.array([])
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path,
                                  args.json_labels_path, args.bs)
    model = RNN(lstm_hidden_size=args.hidden_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()
    # model_loss = Loss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            # step p down at fixed epochs
            if epoch == 3:
                p = 2 / 3
            elif epoch == 7:
                p = 1 / 3
            elif epoch == 15:
                p = 0

            loss_epoch = []
            loss1_epoch = []
            loss2_epoch = []
            for step, (tensors, masks, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    tensors = tensors.cuda()
                    masks = masks.cuda()
                    gt = gt.cuda()
                model.zero_grad()
                out, att = model(tensors, masks, gt, p)
                loss1 = model_loss(out, gt)
                # att[:, :-1, :] -> attention produced (location in the next frame) up to the second-to-last frame
                # gt[:, 1:, :]  -> ground truth from the second frame to the last frame
                loss2 = model_loss(att[:, :-1, :], gt[:, 1:, :])
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()
                loss_epoch.append(loss.cpu().detach().numpy())
                loss1_epoch.append(loss1.cpu().detach().numpy())
                loss2_epoch.append(loss2.cpu().detach().numpy())
                # print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) + ' - Step ' + str(step + 1) + '/' +
                #       str(len(data_loader)) + ' - Loss: ' + str(float(loss)) + " (Loss1: " + str(float(loss1))
                #       + ", Loss2: " + str(float(loss2)) + ")")

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            # average the per-term lists (the original reused loss_epoch for both)
            loss1_epoch_mean = np.mean(np.array(loss1_epoch))
            loss2_epoch_mean = np.mean(np.array(loss2_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean) + " (loss1: " +
                  str(loss1_epoch_mean) + ", loss2: " + str(loss2_epoch_mean) + ")")

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
else:
    torch.set_default_tensor_type('torch.FloatTensor')

n_hidden = 128
batch_size = args.batch_size
num_workers = args.num_workers
log_iters = args.log_iters
weights = PROJECT_DIR + args.weights

print('Loading weights...')
rnn = RNN(N_LETTERS, n_hidden, N_GENDERS)
if args.cuda:
    rnn = rnn.cuda()
rnn.load_state_dict(torch.load(weights))
rnn.eval()


def _evaluate(name_tensor):
    hidden = rnn.init_hidden()
    for letter_tensor in name_tensor:
        letter_tensor.data.unsqueeze_(0)
        output, hidden = rnn(letter_tensor, hidden)
    return output


def predict(name, n_predictions=2):
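    # Hypothetical completion: the source cuts off at the definition above.
    # This body follows the usual PyTorch classification-tutorial pattern of
    # taking the top-k outputs; name_to_tensor and ALL_GENDERS are assumed
    # helper names, not necessarily the project's real ones.
    with torch.no_grad():
        output = _evaluate(name_to_tensor(name))
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []
        for i in range(n_predictions):
            value = topv[0][i].item()
            gender_index = topi[0][i].item()
            print('(%.2f) %s' % (value, ALL_GENDERS[gender_index]))
            predictions.append([value, ALL_GENDERS[gender_index]])
        return predictions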
class dl_model():
    def __init__(self, mode):
        # Read config file which contains parameters
        self.config = config
        self.mode = mode

        # Architecture name decides prefix for storing models and plots
        feature_dim = self.config['vocab_size']
        self.arch_name = '_'.join(
            [self.config['rnn'], str(self.config['num_layers']),
             str(self.config['hidden_dim']), str(feature_dim)])
        print("Architecture:", self.arch_name)

        # Change paths for storing models
        self.config['models'] = self.config['models'].split('/')[0] + '_' + self.arch_name + '/'
        self.config['plots'] = self.config['plots'].split('/')[0] + '_' + self.arch_name + '/'

        # Make folders if they do not exist
        if not os.path.exists(self.config['models']):
            os.mkdir(self.config['models'])
        if not os.path.exists(self.config['plots']):
            os.mkdir(self.config['plots'])
        if not os.path.exists(self.config['pickle']):
            os.mkdir(self.config['pickle'])

        self.cuda = (self.config['cuda'] and torch.cuda.is_available())

        # load/initialise metrics to be stored and load model
        if mode == 'train' or mode == 'test':
            self.plots_dir = self.config['plots']
            # store hyperparameters
            self.total_epochs = self.config['epochs']
            self.test_every = self.config['test_every_epoch']
            self.test_per = self.config['test_per_epoch']
            self.print_per = self.config['print_per_epoch']
            self.save_every = self.config['save_every']
            self.plot_every = self.config['plot_every']

            # dataloader which returns batches of data
            self.train_loader = dataloader('train', self.config)
            self.test_loader = dataloader('test', self.config)

            # declare model
            self.model = RNN(self.config)

            self.start_epoch = 1
            self.edit_dist = []
            self.train_losses, self.test_losses = [], []
        else:
            self.model = RNN(self.config)

        if self.cuda:
            self.model.cuda()

        # resume training from some stored model
        if self.mode == 'train' and self.config['resume']:
            self.start_epoch, self.train_losses, self.test_losses = self.model.load_model(
                mode, self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)
            self.start_epoch += 1
        # load best model for testing/inference
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config['rnn'], self.model.num_layers,
                                  self.model.hidden_dim)

        # whether embeddings are used
        self.use_embedding = bool(self.config['use_embedding'])

    # Train the model
    def train(self):
        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()

        # when to print losses during the epoch
        print_range = list(np.linspace(0, len(self.train_loader),
                                       self.print_per + 2, dtype=np.uint32)[1:-1])
        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(np.linspace(0, len(self.train_loader),
                                          self.test_per + 2, dtype=np.uint32)[1:-1])

        for epoch in range(self.start_epoch, self.total_epochs + 1):
            try:
                print("Epoch:", str(epoch))
                epoch_loss = 0.0
                # i is used for monitoring the batch count, printing loss, etc.
                i = 0
                while True:
                    i += 1
                    # Get batch of inputs, labels, missed_chars and lengths,
                    # along with status (signals when the epoch ends)
                    inputs, labels, miss_chars, input_lens, status = self.train_loader.return_batch()

                    if self.use_embedding:
                        inputs = torch.from_numpy(inputs).long()  # embeddings expect long dtype
                    else:
                        inputs = torch.from_numpy(inputs).float()

                    # convert to torch tensors
                    labels = torch.from_numpy(labels).float()
                    miss_chars = torch.from_numpy(miss_chars).float()
                    input_lens = torch.from_numpy(input_lens).long()

                    if self.cuda:
                        inputs = inputs.cuda()
                        labels = labels.cuda()
                        miss_chars = miss_chars.cuda()
                        input_lens = input_lens.cuda()

                    # zero the parameter gradients
                    self.model.optimizer.zero_grad()

                    # forward + backward + optimize
                    outputs = self.model(inputs, input_lens, miss_chars)
                    loss, miss_penalty = self.model.calculate_loss(
                        outputs, labels, input_lens, miss_chars, self.cuda)
                    loss.backward()

                    # clip gradient
                    # torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['grad_clip'])
                    self.model.optimizer.step()

                    # store loss
                    epoch_loss += loss.item()

                    # print loss
                    if i in print_range and epoch == 1:
                        print('After %i batches, Current Loss = %.7f' % (i, epoch_loss / i))
                    elif i in print_range and epoch > 1:
                        print('After %i batches, Current Loss = %.7f, Avg. Loss = %.7f, Miss Loss = %.7f' % (
                            i, epoch_loss / i,
                            np.mean(np.array([x[0] for x in self.train_losses])),
                            miss_penalty))

                    # test model periodically
                    if i in test_range:
                        self.test(epoch)
                        self.model.train()

                    # Reached end of dataset
                    if status == 1:
                        break

                # refresh dataset, i.e. generate a new dataset from the corpus
                if epoch % self.config['reset_after'] == 0:
                    self.train_loader.refresh_data(epoch)

                # take the last example from the epoch and print the incomplete word,
                # target characters and missed characters
                random_eg = min(np.random.randint(self.train_loader.batch_size),
                                inputs.shape[0] - 1)
                encoded_to_string(inputs.cpu().numpy()[random_eg],
                                  labels.cpu().numpy()[random_eg],
                                  miss_chars.cpu().numpy()[random_eg],
                                  input_lens.cpu().numpy()[random_eg],
                                  self.train_loader.char_to_id, self.use_embedding)

                # Store tuple of training loss and epoch number
                self.train_losses.append((epoch_loss / len(self.train_loader), epoch))

                # save model
                if epoch % self.save_every == 0:
                    self.model.save_model(False, epoch, self.train_losses, self.test_losses,
                                          self.model.rnn_name, self.model.num_layers,
                                          self.model.hidden_dim)

                # test every 5 epochs in the beginning and then every fixed number of
                # epochs specified in the config file; useful to see how the loss
                # stabilises in the beginning
                if epoch % 5 == 0 and epoch < self.test_every:
                    self.test(epoch)
                    self.model.train()
                elif epoch % self.test_every == 0:
                    self.test(epoch)
                    self.model.train()

                # plot loss and accuracy
                if epoch % self.plot_every == 0:
                    self.plot_loss_acc(epoch)

            except KeyboardInterrupt:
                # save model before exiting
                print("Saving model before quitting")
                self.model.save_model(False, epoch - 1, self.train_losses, self.test_losses,
                                      self.model.rnn_name, self.model.num_layers,
                                      self.model.hidden_dim)
                exit(0)

    # test model
    def test(self, epoch=None):
        self.model.eval()
        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0

        # generate a new dataset from the corpus
        self.test_loader.refresh_data(epoch)

        with torch.no_grad():
            while True:
                # Get batch of inputs, labels, missed characters and lengths,
                # along with status (signals when the epoch ends)
                inputs, labels, miss_chars, input_lens, status = self.test_loader.return_batch()

                if self.use_embedding:
                    inputs = torch.from_numpy(inputs).long()
                else:
                    inputs = torch.from_numpy(inputs).float()

                labels = torch.from_numpy(labels).float()
                miss_chars = torch.from_numpy(miss_chars).float()
                input_lens = torch.from_numpy(input_lens).long()

                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    miss_chars = miss_chars.cuda()
                    input_lens = input_lens.cuda()

                # forward pass only (no gradients inside torch.no_grad())
                outputs = self.model(inputs, input_lens, miss_chars)
                loss, miss_penalty = self.model.calculate_loss(
                    outputs, labels, input_lens, miss_chars, self.cuda)
                test_loss += loss.item()

                # Reached end of dataset
                if status == 1:
                    break

        # take a random example from the epoch and print the incomplete word,
        # target characters and missed characters;
        # min() since the last batch may be smaller than batch_size
        random_eg = min(np.random.randint(self.train_loader.batch_size),
                        inputs.shape[0] - 1)
        encoded_to_string(inputs.cpu().numpy()[random_eg],
                          labels.cpu().numpy()[random_eg],
                          miss_chars.cpu().numpy()[random_eg],
                          input_lens.cpu().numpy()[random_eg],
                          self.train_loader.char_to_id, self.use_embedding)

        # Average out the losses
        test_loss /= len(self.test_loader)
        print("Test Loss: %.7f, Miss Penalty: %.7f" % (test_loss, miss_penalty))

        # Store in lists for keeping track of model performance
        self.test_losses.append((test_loss, epoch))

        # if the testing loss is a new minimum, store the model as 'best.pth', which is
        # used during inference; store only when doing train/test together, i.e. mode is train
        if test_loss == min([x[0] for x in self.test_losses]) and self.mode == 'train':
            print("Best new model found!")
            self.model.save_model(True, epoch, self.train_losses, self.test_losses,
                                  self.model.rnn_name, self.model.num_layers,
                                  self.model.hidden_dim)

        return test_loss

    def predict(self, string, misses, char_to_id):
        """
        Called during inference.
        :param string: word with predicted characters and blanks at remaining places
        :param misses: list of characters which were predicted but game feedback
                       indicated that they are not present
        :param char_to_id: mapping from characters to id
        """
        id_to_char = {v: k for k, v in char_to_id.items()}

        # convert string into the desired input tensor
        if self.use_embedding:
            encoded = np.zeros((len(char_to_id)))
            for i, c in enumerate(string):
                if c == '*':
                    encoded[i] = len(id_to_char) - 1
                else:
                    encoded[i] = char_to_id[c]
            inputs = np.array(encoded)[None, :]
            inputs = torch.from_numpy(inputs).long()
        else:
            encoded = np.zeros((len(string), len(char_to_id)))
            for i, c in enumerate(string):
                if c == '*':
                    encoded[i][len(id_to_char) - 1] = 1
                else:
                    encoded[i][char_to_id[c]] = 1
            inputs = np.array(encoded)[None, :, :]
            inputs = torch.from_numpy(inputs).float()

        # encode the missed characters
        miss_encoded = np.zeros((len(char_to_id) - 1))
        for c in misses:
            miss_encoded[char_to_id[c]] = 1
        miss_encoded = np.array(miss_encoded)[None, :]
        miss_encoded = torch.from_numpy(miss_encoded).float()

        input_lens = np.array([len(string)])
        input_lens = torch.from_numpy(input_lens).long()

        # pass through the model
        output = self.model(inputs, input_lens, miss_encoded).detach().cpu().numpy()[0]

        # sort predictions
        sorted_predictions = np.argsort(output)[::-1]

        # we cannot consider only the argmax, since a missed character may also get
        # assigned a high probability; a well-trained model shouldn't show this
        return [id_to_char[x] for x in sorted_predictions]

    def plot_loss_acc(self, epoch):
        """
        Take train/test loss and test accuracy input and plot it over time.
        :param epoch: to track performance across epochs
        """
        plt.clf()
        fig, ax1 = plt.subplots()
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.plot([x[1] for x in self.train_losses], [x[0] for x in self.train_losses],
                 color='r', label='Train Loss')
        ax1.plot([x[1] for x in self.test_losses], [x[0] for x in self.test_losses],
                 color='b', label='Test Loss')
        ax1.tick_params(axis='y')
        ax1.legend(loc='upper left')

        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        plt.grid(True)
        plt.title(self.arch_name)

        filename = self.plots_dir + 'plot_' + self.arch_name + '_' + str(epoch) + '.png'
        plt.savefig(filename)
        print("Saved plots")
# Print train-dataset statistics
text_len = len(train_data)
all_characters = tuple(sorted(set(train_data)))
n_characters = len(all_characters)
print('Total characters: {} - Total vocab: {}'.format(text_len, n_characters))

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
decoder_criterion = torch.nn.CrossEntropyLoss()

if not torch.cuda.is_available():
    print('CUDA is not available. Training on CPU ...\n')
else:
    print('CUDA is available! Training on GPU ...\n')
    decoder.cuda()

start = time.time()
for epoch in range(1, n_epochs):
    # Get a random training chunk for each epoch
    input_chunk, target_chunk = random_chunk(chunk_len, batch_size, 'train',
                                             train_data, len(train_data),
                                             all_characters)
    # Train and calculate the loss
    loss = net_train(input_chunk, target_chunk, chunk_len, decoder, batch_size,
                     decoder_criterion, decoder_optimizer)

    # Print every 100 epochs and calculate the validation loss
    if epoch % 100 == 0:
        input_valid_chunk, target_valid_chunk = random_chunk(
            chunk_len, batch_size, 'valid', validation_data,
def train_model(args):
    # Hyper-parameters
    sequence_length = args.seq_len
    input_size = args.input_size
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    num_classes = args.num_classes
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    dropout = args.dropout

    # Create the dataset
    train_dataset = create_dataset('data/train/', timesteps=sequence_length)
    train_loader = dataloader(train_dataset, batch_size=batch_size)
    test_dataset = create_dataset('data/test/', timesteps=sequence_length)
    test_loader = dataloader(test_dataset, batch_size=batch_size)

    # Define model and loss
    rnn = RNN('LSTM', input_size, hidden_size, num_layers, num_classes, dropout)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        # switch to cuda
        rnn, criterion = rnn.cuda(), criterion.cuda()

    # Adam optimizer
    optimizer = torch.optim.Adam(rnn.parameters(), learning_rate)

    # Train the model
    i = 0  # update counter
    best_test_acc = 0.0
    for epoch in range(num_epochs):
        # Generate random batches every epoch
        train_loader = dataloader(train_dataset, batch_size)
        for batch_X, batch_y in train_loader:
            # points = pack_padded_sequence(Variable(torch.from_numpy(batch_X)), batch_seq_lens)
            points = Variable(torch.from_numpy(batch_X))
            labels = Variable(torch.from_numpy(batch_y))
            if args.cuda:
                points, labels = points.cuda(), labels.cuda()

            # Forward + backward + optimize
            optimizer.zero_grad()
            outputs = rnn(points)  # final hidden state
            # outputs = pad_packed_sequence(outputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, loss.item()))

            if i % 100 == 0:
                # every 100 updates, evaluate on the test set
                # print("training accuracy = %.4f" % eval_model(rnn, train_loader))
                test_acc = eval_model(rnn, test_loader)
                print("test accuracy = %.4f" % test_acc)
                if test_acc > best_test_acc:
                    print("best test accuracy found")
                    best_test_acc = test_acc
                    torch.save(rnn.state_dict(), 'rnn_best.pkl')
            i += 1
def train(args):
    if args.create_dataset:
        df = pd.read_csv("../data/endpoints_calculated_std.csv")
        smiles = df["smiles"].to_list()
        data = df[df.columns[3:]].to_numpy()
        print("Building LegoModel")
        legoModel = LegoGram(smiles=smiles, nworkers=8)
        torch.save(legoModel, "legoModel.pk")
        print("Building sampler")
        sampler = LegoGramRNNSampler(legoModel)
        torch.save(sampler, "sampler.pk")
        print("Constructing dataset")
        dataset = MolecularNotationDataset(smiles, sampler, data)
        torch.save(dataset, 'lg.bin')
    else:
        dataset = torch.load('lg.bin')

    train_loader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collect)
    device = torch.device('cuda') if args.cuda else torch.device('cpu')

    model = RNN(voc_size=dataset.vocsize, device=device)
    model.train()
    model.to(device)
    print(f"Model has been created on device {device}")

    smiles_dataset = dataset.smiles
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_f = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
    writer = SummaryWriter(comment=args.name_task)

    losses = []
    out_counter = 0
    cnt = 0
    for epoch in range(args.num_epochs):
        loss_list = []
        for iteration, (batch, lengths) in enumerate(tqdm(train_loader)):
            batch = batch.to(device)
            logits, endp_model = model(batch, lengths)
            # print(logits.shape); print(batch.shape)  # debug
            # predict token t+1 from the prefix up to token t
            loss = loss_f(logits[:, :, :-1], batch[:, 1:])
            loss_list.append(loss.item())
            writer.add_scalar("CrossEntropyLoss", loss_list[-1],
                              iteration + epoch * len(train_loader))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iteration % args.print_every == 0 and iteration > 0:
                model.eval()
                number_generate = 100
                res = model.sample(number_generate, dataset.model)
                writer.add_text("Molecules after generator", json.dumps([res]))
                valid = len(res) * 100 / number_generate
                print(res)
                print("valid : {} %".format(valid))
                writer.add_scalar("Valid", valid, cnt)

                res = [robust_standardizer(mol) for mol in res]
                res = list(filter(lambda x: x is not None, res))
                unique = len([elem for elem in res if elem not in smiles_dataset])
                writer.add_text("Unique mols", json.dumps([res]))
                print(f"There are {unique} unique mols")
                print(res)
                writer.add_scalar("Unique", unique, cnt)
                cnt += 1
                model.train()
            writer.flush()

        epoch_loss = np.mean(loss_list)
        print(f"Loss on epoch {epoch} is {epoch_loss}")

        # early-stopping counter: save on improvement, count non-improving epochs
        if out_counter < args.stop_after and epoch > 0:
            if losses[-1] <= epoch_loss:
                out_counter += 1
            else:
                out_counter = 0
                torch.save(model, "experiments/" + args.name_task + "/model.pt")
        if epoch == 0:
            torch.save(model, "experiments/" + args.name_task + "/model.pt")
        losses.append(epoch_loss)
    return losses
def main(args):
    if not os.path.exists('models'):
        os.mkdir('models')
    # ensure the checkpoint subdirectory used below exists
    os.makedirs('models/models_361_dropout', exist_ok=True)

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.descriptors_path,
                                  args.json_labels_path, args.bs)
    model = RNN(num_descriptors=args.num_descriptors,
                hidden_size=args.hidden_size,
                lstm_in_size=args.input_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_loss = torch.nn.BCEWithLogitsLoss()
    model_loss = Loss()

    losses = []
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            loss_epoch = []
            for step, (descriptors, labels) in enumerate(data_loader):
                if torch.cuda.is_available():
                    descriptors = descriptors.cuda()
                    labels = labels.cuda()
                model.zero_grad()
                attention = model(descriptors)
                loss = model_loss(attention, labels)
                loss.backward()
                optimizer.step()
                loss_epoch.append(loss.cpu().detach().numpy())
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(float(loss)))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/models_361_dropout', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path, args.bs,
                                  args.json_labels_path, num_workers=8)
    model = RNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            # step p down over training
            if epoch < 3:
                p = 1.0
            elif epoch < 6:
                p = 0.5
            elif epoch < 9:
                p = 0.25
            else:
                p = 0.0

            loss_epoch = []
            for step, (feat_maps, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    feat_maps = feat_maps.cuda()
                    gt = gt.cuda()
                model.zero_grad()
                out = model(feat_maps, gt, p)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()
                loss_step = loss.cpu().detach().numpy()
                loss_epoch.append(loss_step)
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(loss_step))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def main(arguments=sys.argv[1:]):
    parser = argparse.ArgumentParser(prog="Sparse Evaluation on MNIST dataset")
    parser.add_argument('--nhid', type=int, default=128,
                        help='number of hidden units per layer')
    parser.add_argument('-ws', '--w_sp', type=float, nargs='+', default=[0, 0, 0, 0],
                        help="Weight sparsity setting.")
    parser.add_argument('-wt', '--w_th', type=float, default=0,
                        help="Weight threshold setting.")
    parser.add_argument('-hs', '--h_sp', type=float, nargs='+', default=[0., 0.],
                        help="Hidden state sparsity setting.")
    parser.add_argument('-ht', '--h_th', type=float, nargs='+', default=[0., 0.],
                        help="Hidden state threshold setting.")
    parser.add_argument('-b', '--size_block', type=int, default=-1,
                        help="Block size for hidden state sparsification.")
    parser.add_argument('-v', '--verbose', action='store_true', help="Verbose mode.")
    parser.add_argument('-model', '--model_path',
                        default='MNIST/models/nhid:128-nlayer:2-epoch:10.ckpt',
                        help="Model path.")
    args = parser.parse_args(arguments)

    # load model for GPU / CPU
    if torch.cuda.is_available():
        state_dict = torch.load(args.model_path)
    else:
        state_dict = torch.load(args.model_path, map_location='cpu')

    # sparsify each LSTM weight matrix to the requested per-matrix sparsity
    # sparsity_dict = {}
    for k, v in state_dict.items():
        if 'lstm1' in k:
            if 'weight_x' in k:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[0])
            if 'weight_h' in k:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[1])
        if 'lstm2' in k:
            if 'weight_x' in k:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[2])
            if 'weight_h' in k:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[3])

    # save weight and bias for RISC-V simulation
    # l1_w = np.hstack((state_dict['lstm1.cell_f.weight_xf'].numpy(), state_dict['lstm1.cell_f.weight_xi'].numpy(), state_dict['lstm1.cell_f.weight_xu'].numpy(), state_dict['lstm1.cell_f.weight_xo'].numpy()))
    # l1_u = np.hstack((state_dict['lstm1.cell_f.weight_hf'].numpy(), state_dict['lstm1.cell_f.weight_hi'].numpy(), state_dict['lstm1.cell_f.weight_hu'].numpy(), state_dict['lstm1.cell_f.weight_ho'].numpy()))
    #
    # l2_w = np.hstack((state_dict['lstm2.cell_f.weight_xf'].numpy(), state_dict['lstm2.cell_f.weight_xi'].numpy(), state_dict['lstm2.cell_f.weight_xu'].numpy(), state_dict['lstm2.cell_f.weight_xo'].numpy()))
    # l2_u = np.hstack((state_dict['lstm2.cell_f.weight_hf'].numpy(), state_dict['lstm2.cell_f.weight_hi'].numpy(), state_dict['lstm2.cell_f.weight_hu'].numpy(), state_dict['lstm2.cell_f.weight_ho'].numpy()))
    #
    # l1_w = np.transpose(l1_w)
    # l1_u = np.transpose(l1_u)
    # l2_w = np.transpose(l2_w)
    # l2_u = np.transpose(l2_u)
    #
    # l1_b = np.hstack((state_dict['lstm1.cell_f.bias_f'].numpy(), state_dict['lstm1.cell_f.bias_i'].numpy(), state_dict['lstm1.cell_f.bias_u'].numpy(), state_dict['lstm1.cell_f.bias_o'].numpy()))
    # l2_b = np.hstack((state_dict['lstm2.cell_f.bias_f'].numpy(), state_dict['lstm2.cell_f.bias_i'].numpy(), state_dict['lstm2.cell_f.bias_u'].numpy(), state_dict['lstm2.cell_f.bias_o'].numpy()))
    #
    # np.savez('mnist-l1.npz', w=l1_w, u=l1_u, b=l1_b)
    # np.savez('mnist-l2.npz', w=l2_w, u=l2_u, b=l2_b)

    test_dataset = torchvision.datasets.MNIST(root='./MNIST/data/', train=False,
                                              transform=transforms.ToTensor())
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    model = RNN(input_size, args.nhid, num_layers, num_classes)
    model.load_state_dict(state_dict)
    if torch.cuda.is_available():
        model.cuda()
    # model.half()  # TODO: convert the model to half-precision float

    with torch.no_grad():
        correct = 0
        total = 0
        # hidden = model.init_hidden(batch_size)
        sparse_dict = {"LSTM1": 0., "LSTM2": 0.}
        iteration = 0
        for images, labels in test_loader:
            # output vec_x for RISC-V simulation
            # f = open('mnist_x.txt', 'w')
            # images_flat = images.view(-1)
            # for elem in images_flat:
            #     f.write('{:f},'.format(elem))
            # f.close()

            hidden = (torch.zeros(num_layers, batch_size, args.nhid).to(device),
                      torch.zeros(num_layers, batch_size, args.nhid).to(device))
            images = images.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.to(device)
            outputs, hidden, cur_dict = model(images, hidden, sparse=True,
                                              h_th=args.h_th, h_sp=args.h_sp,
                                              block=args.size_block)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            iteration += 1
            for k, v in sparse_dict.items():
                sparse_dict[k] += cur_dict[k]

        accuracy = 100.0 * correct / total
        print('|| Test Accuracy : {:.5f} || LSTM1 Sparsity: {:.5f} || LSTM2 Sparsity: {:.5f} ||'
              .format(accuracy, sparse_dict['LSTM1'] / iteration,
                      sparse_dict['LSTM2'] / iteration))
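# set_to_zero_sparsity is used above but not defined in these snippets. A
# minimal sketch consistent with its call signature (tensor in, target
# sparsity fraction in, pruned tensor out) is given below; this is an
# assumption, not the project's actual code.
def set_to_zero_sparsity(weight, sparsity=0.0):
    # zero out the smallest-magnitude fraction `sparsity` of the weights
    if sparsity <= 0:
        return weight
    flat = weight.abs().flatten()
    k = int(sparsity * flat.numel())
    if k == 0:
        return weight
    threshold = flat.kthvalue(k).values
    return weight * (weight.abs() > threshold).type(weight.dtype)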
def main(batch_size, embed_size, num_hiddens, num_layers, ln_hidden, ln_output,
         rec_unit, learning_rate=1e-4, log_step=10, num_epochs=50, save_step=100,
         ngpu=1):
    # hyperparameters
    num_workers = 0
    checkpoint_dir = 'checkpoint'

    # Image preprocessing
    transform = {
        'train': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ]),
        'val': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ]),
    }

    # load data
    vocab = build_vocab(path='relative_captions_shoes.json')
    train_data, train_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='train', vocab=vocab,
        transform=transform['train'], batch_size=batch_size)
    val_data, val_loader = data_and_loader(
        path='relative_captions_shoes.json', mode='valid', vocab=vocab,
        transform=transform['val'], batch_size=batch_size)

    losses_val = []
    losses_train = []

    # Build the models
    initial_step = initial_epoch = 0
    encoder = CNN(embed_size)  # embed_size: power of 2
    middle = fcNet(embed_size, ln_hidden, ln_output)
    decoder = RNN(ln_output, num_hiddens, len(vocab), num_layers,
                  rec_unit=rec_unit, drop_out=0.1)

    # Loss, parameters & optimizer
    loss_fun = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + \
        list(encoder.batchnorm.parameters())
    optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Train the models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):
            print('Epoch: {}'.format(epoch))
            for step, (images, captions, lengths) in enumerate(train_loader,
                                                               start=initial_step):
                # Set mini-batch dataset
                images = Variable(images)
                captions = Variable(captions)
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

                # Forward, backward and optimize
                decoder.zero_grad()
                middle.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPUs; feed the fcNet output to the decoder,
                    # mirroring the single-GPU path
                    features = nn.parallel.data_parallel(encoder, images, range(ngpu))
                    rnn_input = nn.parallel.data_parallel(middle, features, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, (rnn_input, captions, lengths), range(ngpu))
                else:
                    # run on a single GPU
                    features = encoder(images)
                    rnn_input = middle(features)
                    outputs = decoder(rnn_input, captions, lengths)

                train_loss = loss_fun(outputs, targets)
                losses_train.append(train_loss.item())
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions, lengths) in enumerate(val_loader):
                        images = Variable(images)
                        captions = Variable(captions)
                        targets = pack_padded_sequence(captions, lengths,
                                                       batch_first=True)[0]
                        # features = encoder(target_images) - encoder(refer_images)
                        features = encoder(images)
                        rnn_input = middle(features)
                        outputs = decoder(rnn_input, captions, lengths)
                        val_loss = loss_fun(outputs, targets)
                        batch_loss_val.append(val_loss.item())
                    losses_val.append(np.mean(batch_loss_val))

                    # predict
                    sampled_ids = decoder.sample(rnn_input)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print('Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                          .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, middle, decoder, optimizer, step,
                                      epoch, losses_train, losses_val,
                                      checkpoint_dir)
                    utils.dump_losses(losses_train, losses_val,
                                      os.path.join(checkpoint_dir, 'losses.pkl'))
    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, middle, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))