def main():
    utils.init_out_dir()
    last_epoch = utils.get_last_checkpoint_step()
    if last_epoch >= args.epoch:
        exit()
    if last_epoch >= 0:
        my_log('\nCheckpoint found: {}\n'.format(last_epoch))
    else:
        utils.clear_log()
    model = RNN(args.device, Number_qubits=args.N, charset_length=args.charset_length,
                hidden_size=args.hidden_size, num_layers=args.num_layers)
    model.train(False)
    print('number of qubits: ', model.Number_qubits)
    my_log('Total nparams: {}'.format(utils.get_nparams(model)))
    model.to(args.device)
    params = [x for x in model.parameters() if x.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay)
    if last_epoch >= 0:
        utils.load_checkpoint(last_epoch, model, optimizer)

    # Quantum state
    ghz = GHZ(Number_qubits=args.N)
    c_fidelity = classical_fidelity(model, ghz)
    # c_fidelity = cfid(model, ghz, './data.txt')
    print(c_fidelity)
def rnn_train_single(rnn: RNN, x, y, learning_rate, criterion=nn.MSELoss()):
    hidden = rnn.init_hidden()
    rnn.zero_grad()
    for i in range(x.size()[0]):
        output, hidden = rnn(x[i], hidden)
    loss = criterion(output, y)
    loss.backward()
    # Update parameters based on the gradient (a plain SGD step)
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()
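# --- Illustrative sketch (not from the original snippet) ---
# The manual update loop above is plain SGD. A hypothetical equivalent that
# delegates the update to torch.optim.SGD, assuming the same `rnn`, `x`, `y`,
# and `criterion` objects, could look like this. The function name is made up
# for illustration only.
def rnn_train_single_with_optimizer(rnn, x, y, learning_rate, criterion=nn.MSELoss()):
    optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
    hidden = rnn.init_hidden()
    optimizer.zero_grad()
    for i in range(x.size()[0]):
        output, hidden = rnn(x[i], hidden)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()
    return output, loss.item()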
embedding_dim = args.embedding_dim  # 256
hidden_dim = args.hidden_dim  # 512
n_layers = args.n_layers  # 2
show_every_n_batches = args.stat_freq  # Show stats every n batches

rnn = RNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5)
if use_gpu:
    rnn.cuda()

# defining loss and optimization functions for training
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# training the model
trained_rnn = train_rnn(rnn, train_loader, batch_size, optimizer, criterion,
                        num_epochs, use_gpu, show_every_n_batches)

# saving the trained model
preprocessor.save_model('./save/trained_rnn', trained_rnn)

print("Generating a script...")
gen_length = args.script_len  # modify the length to your preference
prime_word = args.prime_word.lower()  # elaine
print("Neural net instanciated.") if torch.cuda.is_available(): rnn.cuda() def char_from_output(output): top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data char_i = top_i[0][0] return int_to_char[char_i], char_i def randomTrainingExample(): i = random.randint(0,len(dataX)-1) return dataX[i],dataY[i],Variable(sequence_to_tensor(dataX[i])),Variable(letter_to_tensor(dataY[i])) learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn criterion = nn.NLLLoss() optimizer = optim.SGD(rnn.parameters(), lr=learning_rate) def train(target_tensor, input_tensor): hidden = rnn.initHidden() #reset gradients, else they accumulate optimizer.zero_grad() rnn.zero_grad() #feed all chars in the sequence for i in range(input_tensor.size()[0]): output, hidden = rnn(input_tensor[i], hidden) loss = criterion(output, target_tensor) loss.backward()
## Optionally test with only a small fraction of the training set first
# trainingSet = trainingSet[:math.floor(len(trainingSet) * 0.1)]

n_epochs = 200
print_every = 500
frac_train = 0.90
n_hidden = 512
learning_rate = 0.001

model = RNN(20, n_hidden, 2)
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                            momentum=0.8, nesterov=True)

if os.path.isfile('all_pos_148.ckpt'):
    checkpoint = torch.load('all_pos_148.ckpt')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    print("=> loaded checkpoint")
    with open(logfile_name, 'a') as outfile:
        outfile.write("=> loaded checkpoint\n")

# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = torch.optim.ASGD(model.parameters(), lr=learning_rate)


def train(category_tensor, line_tensor):
    model.zero_grad()
    hidden = model.init_hidden()
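# Illustrative sketch (assumption, not part of the original script): writing a
# checkpoint in the same format that the loading code above expects. The helper
# name `save_checkpoint` is hypothetical.
def save_checkpoint(model, optimizer, path='all_pos_148.ckpt'):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, path)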
                        n_blocks=args.num_layers,
                        dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

model = model.to(device)

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE
lr = args.initial_lr
# These variables are for the learning rate schedule (which you are not asked to use);
# see SGD_LR_SCHEDULE in the main loop
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs

###############################################################################
#
# DEFINE COMPUTATIONS FOR PROCESSING ONE EPOCH
#
###############################################################################
povm = Pauli4()
dataset = POVMData('../notebooks/data/TFIM_training_data.npz', povm)
dataset = POVMData(dataset.filename, dataset.povm_set, data=dataset.data[:50000])
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batchSize, num_workers=1)

model = RNN(hidden_size, n_outcomes, n_qubits, num_gru_layers=num_gru_layers,
            loss_by_step=True, batchSize=batchSize).to(device).double()
optimizer = optim.Adam(model.parameters(), lr=lr)

train_losses = []
KLs = []
fidelities = []
for epoch in range(1, num_epochs + 1):
    # train(epoch, train_losses)
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data = data.to(device).permute(1, 0, 2)  # .reshape(n_qubits, batchSize, n_outcomes)
        # if batch_idx == 1:
        #     print(data[:, 0])
        optimizer.zero_grad()
        outputs, hidden, outcome_probs, loss, inputX = model(data)
    args.transform,
    args.dataset_name,
)
train_loss_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': '%s - Train Loss' % basename})
test_loss_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': '%s - Test Loss' % basename})

use_cuda = args.cuda
hidden_size = args.hidden_size
# input size: only the useful part of the VAE output is kept
input_size = 50
lr = 1e-4
rnn = RNN(input_size, hidden_size)
# Note: this rebinds the name `optim` from the torch.optim module to the
# optimizer instance, so the module is no longer reachable under that name.
optim = optim.Adam(rnn.parameters(), lr=lr, weight_decay=1e-6)
if use_cuda:
    rnn.cuda()

rnn_epoch = 0
total_passes = 0
train_loss = []
test_loss = []
if args.rnn_model_loadpath is not None:
    if os.path.exists(args.rnn_model_loadpath):
        rnn_model_dict = torch.load(args.rnn_model_loadpath)
        rnn.load_state_dict(rnn_model_dict['state_dict'])
        optim.load_state_dict(rnn_model_dict['optimizer'])
        rnn_epoch = rnn_model_dict['epoch']
        try:
            total_passes = rnn_model_dict['total_passes']
class HierarchicalVariationalAutoEncoder(nn.Module):
    def __init__(self, vocab_size=1000, input_dimension=300, hidden_dimension=512,
                 num_layers=2, drop_prob=None, use_context_enhanced_rnn=True,
                 use_pretrained_weights=False, min_sentence_length=5, max_sentence_length=9,
                 min_paragraph_length=3, max_paragraph_length=3, max_rows=None,
                 max_sentences_in_paragraph_loading=None, max_paragraphs=None):
        super(HierarchicalVariationalAutoEncoder, self).__init__()
        self.vocab_size = vocab_size
        self.input_dimension = input_dimension
        self.encoder_hidden_dimension = hidden_dimension
        self.decoder_hidden_dimension = hidden_dimension
        self.guide_hidden_dimension = hidden_dimension * 2
        self.num_layers = num_layers
        self.drop_prob = drop_prob
        self.use_pretrained_weights = use_pretrained_weights
        self.min_sentence_length = min_sentence_length
        self.max_sentence_length = max_sentence_length
        self.min_paragraph_length = min_paragraph_length
        self.max_paragraph_length = max_paragraph_length
        self.identifier = '{}tokens_{}smin_{}smax_{}pmin_{}pmax_{}hidden_{}layers_{}drop_{}'.format(
            self.vocab_size, self.min_sentence_length, self.max_sentence_length,
            self.min_paragraph_length, self.max_paragraph_length,
            self.encoder_hidden_dimension, self.num_layers,
            'no' if self.drop_prob is None else round(self.drop_prob * 100),
            'contextenhancedrnn' if use_context_enhanced_rnn else 'simplernn')
        self._init_paths()
        self._load_data(max_rows=max_rows,
                        max_sentences_in_paragraph_loading=max_sentences_in_paragraph_loading,
                        max_paragraphs=max_paragraphs)
        self._init_encoder(use_pretrained_weights)
        self._init_decoder(use_pretrained_weights, use_context_enhanced_rnn)
        self._init_guide(use_pretrained_weights)
        self.loss = SequenceVariationalLoss()
        self.vae_error_rate = ErrorRate()
        self.guide_loss = nn.L1Loss()
        self._init_cuda()

    def _init_paths(self):
        self.encoder_weights = 'weights/' + self.identifier + '_encoder.weights'
        self.decoder_weights = 'weights/' + self.identifier + '_decoder.weights'
        self.guide_weights = 'weights/' + self.identifier + '_guide.weights'
        self.vae_train_loss_path = 'results/{}_vae_train_loss.npy'.format(self.identifier)
        self.vae_test_loss_path = 'results/{}_vae_test_loss.npy'.format(self.identifier)
        self.vae_train_error_path = 'results/{}_vae_train_error.npy'.format(self.identifier)
        self.vae_test_error_path = 'results/{}_vae_test_error.npy'.format(self.identifier)
        self.guide_train_loss_path = 'results/{}_guide_train_loss.npy'.format(self.identifier)
        self.guide_test_loss_path = 'results/{}_guide_test_loss.npy'.format(self.identifier)
        self.guide_train_error_path = 'results/{}_guide_train_error.npy'.format(self.identifier)
        self.guide_test_error_path = 'results/{}_guide_test_error.npy'.format(self.identifier)

    def _init_encoder(self, use_pretrained_weights):
        if use_pretrained_weights and os.path.exists(self.encoder_weights):
            if torch.cuda.is_available():
                self.encoder = torch.load(self.encoder_weights)
            else:
                self.encoder = torch.load(self.encoder_weights,
                                          map_location=lambda storage, loc: storage)
        else:
            self.encoder = Encoder(self.input_dimension, self.encoder_hidden_dimension,
                                   self.num_layers)

    def _init_decoder(self, use_pretrained_weights, use_context_enhanced_rnn):
        if use_pretrained_weights and os.path.exists(self.decoder_weights):
            if torch.cuda.is_available():
                self.decoder = torch.load(self.decoder_weights)
            else:
                self.decoder = torch.load(self.decoder_weights,
                                          map_location=lambda storage, loc: storage)
        else:
            context_dimension = self.decoder_hidden_dimension
            if not use_context_enhanced_rnn:
                context_dimension = None
            self.decoder = Decoder(self.input_dimension, len(self.embeddings),
                                   self.decoder_hidden_dimension, self.num_layers,
                                   context_dimension)

    def _init_guide(self, use_pretrained_weights):
        if use_pretrained_weights and os.path.exists(self.guide_weights):
            if torch.cuda.is_available():
                self.guide = torch.load(self.guide_weights)
            else:
                self.guide = torch.load(self.guide_weights,
                                        map_location=lambda storage, loc: storage)
        else:
            self.guide = RNN(self.encoder_hidden_dimension * 2,
                             self.guide_hidden_dimension, self.num_layers)

    def _init_cuda(self):
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.guide = self.guide.cuda()
            self.loss = self.loss.cuda()
            self.vae_error_rate = self.vae_error_rate.cuda()
            self.guide_loss = self.guide_loss.cuda()

    def _load_data(self, glove_file='glove_9902.txt', sentence_file='books_in_sentences.txt',
                   token_file="most_common_tokens.txt", max_rows=None,
                   max_sentences_in_paragraph_loading=None, max_paragraphs=None):
        self.most_common_tokens = BookSentences.load_most_common_tokens(
            max_vocab_size=self.vocab_size)
        self.embeddings = GloveEmbeddings(glove_file, vocabulary=self.most_common_tokens)
        self.book_sentence_datasets = BookSentences.load_by_length(
            sentence_file=sentence_file, token_file=token_file,
            min_length=self.min_sentence_length, max_length=self.max_sentence_length,
            max_rows=max_rows, max_rarity=self.vocab_size)
        self.book_paragraph_dataset = BookParagraphs.load_from_file(
            sentence_file=sentence_file, max_sentences=max_sentences_in_paragraph_loading,
            max_paragraphs=max_paragraphs, min_sentence_length=self.min_sentence_length,
            max_sentence_length=self.max_sentence_length,
            min_paragraph_length=self.min_paragraph_length,
            max_paragraph_length=self.max_paragraph_length)

    def _get_sentence_data_loaders(self, batch_size, test_split_ratio=0.1):
        test_loaders = []
        train_loaders = []
        for i, dataset in enumerate(self.book_sentence_datasets):
            train, test = self._get_loaders(batch_size, dataset)
            train_loaders.append(train)
            test_loaders.append(test)
        return train_loaders, test_loaders

    def _get_paragraph_data_loader(self, batch_size, test_split_ratio=0.1):
        train_loader, test_loader = self._get_loaders(batch_size, self.book_paragraph_dataset)
        return train_loader, test_loader

    def _get_loaders(self, batch_size, dataset, test_split_ratio=0.1):
        test_split = int(test_split_ratio * len(dataset))
        indices = list(range(len(dataset)))
        np.random.shuffle(indices)
        test_sampler = SubsetRandomSampler(indices[:test_split])
        train_sampler = SubsetRandomSampler(indices[test_split:])
        test_loader = DataLoader(dataset, sampler=test_sampler, batch_size=batch_size)
        train_loader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)
        return train_loader, test_loader

    def _get_vae_history(self):
        if self.use_pretrained_weights and os.path.exists(self.vae_train_loss_path) \
                and os.path.exists(self.vae_train_error_path):
            train_losses = np.load(self.vae_train_loss_path).tolist()
            train_error_rates = np.load(self.vae_train_error_path).tolist()
        else:
            train_losses = [[], [], []]
            train_error_rates = []
        if self.use_pretrained_weights and os.path.exists(self.vae_test_loss_path) \
                and os.path.exists(self.vae_test_error_path):
            test_losses = np.load(self.vae_test_loss_path).tolist()
            test_error_rates = np.load(self.vae_test_error_path).tolist()
        else:
            test_losses = [[], [], []]
            test_error_rates = []
        return train_losses, test_losses, train_error_rates, test_error_rates

    def _get_guide_history(self):
        if self.use_pretrained_weights and os.path.exists(self.guide_train_loss_path) \
                and os.path.exists(self.guide_train_error_path):
            train_losses = np.load(self.guide_train_loss_path).tolist()
            train_error_rates = np.load(self.guide_train_error_path).tolist()
        else:
            train_losses = [[], [], []]
            train_error_rates = []
        if self.use_pretrained_weights and os.path.exists(self.guide_test_loss_path) \
                and os.path.exists(self.guide_test_error_path):
            test_losses = np.load(self.guide_test_loss_path).tolist()
            test_error_rates = np.load(self.guide_test_error_path).tolist()
        else:
            test_losses = [[], [], []]
            test_error_rates = []
        return train_losses, test_losses, train_error_rates, test_error_rates

    def train_guide(self, num_epochs=40, train_epoch_size=950, test_epoch_size=50,
                    learning_rate=1e-5, batch_size=5):
        optimizer = torch.optim.Adam(self.guide.parameters(), lr=learning_rate)
        train_losses, test_losses, train_error_rates, test_error_rates = self._get_guide_history()
        test_split_ratio = test_epoch_size / float(train_epoch_size + test_epoch_size)
        train_loader, test_loader = self._get_paragraph_data_loader(1, test_split_ratio)
        print('Train GUIDE')
        start_time = time.time()
        for e in range(num_epochs):
            print('Epoch {}'.format(e))
            print('Train')
            train_loss, train_r_loss, train_kld_loss, train_error_rate = self._guide_epoch(
                train_loader, train_epoch_size * batch_size, batch_size, optimizer)
            train_losses[0] += train_loss
            train_losses[1] += train_r_loss
            train_losses[2] += train_kld_loss
            train_error_rates += train_error_rate
            torch.save(self.guide, self.guide_weights)
            np.save(self.guide_train_loss_path, np.array(train_losses))
            np.save(self.guide_train_error_path, np.array(train_error_rates))
            if test_epoch_size > 0:
                print('Test')
                test_loss, test_r_loss, test_kld_loss, test_error_rate = self._guide_epoch(
                    test_loader, test_epoch_size * batch_size, batch_size, None)
                test_losses[0] += test_loss
                test_losses[1] += test_r_loss
                test_losses[2] += test_kld_loss
                test_error_rates += test_error_rate
                np.save(self.guide_test_loss_path, np.array(test_losses))
                np.save(self.guide_test_error_path, np.array(test_error_rates))
            print('Elapsed Time: {}\n'.format(time.time() - start_time))

    def train_vae(self, num_epochs=70, train_epoch_size=4750, test_epoch_size=250,
                  learning_rate=1e-5, batch_size=16):
        optimizer = torch.optim.Adam(itertools.chain(self.encoder.parameters(),
                                                     self.decoder.parameters()),
                                     lr=learning_rate)
        train_losses, test_losses, train_error_rates, test_error_rates = self._get_vae_history()
        test_split_ratio = test_epoch_size / float(train_epoch_size + test_epoch_size)
        train_loaders, test_loaders = self._get_sentence_data_loaders(batch_size, test_split_ratio)
        train_lengths = np.array([len(data_loader) for data_loader in train_loaders],
                                 dtype=np.float32)
        test_lengths = np.array([len(data_loader) for data_loader in test_loaders],
                                dtype=np.float32)
        print('Train VAE')
        start_time = time.time()
        for e in range(num_epochs):
            print('Epoch {}'.format(e))
            print('Train')
            sentence_length_indices = np.random.multinomial(
                1, .9999 * train_lengths / float(np.sum(train_lengths)),
                size=(train_epoch_size)).argmax(axis=1)
            train_loss, train_r_loss, train_kld_loss, train_error_rate = self._vae_epoch(
                train_loaders, sentence_length_indices, batch_size, optimizer)
            train_losses[0] += train_loss
            train_losses[1] += train_r_loss
            train_losses[2] += train_kld_loss
            train_error_rates += train_error_rate
            torch.save(self.encoder, self.encoder_weights)
            torch.save(self.decoder, self.decoder_weights)
            np.save(self.vae_train_loss_path, np.array(train_losses))
            np.save(self.vae_train_error_path, np.array(train_error_rates))
            if test_epoch_size > 0:
                print('Test')
                sentence_length_indices = np.random.multinomial(
                    1, .9999 * test_lengths / float(np.sum(test_lengths)),
                    size=(test_epoch_size)).argmax(axis=1)
                test_loss, test_r_loss, test_kld_loss, test_error_rate = self._vae_epoch(
                    test_loaders, sentence_length_indices, batch_size, None)
                test_losses[0] += test_loss
                test_losses[1] += test_r_loss
                test_losses[2] += test_kld_loss
                test_error_rates += test_error_rate
                np.save(self.vae_test_loss_path, np.array(test_losses))
                np.save(self.vae_test_error_path, np.array(test_error_rates))
            print('Elapsed Time: {}\n'.format(time.time() - start_time))

    def _vae_epoch(self, loaders, sentence_length_indices, batch_size, optimizer=None):
        losses = []
        reconstruction_losses = []
        kld_losses = []
        error_rates = []
        for index in sentence_length_indices:
            loader = loaders[index]
            sequence = next(iter(loader))
            sequence_of_embedded_batches = [
                get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
                for batch in sequence
            ]
            sequence_of_indexed_batches = [
                get_variable(torch.LongTensor(self.embeddings.index_batch(batch)))
                for batch in sequence
            ]
            mu, logvar = self._encoder_forward(sequence_of_embedded_batches, batch_size)
            context = self._get_context(mu, logvar, batch_size)
            if optimizer is not None:
                logits, predictions = self._decoder_forward(
                    context, batch_size, sequence_of_indexed_batches, len(sequence),
                    self.drop_prob)
            else:
                logits, predictions = self._decoder_forward(
                    context, batch_size, None, len(sequence), None)
            loss, reconstruction_loss, kld_loss = self.loss(
                logits, sequence_of_indexed_batches, mu, logvar, self.decoder.step_count)
            losses.append(loss.cpu().data.numpy())
            reconstruction_losses.append(reconstruction_loss.cpu().data.numpy())
            kld_losses.append(kld_loss.cpu().data.numpy())
            error_rate = self.vae_error_rate(predictions, sequence_of_indexed_batches)
            error_rates.append(error_rate.cpu().data.numpy())
            if optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                self.encoder.increment_step(batch_size=batch_size)
                self.decoder.increment_step(batch_size=batch_size)
        print('Mean Loss: {}'.format(np.mean(losses)))
        print('Mean Error Rate: {}'.format(np.mean(error_rates)))
        return losses, reconstruction_losses, kld_losses, error_rates

    def _guide_epoch(self, loader, num_iterations, batch_size, optimizer=None):
        losses = []
        reconstruction_losses = []
        kld_losses = []
        error_rates = []
        tmp_losses = []
        tmp_reconstruction_losses = []
        tmp_kld_losses = []
        tmp_error_rates = []
        for index in range(num_iterations):
            sequences = next(iter(loader))
            sequence = sequences[0]
            sequence_of_embedded_batches = [
                get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
                for batch in sequence
            ]
            mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
            h_tm1 = get_variable(torch.zeros(self.num_layers, 1, self.guide_hidden_dimension))
            for sequence_i in range(1, len(sequences)):
                h_t = self.guide(torch.cat([mu, logvar], dim=1), h_tm1)
                mu, logvar = h_t[-1].split(self.decoder_hidden_dimension, dim=1)
                context = self._get_context(mu, logvar, 1)
                sequence = sequences[sequence_i]
                sequence_of_embedded_batches = [
                    get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
                    for batch in sequence
                ]
                sequence_of_indexed_batches = [
                    get_variable(torch.LongTensor(self.embeddings.index_batch(batch)))
                    for batch in sequence
                ]
                if optimizer is not None:
                    logits, predictions = self._decoder_forward(
                        context, 1, sequence_of_indexed_batches, len(sequence), self.drop_prob)
                else:
                    logits, predictions = self._decoder_forward(
                        context, 1, None, len(sequence), None)
                loss, reconstruction_loss, kld_loss = self.loss(
                    logits, sequence_of_indexed_batches, mu, logvar, self.decoder.step_count)
                tmp_losses.append(loss)
                tmp_reconstruction_losses.append(reconstruction_loss)
                tmp_kld_losses.append(kld_loss)
                error_rate = self.vae_error_rate(predictions, sequence_of_indexed_batches)
                tmp_error_rates.append(error_rate)
                mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
                h_tm1 = h_t
            if (index + 1) % batch_size == 0:
                loss = torch.cat(tmp_losses).mean()
                reconstruction_loss = torch.cat(tmp_reconstruction_losses).mean()
                kld_loss = torch.cat(tmp_kld_losses).mean()
                error_rate = torch.cat(tmp_error_rates).mean()
                tmp_losses = []
                tmp_reconstruction_losses = []
                tmp_kld_losses = []
                tmp_error_rates = []
                losses.append(loss.cpu().data.numpy())
                reconstruction_losses.append(reconstruction_loss.cpu().data.numpy())
                kld_losses.append(kld_loss.cpu().data.numpy())
                error_rates.append(error_rate.cpu().data.numpy())
                if optimizer is not None:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
        print('Mean Loss: {}'.format(np.mean(losses)))
        print('Mean Error Rate: {}'.format(np.mean(error_rates)))
        return losses, reconstruction_losses, kld_losses, error_rates

    def _encoder_forward(self, sequence_of_embedded_batches, batch_size):
        return self.encoder(sequence_of_embedded_batches, batch_size)

    def _get_context(self, mu, logvar, batch_size):
        z = get_variable(torch.randn(batch_size, self.decoder_hidden_dimension))
        std = torch.exp(0.5 * logvar)
        context = z * std + mu
        return context

    def _decoder_forward(self, context, batch_size, targets=None, sequence_length=None,
                         drop_prob=None):
        logits, predictions = self.decoder(context, self.embeddings, targets,
                                           sequence_length, drop_prob, batch_size)
        return logits, predictions

    def generate_sentence(self, batch_size=16):
        context = get_variable(torch.randn(batch_size, self.decoder_hidden_dimension))
        logits, predictions = self.decoder(context, self.embeddings, batch_size=batch_size)
        return self._format_sentences(self._to_sentences(predictions, batch_size))

    def interpolate(self, start_sentence, end_sentence, steps=5):
        start_split_sentence = start_sentence.split(" ")
        start_sequence_of_batches = [[word] for word in start_split_sentence]
        start_sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in start_sequence_of_batches
        ]
        start_mu, start_logvar = self._encoder_forward(start_sequence_of_embedded_batches, 1)
        end_split_sentence = end_sentence.split(" ")
        end_sequence_of_batches = [[word] for word in end_split_sentence]
        end_sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in end_sequence_of_batches
        ]
        end_mu, end_logvar = self._encoder_forward(end_sequence_of_embedded_batches, 1)
        step_size = (end_mu - start_mu) / float(steps)
        sentences = [start_sentence]
        for i in range(steps - 1):
            logits, predictions = self.decoder(start_mu + i * step_size, self.embeddings,
                                               batch_size=1)
            sentences.extend(self._to_sentences(predictions, 1))
        sentences.append(end_sentence)
        return self._format_sentences(sentences)

    def reconstruct(self, sentence):
        split_sentence = sentence.split(" ")
        sequence_of_batches = [[word] for word in split_sentence]
        sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in sequence_of_batches
        ]
        mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
        contexts = self._get_context(mu, logvar, 3)
        logits, mean_predictions = self._decoder_forward(mu, 1)
        logits, sample_predictions = self._decoder_forward(contexts, 3)
        return (self._format_sentences([sentence]),
                self._format_sentences(self._to_sentences(mean_predictions, 1)),
                self._format_sentences(self._to_sentences(sample_predictions, 3)))

    def _to_sentences(self, predictions, batch_size):
        sentences = [[] for i in range(batch_size)]
        for batch in predictions:
            np_batch = batch.cpu().data.numpy().reshape(-1)
            for i in range(len(np_batch)):
                sentences[i].append(self.embeddings.get_word(np_batch[i]))
        sentences = [
            sentence[:-1] if sentence[-2] in set(['!', '?']) else sentence
            for sentence in sentences
        ]
        sentences = [
            ' '.join(sentence).split('.', 1)[0] + '.' for sentence in sentences
        ]
        return sentences

    def _format_sentences(self, sentences):
        sentences = [
            re.sub(r' (\.)*(?P<capture>([a-z]*\'[a-z]+)|[,;:\.\\?\!"]|(\'\'))',
                   r'\g<capture>', sentence.replace('`` ', '``'))
            for sentence in sentences
        ]
        return sentences
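# Illustrative, self-contained sketch (not part of the class above): the
# reparameterization trick that _get_context implements, written as a standalone
# helper. The name `reparameterize` is hypothetical.
def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)   # logvar = log(sigma^2), so std = exp(0.5 * logvar)
    eps = torch.randn_like(mu)      # eps ~ N(0, I)
    return mu + eps * std           # z = mu + sigma * eps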
def main():
    train_names = []
    train_labels = []
    # Read all the file names and the labels of the speakers for the training set
    with open('train.txt', 'r') as file:
        for line in file:
            speaker = line.split('-')[0]
            speech = line.split('-')[1]
            file_path = os.path.join('./LibriSpeech/dev-clean/', speaker, speech,
                                     line.split('\n')[0])
            train_names.append(file_path)
            train_labels.append(speaker)

    test_names = []
    test_labels = []
    # Read all the file names and the labels of the speakers for the testing set
    with open('test.txt', 'r') as file:
        for line in file:
            speaker = line.split('-')[0]
            speech = line.split('-')[1]
            file_path = os.path.join('./LibriSpeech/dev-clean/', speaker, speech,
                                     line.split('\n')[0])
            test_names.append(file_path)
            test_labels.append(speaker)

    # Encode the speakers as one-hot vectors. One-hot encoding is a reasonable
    # representation here because the number of speakers is small: 40.
    label_encoder = LabelEncoder()
    train_data_labels = label_encoder.fit_transform(train_labels)
    n_classes = len(np.unique(train_data_labels))
    print('Number of Train classes', len(np.unique(train_data_labels)))
    binarize = LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
    train_data_labels = binarize.fit_transform(train_data_labels)

    label_encoder = LabelEncoder()
    test_data_labels = label_encoder.fit_transform(test_labels)
    n_classes = len(np.unique(test_data_labels))
    print('Number of Test classes', len(np.unique(test_data_labels)))
    binarize = LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
    test_data_labels = binarize.fit_transform(test_data_labels)

    # Loading the data for training and testing
    train, train_labels = load_data_truncate(train_names, train_data_labels)
    val, val_labels = load_data_truncate(test_names, test_data_labels)

    # Preparing the data for the DataLoader so that it can be used in batches
    train = np.array(train).astype(np.float32)
    val = np.array(val).astype(np.float32)
    train_labels = np.array(train_labels).astype(np.float32)
    val_labels = np.array(val_labels).astype(np.float32)

    train_load = []
    for i in range(0, len(train)):
        train_load.append((train[i], train_labels[i]))
    val_load = []
    for i in range(0, len(val)):
        val_load.append((val[i], val_labels[i]))

    # DataLoader for the train set: batch size of 4, shuffled,
    # dropping the samples which do not fit the batch size.
    train_dataset = DataLoader(train_load, batch_size=4, shuffle=True, drop_last=True)
    # DataLoader for the test set.
    val_dataset = DataLoader(val_load)

    # Initialize the RNN.
    model = RNN(input_size=100, output_size=n_classes, hidden_dim=256, n_layers=1)

    # Specifying the hyperparameters for training
    n_epochs = 100
    lr = 0.00001

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training part
    train_accuracy = []
    test_accuracy = []
    train_loss = []
    test_loss = []
    for epoch in range(0, n_epochs):
        model.train()
        optimizer.zero_grad()  # Clears existing gradients from the previous epoch
        epoch_loss = []  # Store the losses for all batches of an epoch
        correct_predictions = 0
        total_predictions = 0
        # Iterate through the data loader
        for i, (x, y) in enumerate(train_dataset):
            # Reshaping for training
            x = Variable(x.view(-1, 20, 100))
            y = Variable(y)
            output, _ = model(x)  # Obtain predictions
            target = torch.argmax(y, dim=1)
            loss = criterion(output, target)
            loss.backward()   # Does backpropagation and calculates gradients
            optimizer.step()  # Updates the weights accordingly
            epoch_loss.append(loss.item())
            # Compute number of correct predictions and total number of predictions
            correct, predicted = compute_accuracy(output, target)
            correct_predictions += correct
            total_predictions += predicted
        # Print statistics every 10th epoch
        if epoch % 10 == 0:
            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Loss: {:.4f}, Accuracy: {}/{}".format(
                np.average(epoch_loss), correct_predictions.item(), total_predictions))
        train_accuracy.append(correct_predictions.item() / total_predictions)
        train_loss.append(np.average(epoch_loss))

        # Evaluate the model on the test set
        model.eval()
        correct_predictions = 0
        total_predictions = 0
        epoch_val_loss = []
        for i, (x, y) in enumerate(val_dataset):
            x = Variable(x.view(-1, 20, 100))
            y = Variable(y)
            output, _ = model(x)
            target = torch.argmax(y, dim=1)
            loss = criterion(output, target)
            epoch_val_loss.append(loss.item())
            correct, predicted = compute_accuracy(output, target)
            correct_predictions += correct
            total_predictions += predicted
        print("Eval Accuracy: {}/{}".format(correct_predictions.item(), total_predictions))
        test_accuracy.append(correct_predictions.item() / total_predictions)
        test_loss.append(np.average(epoch_val_loss))

    # Final evaluation pass, keeping the raw predictions and targets
    model.eval()
    correct_predictions = 0
    total_predictions = 0
    preds = []
    targets = []
    for i, (x, y) in enumerate(val_dataset):
        x = Variable(x.view(-1, 20, 100))
        y = Variable(y)
        output, _ = model(x)
        target = torch.argmax(y, dim=1)
        correct, predicted = compute_accuracy(output, target)
        preds.append(output)
        targets.append(target)
        correct_predictions += correct
        total_predictions += predicted
    print("Final Eval Accuracy: {}/{}".format(correct_predictions.item(), total_predictions))

    with open('accuracy.pickle', 'wb') as f:
        pickle.dump(train_accuracy, f)
        pickle.dump(test_accuracy, f)
    with open('loss.pickle', 'wb') as f:
        pickle.dump(train_loss, f)
        pickle.dump(test_loss, f)
    with open('preds.pickle', 'wb') as f:
        pickle.dump(preds, f)
        pickle.dump(targets, f)
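# Illustrative sketch (not part of the original script): reading the pickled
# metrics back in the same order they were dumped above. The helper name
# `load_metrics` is hypothetical.
def load_metrics(path='accuracy.pickle'):
    with open(path, 'rb') as f:
        train_acc = pickle.load(f)
        test_acc = pickle.load(f)
    return train_acc, test_acc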
    exit(1)

input_length = len(input_text)
net = RNN(input_size=ALPHABET_SIZE, hidden_size=HIDDEN_SIZE, output_size=ALPHABET_SIZE)
if MODEL_SAVE_PATH.exists():
    print("Loading trained model from file")
    net.load_state_dict(torch.load(MODEL_SAVE_PATH))
net.train()

optimizer = optim.RMSprop(net.parameters())
hidden_state = torch.zeros(BATCH_SIZE, HIDDEN_SIZE)
total_loss = 0.0

print("Starting to train char RNN")
i = 0
last_print = 0
while i < input_length:
    if i + BATCH_SIZE >= input_length:
        # TODO: pad last batch to `BATCH_SIZE`
        break
    start, end = i, i + BATCH_SIZE
    chars = input_text[start:end]
def main():
    start_time = time.time()
    utils.init_out_dir()
    last_epoch = utils.get_last_checkpoint_step()
    if last_epoch >= args.epoch:
        exit()
    if last_epoch >= 0:
        my_log('\nCheckpoint found: {}\n'.format(last_epoch))
    else:
        utils.clear_log()
    utils.print_args()

    model = RNN(args.device, Number_qubits=args.N, charset_length=args.charset_length,
                hidden_size=args.hidden_size, num_layers=args.num_layers)
    data = prepare_data(args.N, './data.txt')
    ghz = GHZ(Number_qubits=args.N)
    model.train(True)
    my_log('Total nparams: {}'.format(utils.get_nparams(model)))
    model.to(args.device)
    params = [x for x in model.parameters() if x.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay)
    if last_epoch >= 0:
        utils.load_checkpoint(last_epoch, model, optimizer)

    init_time = time.time() - start_time
    my_log('init_time = {:.3f}'.format(init_time))

    my_log('Training...')
    start_time = time.time()
    best_fid = 0
    trigger = 0  # incremented each time the current fidelity falls below the best fidelity
    for epoch_idx in range(last_epoch + 1, args.epoch + 1):
        for batch_idx in range(int(args.Ns / args.batch_size)):
            optimizer.zero_grad()
            # idx = np.random.randint(low=0, high=int(args.Ns - 1), size=(args.batch_size,))
            idx = np.arange(args.batch_size) + batch_idx * args.batch_size
            train_data = data[idx]
            loss = -model.log_prob(torch.from_numpy(train_data).to(args.device)).mean()
            loss.backward()
            if args.clip_grad:
                clip_grad_norm_(params, args.clip_grad)
            optimizer.step()
        print('epoch_idx {} current loss {:.8g}'.format(epoch_idx, loss.item()))
        print('Evaluating...')
        # Evaluation
        current_fid = classical_fidelity(model, ghz, print_prob=False)
        if current_fid > best_fid:
            trigger = 0  # reset
            my_log('epoch_idx {} loss {:.8g} fid {} time {:.3f}'.format(
                epoch_idx, loss.item(), current_fid, time.time() - start_time))
            best_fid = current_fid
            if (args.out_filename and args.save_epoch
                    and epoch_idx % args.save_epoch == 0):
                state = {
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, '{}_save/{}.state'.format(args.out_filename, epoch_idx))
        else:
            trigger = trigger + 1
            if trigger > 4:
                break
ix_to_char = {i: ch for i, ch in enumerate(sorted(chars))}
np.random.seed(config['random_seed'])
np.random.shuffle(names)
data_size, vocab_size = len(names), len(char_to_ix)
print('There are %d training examples and %d unique characters in your data.'
      % (data_size, vocab_size))

model = RNN(input_size=vocab_size, hidden_size=config['hidden_size'],
            output_size=vocab_size, n_layers=config['rnn_layers'],
            dropout=config['dropout'])
optimizer = optim.Adam(model.parameters(), config['learning_rate'])
criterion = nn.CrossEntropyLoss()

# print_every = 10
all_losses = []
total_loss = 0
saves = 0
start = time.time()
for i in range(config['epochs']):
    for training_example in names:
        model.zero_grad()
        model.hidden = model.init_hidden()
        training_example_tensor = to_tensor(training_example, char_to_ix)
        input = training_example_tensor[:-1]
def train(data_path, train_path, val_path, test_path, hidden_size, num_classes,
          num_layers, num_dir, batch_size, emb_dim, dropout, net_type, embfix):
    print('Training...')
    # define fields
    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABEL = data.Field(sequential=False, unk_token=None)

    # build dataset splits
    train, val, test = data.TabularDataset.splits(
        path=data_path, train=train_path, validation=val_path, test=test_path,
        format='tsv', fields=[('text', TEXT), ('label', LABEL)])

    # build vocabs
    TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=emb_dim), min_freq=2)
    prevecs = TEXT.vocab.vectors
    # TEXT.build_vocab(train, min_freq=3)
    LABEL.build_vocab(train)

    # build iterators
    train_iter = data.BucketIterator(train, batch_size=batch_size,
                                     sort_key=lambda x: len(x.text), train=True)
    val_iter = data.Iterator(val, batch_size=batch_size, repeat=False, train=False,
                             sort=False, shuffle=False)
    test_iter = data.Iterator(test, batch_size=batch_size, repeat=False, train=False,
                              sort=False, shuffle=False)

    # print info
    print(max(LABEL.vocab.freqs.values()))
    print('num_classes: ', len(LABEL.vocab))
    print('input_size: ', len(TEXT.vocab))
    print('majority class acc:', max(LABEL.vocab.freqs.values()) / len(train))
    print('random guess acc:',
          (max(LABEL.vocab.freqs.values()) / len(train))**2
          + (min(LABEL.vocab.freqs.values()) / len(train))**2)

    num_classes = len(LABEL.vocab)
    input_size = len(TEXT.vocab)
    model = RNN(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes,
                prevecs=prevecs, num_layers=num_layers, num_dir=num_dir,
                batch_size=batch_size, emb_dim=emb_dim, embfix=embfix,
                dropout=dropout, net_type=net_type)

    epochs = 100
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adamax(model.parameters())
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)
    if int(torch.cuda.is_available()) == 1:
        model = model.cuda()

    # train
    model.train()
    best_val_acc = 0
    for e in range(epochs):
        print('Epoch:', e)
        tot_loss = 0
        corrects = 0
        train_iter.repeat = False
        for batch_count, batch in enumerate(train_iter):
            # print('Batch:', batch_count)
            # print(batch.text)
            # print(batch.label)
            model.zero_grad()
            inp = batch.text.t()
            preds = model(inp)
            target = batch.label
            # print(preds, batch.label)
            loss = criterion(preds, batch.label)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(preds, 1)
            corrects += int(preds.data.eq(target.data).sum())
            tot_loss += loss.data[0]
        print('acc (train):', 100 * corrects / len(train_iter.dataset))
        print('loss (train):', tot_loss)

        val_acc, _, val_loss = evaluate(val_iter, model, TEXT, LABEL)
        print('acc (val):', val_acc)
        print('loss (val):', val_loss)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc, test_preds, test_loss = evaluate(test_iter, model, TEXT, LABEL)
            # print('Test acc:', test_acc)
            f = open('./preds/preds_' + str(e) + '.txt', 'w')
            for x in test_preds:
                f.write(str(int(x)) + '\n')
            f.close()
            torch.save(model.state_dict(),
                       './models/e' + str(e) + '_' + str(val_acc) + '.pt')
params['batch_size'] = 1
vocab = get_vocabulary(data_source, params)
print('Vocabulary loaded.')
train_data_loader = get_data_loader(vocab, params, 'train')
print("Training data loaded.")
test_data_loader = get_data_loader(vocab, params, 'test')
print("Testing data loaded.")

cnn = ResNet(params['resnet_version'], params['embedding_length'])
rnn = RNN(params['embedding_length'], params['num_hidden_units'], len(vocab),
          params['num_layers'])
loss_fn = nn.CrossEntropyLoss()
trainable_params = (list(rnn.parameters())
                    + list(cnn.linear_secondlast_layer.parameters())
                    + list(cnn.last_layer.parameters()))

if params['optimizer_type'] == 'SGD':
    optimizer = torch.optim.SGD(trainable_params, lr=params['lr'],
                                momentum=params['momentum'])
elif params['optimizer_type'] == 'Adam':
    optimizer = torch.optim.Adam(trainable_params, lr=params['lr'])
else:
    raise ValueError('Please specify a valid optimizer. %s is invalid.'
                     % (params['optimizer_type']))

if params['device'] == 'cpu':
    cnn.cpu()
    rnn.cpu()
datapath = 'data/names/'
train_data, all_categories = get_language_data(datapath)
n_categories = len(all_categories)
print(f'There are {n_categories} languages.\nNumber of family names per language:')
for categ in train_data.keys():
    print(' {}\t {}'.format(categ, len(train_data[categ])))

### create model
n_hidden = 128
model_net = RNN(n_letters, n_hidden, n_categories)

#### training
optimizer = torch.optim.Adam(model_net.parameters(), lr=0.001)
criterion = torch.nn.NLLLoss()
learning_rate = 0.005


def train(category_tensor, line_tensor):
    hidden = model_net.initHidden()
    model_net.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = model_net(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    optimizer.zero_grad()
    loss.backward()
# models_to_test = ['all_pos_{}.ckpt'.format(x) for x in range(160)]
models_to_test = ['all_pos_lanthi_neg_{}.ckpt'.format(x) for x in range(99, 100)]
for model_name in models_to_test:
    n_epochs = 30
    print_every = 100
    frac_train = 0.95
    n_hidden = 512
    learning_rate = 0.001

    model = RNN(20, n_hidden, 2)
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                                momentum=0.8, nesterov=True)
    if os.path.isfile(model_name):
        checkpoint = torch.load(model_name)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        print("=> loaded {}".format(model_name))
        with open(logfile_name, 'a') as outfile:
            outfile.write("=> loaded {}\n".format(model_name))

    correct_word = 0
    total_word = 0
    correct_not_word = 0
    total_not_word = 0
    total_val_loss = 0
# Transfer to PyTorch Variable
X_train_dep_std = Variable(torch.from_numpy(X_train_dep_std).float())
y_train_dep_std = Variable(torch.from_numpy(y_train_dep_std).float())
X_test_dep_std = Variable(torch.from_numpy(X_test_dep_std).float())

# Define the RNN model
# You can also choose rnn_type as 'rnn' or 'gru'
model = RNN(input_size=5, hidden_size=40, num_layers=1, class_size=1,
            dropout=0.5, rnn_type='lstm')

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimize all rnn parameters
# Define the loss function
loss_func = nn.MSELoss()

# Start training
for iter in range(20000 + 1):
    model.train()
    prediction = model(X_train_dep_std)
    loss = loss_func(prediction, y_train_dep_std)
    optimizer.zero_grad()  # clear gradients for this training step
    loss.backward()        # backpropagation, compute gradients
    optimizer.step()
    if iter % 100 == 0:
        print("iteration: %s, loss: %s" % (iter, loss.item()))
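# Illustrative sketch (assumption, not in the original snippet): evaluating the
# trained model on the held-out inputs prepared above. A matching target tensor
# `y_test_dep_std` is assumed to exist if a test loss is wanted.
model.eval()
with torch.no_grad():
    test_prediction = model(X_test_dep_std)
    # test_loss = loss_func(test_prediction, y_test_dep_std).item()
    print("test prediction shape:", tuple(test_prediction.shape))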