def test_model():
    vocab, embeddings = data_helper.load_embeddings(config.get('data', 'embedding_file'))
    model = RNNModel(embeddings, num_classes=5)
    model.load(config.get('data', 'model_dir'))
    test_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'test.txt'))
    numeric_test_samples = data_helper.convert_to_numeric_samples(test_data, vocab, num_classes=5)
    model.eval(numeric_test_samples)
def export_onnx(path, batch_size, seq_len):
    # Report the function's own `path` argument rather than the unrelated
    # global `args.onnx_export`.
    print('The model is also exported in ONNX format at {}'.format(
        os.path.realpath(path)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(
        -1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)
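# Hypothetical invocation (the path and sizes are illustrative; `model` and
# `device` are assumed to be module-level globals, as in export_onnx above):
# export_onnx('./model.onnx', batch_size=1, seq_len=35)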
def evaluate(data_source):
    # Turn on evaluation mode, which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
    return total_loss / (len(data_source) - 1)
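# `get_batch` and `repackage_hidden` are not defined in this snippet; they are
# assumed to follow the standard PyTorch word-language-model example. A
# minimal sketch under that assumption:
def repackage_hidden(h):
    """Detach hidden states from their history so backprop stops here."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)

def get_batch(source, i):
    """Slice a (seq_len, batch) chunk and its shifted-by-one flat targets."""
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target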
def train_model():
    vocab, embeddings = data_helper.load_embeddings(config.get('data', 'embedding_file'))
    train_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'train.txt'))
    numeric_train_samples = data_helper.convert_to_numeric_samples(train_data, vocab, num_classes=5)
    model = RNNModel(embeddings, num_classes=5, model_config=config['model'])
    dev_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'dev.txt'))
    numeric_dev_samples = data_helper.convert_to_numeric_samples(dev_data, vocab, num_classes=5)
    eval_func = lambda: model.eval(numeric_dev_samples)
    model.train(numeric_train_samples, eval_func)
    model.save(config.get('data', 'model_dir'))
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--load_path', default=None)
    args = parser.parse_args()
    return args

cl_args = parse_args()
dataset = Corpus()
dataset.process_data()
sos = dataset.target_dict.word2idx['<sos>']
eos = dataset.target_dict.word2idx['<eos>']
# allow_pickle=True is required by recent NumPy to load a saved args object.
args = np.load(os.path.join(cl_args.load_path, 'args.npy'),
               allow_pickle=True).tolist()
model = RNNModel(args).cuda()
model.eval()
if cl_args.load_path:
    file = os.path.join(cl_args.load_path, 'model.pt')
    model.load_state_dict(torch.load(file))
itr = dataset.create_epoch_iterator('test', 1)
for i in range(50):
    source, target = next(itr)
    output = model.sample(source, sos, eos)
    print("Source: ", ''.join([
        dataset.source_dict.idx2word[x]
        for x in source.cpu().data.numpy()[:, 0]
    ]))
    print("Original: ", ''.join([
        dataset.target_dict.idx2word[x]
        for x in target.cpu().data.numpy()[:, 0]
    ]))
# target: term0, term1
# (loop header reconstructed; `train_loader` is assumed by analogy with
# `valid_loader` below)
for batch_num, batch in enumerate(train_loader):
    lm_input, lens, lm_output = batch
    predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
    loss = loss_function(predictions, cudalize(lm_output), lens)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    avg_loss = total_loss / (batch_num + 1)
    ppl = math.exp(avg_loss)
    print(f'\rLM Training: epoch:{epoch} train batch:{batch_num} ' +
          f'loss:{avg_loss} ppl:{ppl}', end='')
print()

valid_loss = 0
model = model.eval()
with torch.no_grad():  # no gradients needed during validation
    for batch_num, batch in enumerate(valid_loader):
        lm_input, lens, lm_output = batch
        predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
        loss = loss_function(predictions, cudalize(lm_output), lens)
        valid_loss += loss.item()
        valid_avg_loss = valid_loss / (batch_num + 1)
        valid_ppl = math.exp(valid_avg_loss)
        print(f'\rvalid batch:{batch_num} loss: {valid_avg_loss:.4f} ' +
              f'ppl:{valid_ppl:.4f}', end='')
print()

if valid_avg_loss < min_loss:
    min_loss = valid_avg_loss
    earlystop_count = 0
    torch.save(model.state_dict(), './model.pt')
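# `cudalize` is not defined in this snippet; presumably it moves a tensor to
# the GPU when one is available. A minimal sketch under that assumption:
def cudalize(tensor):
    """Move a tensor to CUDA if a GPU is available, else return it unchanged."""
    return tensor.cuda() if torch.cuda.is_available() else tensor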
if USE_HOLIDAY == 'no_use':
    X_train[:, :, [1]] = 0
    # X_val[:, :, [1]] = 0
    X_test[:, :, [1]] = 0
    model = RNNModel(
        lookback=LOOKBACK, lookahead=LOOKAHEAD, input_dim=2, hid_dim=40,
        device='cuda', data_dir=DATA_DIR, task=USE_HOLIDAY
    )
    model.fit(
        X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test,
        metric='mae', max_epoch=MAX_EPOCH, patience=3000000,
        batch_size=128, lr=1e-3, weight_decay=1e-3
    )
    forecast = model.eval(X_test=X_test)
elif USE_HOLIDAY == 'feature':
    model = RNNModel(
        lookback=LOOKBACK, lookahead=LOOKAHEAD, input_dim=2, hid_dim=40,
        device='cuda', data_dir=DATA_DIR, task=USE_HOLIDAY
    )
    model.fit(
        X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test,
        metric='mae', max_epoch=MAX_EPOCH, patience=3000000,
        batch_size=128, lr=1e-3, weight_decay=1e-3
    )
    forecast = model.eval(X_test=X_test)
class DartsTrainer():
    def __init__(self, arm):
        # Default params for eval network
        args = {
            'emsize': 850,
            'nhid': 850,
            'nhidlast': 850,
            'dropoute': 0.1,
            'wdecay': 8e-7
        }
        args['data'] = '/home/liamli4465/darts/data/penn'
        args['lr'] = 20
        args['clip'] = 0.25
        args['batch_size'] = 64
        args['search_batch_size'] = 256 * 4
        args['small_batch_size'] = 64
        args['bptt'] = 35
        args['dropout'] = 0.75
        args['dropouth'] = 0.25
        args['dropoutx'] = 0.75
        args['dropouti'] = 0.2
        args['seed'] = arm['seed']
        args['nonmono'] = 5
        args['log_interval'] = 50
        args['save'] = arm['dir']
        args['alpha'] = 0
        args['beta'] = 1e-3
        args['max_seq_length_delta'] = 20
        args['unrolled'] = True
        args['gpu'] = 0
        args['cuda'] = True
        args['genotype'] = arm['genotype']
        args = AttrDict(args)
        self.args = args
        self.epoch = 0

        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)

        corpus = data.Corpus(args.data)
        self.corpus = corpus
        self.eval_batch_size = 10
        self.test_batch_size = 1
        self.train_data = batchify(corpus.train, args.batch_size, args)
        self.search_data = batchify(corpus.valid, args.search_batch_size, args)
        self.val_data = batchify(corpus.valid, self.eval_batch_size, args)
        self.test_data = batchify(corpus.test, self.test_batch_size, args)
        self.ntokens = len(corpus.dictionary)

    def model_save(self, fn, to_save):
        if self.epoch % 150 == 0:
            with open(
                    os.path.join(self.args.save,
                                 "checkpoint-incumbent-%d" % self.epoch),
                    'wb') as f:
                torch.save(to_save, f)
        with open(fn, 'wb') as f:
            torch.save(to_save, f)

    def model_load(self, fn):
        with open(fn, 'rb') as f:
            self.model, self.optimizer, rng_state, cuda_state = torch.load(f)
            torch.set_rng_state(rng_state)
            torch.cuda.set_rng_state(cuda_state)

    def model_resume(self, filename):
        logging.info('Resuming model from %s' % filename)
        self.model_load(filename)
        self.optimizer.param_groups[0]['lr'] = self.args.lr
        for rnn in self.model.rnns:
            rnn.genotype = self.args.genotype

    def train_epochs(self, epochs):
        args = self.args
        resume_filename = os.path.join(self.args.save, "checkpoint.incumbent")
        if os.path.exists(resume_filename):
            self.model_resume(resume_filename)
            logging.info('Loaded model from checkpoint')
        else:
            self.model = RNNModel(self.ntokens, args.emsize, args.nhid,
                                  args.nhidlast, args.dropout, args.dropouth,
                                  args.dropoutx, args.dropouti, args.dropoute,
                                  genotype=args.genotype)
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.wdecay)

        size = 0
        for p in self.model.parameters():
            size += p.nelement()
        logging.info('param size: {}'.format(size))
        logging.info('initial genotype:')
        logging.info(self.model.rnns[0].genotype)

        total_params = sum(x.data.nelement() for x in self.model.parameters())
        logging.info('Args: {}'.format(args))
        logging.info('Model total parameters: {}'.format(total_params))

        self.model = self.model.cuda()

        # Loop over epochs.
        lr = args.lr
        best_val_loss = []
        stored_loss = 100000000

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            for epoch in range(epochs):
                epoch_start_time = time.time()
                self.train()

                if 't0' in self.optimizer.param_groups[0]:
                    tmp = {}
                    for prm in self.model.parameters():
                        tmp[prm] = prm.data.clone()
                        prm.data = self.optimizer.state[prm]['ax'].clone()

                    val_loss2 = self.evaluate(self.val_data)
                    logging.info('-' * 89)
                    logging.info(
                        '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                            self.epoch, (time.time() - epoch_start_time),
                            val_loss2, math.exp(val_loss2),
                            val_loss2 / math.log(2)))
                    logging.info('-' * 89)

                    if val_loss2 < stored_loss:
                        self.model_save(
                            os.path.join(args.save, 'checkpoint.incumbent'), [
                                self.model, self.optimizer,
                                torch.get_rng_state(),
                                torch.cuda.get_rng_state()
                            ])
                        logging.info('Saving Averaged!')
                        stored_loss = val_loss2

                    for prm in self.model.parameters():
                        prm.data = tmp[prm].clone()
                else:
                    val_loss = self.evaluate(self.val_data,
                                             self.eval_batch_size)
                    logging.info('-' * 89)
                    logging.info(
                        '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                            self.epoch, (time.time() - epoch_start_time),
                            val_loss, math.exp(val_loss),
                            val_loss / math.log(2)))
                    logging.info('-' * 89)

                    if val_loss < stored_loss:
                        self.model_save(
                            os.path.join(args.save, 'checkpoint.incumbent'), [
                                self.model, self.optimizer,
                                torch.get_rng_state(),
                                torch.cuda.get_rng_state()
                            ])
                        logging.info('Saving model (new best validation)')
                        stored_loss = val_loss

                    if (self.epoch > 75
                            and 't0' not in self.optimizer.param_groups[0]
                            and (len(best_val_loss) > args.nonmono
                                 and val_loss > min(best_val_loss[:-args.nonmono]))):
                        logging.info('Switching to ASGD')
                        self.optimizer = torch.optim.ASGD(
                            self.model.parameters(), lr=args.lr, t0=0,
                            lambd=0., weight_decay=args.wdecay)

                    best_val_loss.append(val_loss)
        # Catch KeyboardInterrupt too, so Ctrl + C actually triggers the early
        # exit promised above (KeyboardInterrupt is not an Exception subclass).
        except (KeyboardInterrupt, Exception) as e:
            logging.info('-' * 89)
            logging.info(e)
            logging.info('Exiting from training early')
            return 0, 10000, 10000

        # Load the best saved model.
        self.model_load(os.path.join(args.save, 'checkpoint.incumbent'))

        # Run on test data.
        val_loss = self.evaluate(self.val_data, self.eval_batch_size)
        logging.info(math.exp(val_loss))
        test_loss = self.evaluate(self.test_data, self.test_batch_size)
        logging.info('=' * 89)
        logging.info(
            '| End of training | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'
            .format(test_loss, math.exp(test_loss), test_loss / math.log(2)))
        logging.info('=' * 89)

        return 0, math.exp(val_loss), math.exp(test_loss)

    def train(self):
        args = self.args
        corpus = self.corpus
        total_loss = 0
        start_time = time.time()

        hidden = [
            self.model.init_hidden(args.small_batch_size)
            for _ in range(args.batch_size // args.small_batch_size)
        ]
        batch, i = 0, 0
        while i < self.train_data.size(0) - 1 - 1:
            bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
            # Prevent excessively small or negative sequence lengths.
            seq_len = max(5, int(np.random.normal(bptt, 5)))
            # There's a very small chance that a very long sequence length is
            # selected, resulting in OOM.
            seq_len = min(seq_len, args.bptt + args.max_seq_length_delta)

            lr2 = self.optimizer.param_groups[0]['lr']
            self.optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
            self.model.train()

            data, targets = get_batch(self.train_data, i, args, seq_len=seq_len)

            self.optimizer.zero_grad()

            start, end, s_id = 0, args.small_batch_size, 0
            while start < args.batch_size:
                cur_data, cur_targets = data[:, start:end], \
                    targets[:, start:end].contiguous().view(-1)

                # Starting each batch, we detach the hidden state from how it
                # was previously produced.
                # If we didn't, the model would try backpropagating all the
                # way to the start of the dataset.
                hidden[s_id] = repackage_hidden(hidden[s_id])

                log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = self.model(
                    cur_data, hidden[s_id], return_h=True)
                raw_loss = nn.functional.nll_loss(
                    log_prob.view(-1, log_prob.size(2)), cur_targets)
                loss = raw_loss
                # Activation Regularization
                if args.alpha > 0:
                    loss = loss + sum(
                        args.alpha * dropped_rnn_h.pow(2).mean()
                        for dropped_rnn_h in dropped_rnn_hs[-1:])
                # Temporal Activation Regularization (slowness)
                loss = loss + sum(
                    args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                    for rnn_h in rnn_hs[-1:])
                loss *= args.small_batch_size / args.batch_size
                total_loss += raw_loss.data * args.small_batch_size / args.batch_size
                loss.backward()

                s_id += 1
                start = end
                end = start + args.small_batch_size

                gc.collect()

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs.
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clip)
            self.optimizer.step()

            # total_loss += raw_loss.data
            self.optimizer.param_groups[0]['lr'] = lr2
            if np.isnan(total_loss.item()):
                # A bare `raise` is invalid outside an except block; raise an
                # explicit error instead.
                raise ValueError('NaN training loss encountered')
            # if batch % args.log_interval == 0 and batch > 0:
            #     cur_loss = total_loss.item() / args.log_interval
            #     elapsed = time.time() - start_time
            #     logging.info('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
            #                  'loss {:5.2f} | ppl {:8.2f}'.format(
            #                      self.epoch, batch, len(self.train_data) // args.bptt,
            #                      self.optimizer.param_groups[0]['lr'],
            #                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            #     total_loss = 0
            #     start_time = time.time()
            batch += 1
            i += seq_len
        self.epoch += 1

    def evaluate(self, data_source, batch_size=10):
        # Turn on evaluation mode, which disables dropout.
        self.model.eval()
        total_loss = 0
        hidden = self.model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, self.args.bptt):
            data, targets = get_batch(data_source, i, self.args,
                                      evaluation=True)
            targets = targets.view(-1)
            log_prob, hidden = self.model(data, hidden)
            loss = nn.functional.nll_loss(
                log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
        return total_loss.item() / len(data_source)
def train():
    best_val_loss = 100
    ntokens = len(corpus.dictionary)
    train_data = batchify(corpus.train, args.batch_size)  # (num_batches, batch_size)
    val_data = batchify(corpus.valid, args.batch_size)
    model = RNNModel(rnn_type=args.model,
                     ntoken=ntokens,
                     ninp=args.emsize,
                     nfeat=args.nfeat,
                     nhid=args.nhid,
                     nlayers=args.nlayers,
                     font_path=args.font_path,
                     font_size=args.font_size,
                     dropout=args.dropout,
                     tie_weights=args.tied,
                     ).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    print('start training...')
    hidden = model.init_hidden(args.batch_size)
    epoch_start_time = time.time()
    for epoch in range(args.epochs):
        model.eval()  # evaluate on the validation set
        total_loss = 0.
        with torch.no_grad():
            for idx in range(0, val_data.size(0) - 1, args.bptt):
                data, targets = get_batch(val_data, idx)
                output, hidden = model(data, hidden)
                # (seq_len, batch, ntokens) -> (seq_len*batch, ntokens)
                output_flat = output.view(-1, ntokens)
                total_loss += len(data) * criterion(output_flat, targets.view(-1)).item()
                hidden = repackage_hidden(hidden)
        val_loss = total_loss / len(val_data)
        best_val_loss = min(best_val_loss, val_loss)
        print('-' * 100)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f} | best valid ppl {:8.2f}'
              .format(epoch, (time.time() - epoch_start_time), val_loss,
                      math.exp(val_loss), math.exp(best_val_loss)))
        print('-' * 100)
        epoch_start_time = time.time()

        if val_loss == best_val_loss:
            # Save the model if the validation loss is the best so far.
            torch.save(model, os.path.join(args.save, 'model.pkl'))
        else:
            # Anneal the learning rate when validation loss stops improving.
            # The optimizer's param groups must be updated too, otherwise the
            # change has no effect on Adam.
            args.lr /= 4.0
            for group in optimizer.param_groups:
                group['lr'] = args.lr

        model.train()  # train on the training set
        total_loss = 0.
        start_time = time.time()
        for i, idx in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
            data, targets = get_batch(train_data, idx)
            hidden = repackage_hidden(hidden)
            model.zero_grad()
            # Compute the loss and gradients.
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets.view(-1))
            loss.backward()
            total_loss += loss.item()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            # Update the parameters with the gradients.
            optimizer.step()
            # for p in model.parameters():
            #     p.data.add_(-args.lr, p.grad.data)
            if i % args.log_interval == 0 and i > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | '
                      'ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f}'
                      .format(epoch + 1, i, len(train_data) // args.bptt,
                              args.lr, elapsed * 1000 / args.log_interval,
                              cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()
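# `batchify` is not defined in this snippet; it is assumed to match the
# standard PyTorch word-language-model helper, which trims the corpus to a
# whole number of columns and reshapes it to (num_batches, batch_size). A
# minimal sketch under that assumption:
def batchify(data, bsz):
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    return data.view(bsz, -1).t().contiguous().to(device)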
def decompress(self, compressedfile):
    start = time.time()
    filename_split = compressedfile.split('_')
    checkpoint = torch.load(compressedfile, map_location=self.device)
    body = checkpoint['bytes']

    dictionary = Dictionary()
    dictionary.word2idx = checkpoint['word2idx']
    dictionary.idx2word = checkpoint['idx2word']
    context_map = Context(dictionary)

    ntokens = len(dictionary)
    model = RNNModel('LSTM', ntokens, 200, 200, 2, dropout=0.2,
                     tie_weights=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(self.device)
    model.eval()

    bit_string = ''
    join_body = list(body)
    for i in join_body:
        bit_string += "{0:08b}".format(i)
    encoded_text = self.remove_padding(bit_string)

    # Decompression starts here.
    current_code = ''
    decoded_text = ''

    # We define an initial context, then predict the initial Huffman tree,
    # read bits until we get to a leaf, convert the leaf to a char and add it
    # to the decompressed text, then update the context and repeat.
    context = ['<s>'] * 10

    def tree_from_context(context):
        huffman = HuffmanCoding()
        prob = huffman.make_context_frequency_dict(
            context, model, context_map, self.device,
            threshold=self.args.threshold)
        huffman.make_heap_node(prob)
        huffman.merge_nodes()
        huffman.encode()
        huffman.reverse_mapping = {v: k for k, v in huffman.codes.items()}
        return huffman

    huffman = tree_from_context(context)

    fixed_huffman = HuffmanCoding()
    counts = checkpoint['fixed_huffman_counts']
    fixed_huffman.make_heap_node(counts)
    fixed_huffman.merge_nodes()
    fixed_huffman.encode()
    fixed_huffman.reverse_mapping = {
        v: k for k, v in fixed_huffman.codes.items()
    }

    flag = None
    for bit in encoded_text:
        if flag == '0':
            # Adaptive tree: codes predicted by the model for this context.
            current_code += bit
            if current_code in huffman.reverse_mapping:
                next_char = huffman.reverse_mapping[current_code]
                decoded_text += next_char
                current_code = ''
                context = context[1:] + [next_char]
                huffman = tree_from_context(context)
                flag = None
            continue
        elif flag == '1':
            # Fixed fallback tree for symbols outside the predicted set.
            current_code += bit
            if current_code in fixed_huffman.reverse_mapping:
                next_char = fixed_huffman.reverse_mapping[current_code]
                decoded_text += next_char
                current_code = ''
                context = context[1:] + [next_char]
                huffman = tree_from_context(context)
                flag = None
            continue
        else:
            flag = bit

    # Write the decompressed file.
    with open(filename_split[0] + "_decompressed.txt", 'w') as f:
        f.write(decoded_text)
    print('Decompression Done!')
    end = time.time()
    print(round((end - start), 3), "s")
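# `remove_padding` (a method of the same class) is not shown. A common layout,
# assumed here rather than confirmed by this snippet, stores the pad length in
# the first byte and appends that many zero bits at the end; a sketch under
# that assumption:
def remove_padding(self, padded_bit_string):
    pad_info = padded_bit_string[:8]    # first byte: number of pad bits
    extra_padding = int(pad_info, 2)
    bit_string = padded_bit_string[8:]  # drop the header byte
    return bit_string[:-extra_padding] if extra_padding else bit_string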