import argparse

import numpy as np
import torch
import torch.nn as nn


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", default=16, type=int)
    parser.add_argument("--do_train", action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true",
                        help="Whether to run evaluation.")
    parser.add_argument("--learning_rate", default=1e-4, type=float)
    parser.add_argument("--num_epoch", default=5, type=int)
    parser.add_argument("--max_vocab_size", default=50000, type=int)
    parser.add_argument("--embed_size", default=300, type=int)
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--hidden_size", default=1000, type=int)
    parser.add_argument("--num_layers", default=2, type=int)
    parser.add_argument("--GRAD_CLIP", default=1.0, type=float)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    setseed()

    VOCAB_SIZE, train_iter, dev_iter, test_iter, weight_matrix = preprocess_data(args)

    model = RNNModel(weight_matrix, 'GRU', VOCAB_SIZE, args.embed_size,
                     args.hidden_size, args.num_layers)
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss

    if args.do_train:
        train(args, model, train_iter, dev_iter, loss_fn, VOCAB_SIZE)
    if args.do_eval:
        model.load_state_dict(torch.load('lm-best-GRU.th'))
        model.to(device)
        test_loss = evaluate(args, model, test_iter, loss_fn, VOCAB_SIZE)
        LOG_FILE = "language_model_GRU.log"
        with open(LOG_FILE, 'a') as fout:
            fout.write("test perplexity: {}\n".format(np.exp(test_loss)))
        print("perplexity: ", np.exp(test_loss))
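# Note: np.exp(test_loss) above is the test perplexity only if evaluate()
# returns the mean per-token cross-entropy, since perplexity = exp(mean NLL).
# A minimal sketch of such an evaluate() under that assumption; the model's
# forward signature and the (x, y) batch layout of test_iter are assumptions
# here, not taken from the source:
import torch

def evaluate(args, model, data_iter, loss_fn, vocab_size):
    model.eval()
    total_loss, total_tokens = 0.0, 0
    with torch.no_grad():
        for x, y in data_iter:          # y: inputs shifted left by one token
            x, y = x.to(args.device), y.to(args.device)
            output, _ = model(x)        # assumed to return (logits, hidden)
            loss = loss_fn(output.view(-1, vocab_size), y.view(-1))
            total_loss += loss.item() * y.numel()
            total_tokens += y.numel()
    model.train()
    return total_loss / total_tokens    # mean cross-entropy; exp() = perplexity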
import os
import pickle

import torch


def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = RNNModel(model_info['vocab_size'], model_info['embedding_dim'],
                     model_info['hidden_dim'], model_info['n_layers'],
                     model_info['drop_rate'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(
            torch.load(f, map_location=lambda storage, loc: storage))

    # Load the saved character <-> integer dictionaries.
    char_dict_path = os.path.join(model_dir, 'char_dict.pkl')
    with open(char_dict_path, 'rb') as f:
        model.char2int = pickle.load(f)
    int_dict_path = os.path.join(model_dir, 'int_dict.pkl')
    with open(int_dict_path, 'rb') as f:
        model.int2char = pickle.load(f)

    model.to(device).eval()
    print("Done loading model.")
    return model
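# model_fn() above expects four artifacts in model_dir. A minimal sketch of
# the save-side counterpart that would produce them; this helper, its
# argument names, and the assumption that the model carries char2int/int2char
# attributes at save time are not shown in the source:
import os
import pickle

import torch

def save_model_artifacts(model, model_dir, vocab_size, embedding_dim,
                         hidden_dim, n_layers, drop_rate):
    # Hyperparameters needed to reconstruct the model at load time.
    model_info = {
        'vocab_size': vocab_size,
        'embedding_dim': embedding_dim,
        'hidden_dim': hidden_dim,
        'n_layers': n_layers,
        'drop_rate': drop_rate,
    }
    with open(os.path.join(model_dir, 'model_info.pth'), 'wb') as f:
        torch.save(model_info, f)
    # Learned weights, saved on CPU so model_fn can map them anywhere.
    with open(os.path.join(model_dir, 'model.pth'), 'wb') as f:
        torch.save(model.cpu().state_dict(), f)
    # Character <-> integer lookup tables used to encode/decode text.
    with open(os.path.join(model_dir, 'char_dict.pkl'), 'wb') as f:
        pickle.dump(model.char2int, f)
    with open(os.path.join(model_dir, 'int_dict.pkl'), 'wb') as f:
        pickle.dump(model.int2char, f)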
import torch
import torch.nn as nn
import tqdm


def train():
    # Model definition.
    model = RNNModel(len(word2ix), embed_size, hidden_dims)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        count = 0
        for ii, data_ in tqdm.tqdm(enumerate(data)):
            data_ = torch.tensor(data_).long()
            x = data_.unsqueeze(1).to(device)
            optimizer.zero_grad()
            # Next-token targets: shift the input left by one position and
            # wrap the last position around to the first token.
            y = torch.zeros(x.shape).to(device).long()
            y[:-1], y[-1] = x[1:], x[0]
            output, _ = model(x)
            loss = criterion(output, y.view(-1))
            # Disabled earlier variant that fed the model growing prefixes:
            # hidden = None
            # for k in range(2, max_lenth):
            #     data1 = data_[:k]
            #     input_, target = data1[:-1, :], data1[1:, :]
            #     output, hidden = model(input_, hidden)
            #     loss = criterion(output, target.view(-1))
            #     optimizer.step()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            count += 1
        print(epoch, 'loss=', total_loss / count)

    torch.save(model.state_dict(), 'model.bin')
    chars = test(model)
    print(chars)
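# The target construction above pairs each token with its successor and wraps
# the final position back to the first token. A self-contained illustration
# of that two-line shift (the values are made up for the example):
import torch

x = torch.tensor([10, 11, 12, 13]).unsqueeze(1)  # shape (seq_len, 1)
y = torch.zeros(x.shape).long()
y[:-1], y[-1] = x[1:], x[0]
print(y.squeeze(1).tolist())  # [11, 12, 13, 10]: shifted left with wrap-around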
def decompress(self, compressedfile):
    start = time.time()
    filename_split = compressedfile.split('_')
    checkpoint = torch.load(compressedfile, map_location=self.device)
    body = checkpoint['bytes']

    dictionary = Dictionary()
    dictionary.word2idx = checkpoint['word2idx']
    dictionary.idx2word = checkpoint['idx2word']
    context_map = Context(dictionary)
    ntokens = len(dictionary)

    model = RNNModel('LSTM', ntokens, 200, 200, 2,
                     dropout=0.2, tie_weights=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(self.device)
    model.eval()

    # Turn the stored bytes back into a bit string and strip the padding.
    bit_string = ''
    for byte in list(body):
        bit_string += "{0:08b}".format(byte)
    encoded_text = self.remove_padding(bit_string)

    # Decompression starts here: define an initial context, predict the
    # initial Huffman tree from it, read bits until we reach a leaf, convert
    # the leaf to a char and append it to the decompressed text, then update
    # the context and repeat the process.
    current_code = ''
    decoded_text = ''
    context = ['<s>'] * 10

    def tree_from_context(context):
        huffman = HuffmanCoding()
        prob = huffman.make_context_frequency_dict(
            context, model, context_map, self.device,
            threshold=self.args.threshold)
        huffman.make_heap_node(prob)
        huffman.merge_nodes()
        huffman.encode()
        huffman.reverse_mapping = {v: k for k, v in huffman.codes.items()}
        return huffman

    huffman = tree_from_context(context)

    # Fallback tree, rebuilt from the fixed symbol counts in the checkpoint.
    fixed_huffman = HuffmanCoding()
    counts = checkpoint['fixed_huffman_counts']
    fixed_huffman.make_heap_node(counts)
    fixed_huffman.merge_nodes()
    fixed_huffman.encode()
    fixed_huffman.reverse_mapping = {
        v: k for k, v in fixed_huffman.codes.items()
    }

    # Each symbol is prefixed with a flag bit selecting the tree:
    # '0' -> context-predicted tree, '1' -> fixed fallback tree.
    flag = None
    for bit in encoded_text:
        if flag == '0':
            current_code += bit
            if current_code in huffman.reverse_mapping:
                next_char = huffman.reverse_mapping[current_code]
                decoded_text += next_char
                current_code = ''
                context = context[1:] + [next_char]
                huffman = tree_from_context(context)
                flag = None
        elif flag == '1':
            current_code += bit
            if current_code in fixed_huffman.reverse_mapping:
                next_char = fixed_huffman.reverse_mapping[current_code]
                decoded_text += next_char
                current_code = ''
                context = context[1:] + [next_char]
                huffman = tree_from_context(context)
                flag = None
        else:
            flag = bit

    # Write the decompressed file.
    with open(filename_split[0] + "_decompressed.txt", 'w') as f:
        f.writelines(decoded_text)
    print('Decompression Done!')
    end = time.time()
    print(round((end - start), 3), "s")
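# The inner loop above is standard prefix-code decoding: accumulate bits in
# current_code until the accumulated string appears in reverse_mapping
# (code -> symbol), emit the symbol, and reset. A self-contained
# illustration with a hand-built code table (the table and bit string below
# are made up for the example):
codes = {'a': '0', 'b': '10', 'c': '11'}
reverse_mapping = {v: k for k, v in codes.items()}

decoded, current_code = '', ''
for bit in '010110':  # '0' + '10' + '11' + '0' encodes "abca"
    current_code += bit
    if current_code in reverse_mapping:
        decoded += reverse_mapping[current_code]
        current_code = ''
print(decoded)  # -> abca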
import math
from datetime import datetime

import torch


model = RNNModel(encoder.encoding_size, args.hidden_size, len(corpora.vocab),
                 args.layers, encoder, dropout=args.dropout)
criterion = torch.nn.CrossEntropyLoss()
trainer = Trainer(model, corpora, criterion, device, logger, args.batch_size,
                  args.seq_len, args.lr, args.log_interval, args.clip_grad)

best_valid_loss = float("inf")
for epoch in range(args.epochs):
    print('Time at the start of epoch {} is {}'.format(epoch, datetime.now()))
    trainer.train()
    valid_loss = evaluate(model, corpora, criterion, device)
    print('Validation loss: {:.2f}. Perplexity: {:.2f}'.format(
        valid_loss, math.exp(valid_loss)))
    if args.log_dir:
        logger.log_valid(epoch, valid_loss)

    # Checkpointing moves the model to the CPU, so move it back afterwards.
    save_checkpoint(model.to(torch.device('cpu')), args.checkpoint,
                    valid_loss, args)
    model = model.to(device)

    # Anneal the learning rate if the validation loss hasn't improved.
    if (valid_loss - best_valid_loss) < -0.01:
        best_valid_loss = valid_loss
    else:
        trainer.learning_rate /= 4.0
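# The annealing above only mutates trainer.learning_rate, so it relies on
# Trainer applying the new value to its optimizer on the next epoch. A sketch
# of how such a Trainer might do that; this helper is an assumption, not
# part of the source:
def set_learning_rate(optimizer, lr):
    # torch optimizers expose their hyperparameters through param_groups.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# e.g. at the top of Trainer.train():
#     set_learning_rate(self.optimizer, self.learning_rate)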