def main(_):
    pp.pprint(flags.FLAGS.__flags)
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)
    random.seed(31241)
    np.random.seed(41982)
    tf.set_random_seed(1327634)
    color = True  # Must change this and the dataset flags to the correct path to use color
    if FLAGS.is_debug:
        reader = Bouncing_Balls_Data_Reader(FLAGS.dataset, FLAGS.batch_size, color=color,
                                            train_size=160 * 5, validation_size=8 * 5,
                                            test_size=8 * 5, num_partitions=5)
    else:
        reader = Bouncing_Balls_Data_Reader(FLAGS.dataset, FLAGS.batch_size, color=color)
    data_fn = lambda epoch, batch_index: reader.read_data(batch_index, reader.TRAIN)
    frame_shape = reader.read_data(0, reader.TRAIN).shape[2:]
    print("Frame shape: ", frame_shape)
    num_batches = reader.num_batches(reader.TRAIN)
    print("Num batches: %d" % num_batches)
    input_sequence_range = range(5, 16)
    print("Input sequence range min: %d, max: %d" % (min(input_sequence_range),
                                                     max(input_sequence_range)))
    save_sample_fn = utils.gen_save_sample_fn(FLAGS.sample_dir, image_prefix="train")
    with tf.Session() as sess:
        pgn = PGN(sess, FLAGS.dataset_name, FLAGS.epoch, num_batches, FLAGS.batch_size,
                  input_sequence_range, data_fn, frame_shape=frame_shape,
                  save_sample_fn=save_sample_fn, checkpoint_dir=FLAGS.checkpoint_dir,
                  lambda_adv_loss=FLAGS.lambda_adv_loss)
        if FLAGS.is_train:
            pgn.train()
        else:
            print("Loading from: %s" % (FLAGS.checkpoint_dir,))
            if pgn.load(FLAGS.checkpoint_dir):
                print(" [*] Successfully loaded")
            else:
                print(" [!] Load failed")
        if FLAGS.is_test:
            result = test.test(pgn, reader)
            result_str = pp.pformat(result)
            # `unicode` is Python 2 only; under Python 3 write the string directly.
            with open(os.path.join(FLAGS.sample_dir, 'test_out.txt'), mode='w') as fid:
                fid.write(result_str)
        if FLAGS.is_visualize:
            for i in range(3):
                vid_seq = reader.read_data(i, data_set_type=reader.TEST,
                                           batch_size=1)[:, 0, :, :, :]
                utils.make_prediction_gif(pgn, os.path.join(FLAGS.sample_dir, 'vis_%d.gif' % i),
                                          video_sequence=vid_seq)
            utils.plot_convergence(pgn.get_MSE_history(), "MSE Convergence",
                                   path=os.path.join(FLAGS.sample_dir,
                                                     "vis_MSE_convergence.png"))
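# A hedged sketch of the flag definitions that main(_) above relies on, using
# the TF1-era tf.app.flags API that matches the tf.Session usage. The flag
# names mirror the FLAGS attributes referenced in main; the default values are
# placeholders, not taken from the original project.
flags = tf.app.flags
flags.DEFINE_string("dataset", "data/bouncing_balls", "Path to the dataset")
flags.DEFINE_string("dataset_name", "bouncing_balls", "Name of the dataset")
flags.DEFINE_integer("epoch", 25, "Number of training epochs")
flags.DEFINE_integer("batch_size", 16, "Batch size")
flags.DEFINE_string("checkpoint_dir", "checkpoint", "Directory for checkpoints")
flags.DEFINE_string("sample_dir", "samples", "Directory for generated samples")
flags.DEFINE_float("lambda_adv_loss", 0.05, "Weight of the adversarial loss")
flags.DEFINE_boolean("is_train", True, "Train the model")
flags.DEFINE_boolean("is_test", False, "Run the test pass")
flags.DEFINE_boolean("is_visualize", False, "Write prediction GIFs")
flags.DEFINE_boolean("is_debug", False, "Use the small debug reader")
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags and calls main(_)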
def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params)

    tf.compat.v1.logging.info("Creating the batcher ...")
    b = batcher(params["data_dir"], params["vocab_path"], params)

    tf.compat.v1.logging.info("Creating the checkpoint manager")
    logdir = "{}/logdir".format(params["model_dir"])
    checkpoint_dir = "{}/checkpoint".format(params["model_dir"])
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=11)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    tf.compat.v1.logging.info("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
def test(params):
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    print(params["beam_size"], params["batch_size"])
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    embeddings_matrix = get_embedding(params["vocab_size"], params["embed_size"],
                                      vocab, params['vector_path'])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    b = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}".format(params["checkpoint_dir"])
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=11)

    path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    ckpt.restore(path)
    print("Model restored")

    for batch in b:
        yield beam_decode(model, batch, vocab, params)
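# A minimal sketch of driving the generator-style test() above. The params
# values are placeholders and the printed structure of what beam_decode yields
# is an assumption; only the key names are grounded in the function body.
if __name__ == '__main__':
    params = {
        "mode": "test",
        "beam_size": 4,
        "batch_size": 4,  # must equal beam_size (asserted above)
        "vocab_path": "data/vocab.txt",
        "vocab_size": 50000,
        "embed_size": 128,
        "vector_path": "data/word2vec.bin",
        "data_dir": "data/test",
        "checkpoint_dir": "checkpoints",
        "model_path": "",  # empty -> fall back to the latest checkpoint
    }
    for i, decoded in enumerate(test(params)):
        print("batch %d:" % i, decoded)
        if i >= 2:  # stop after a few batches in this sketch
            break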
def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    print("Creating the vocab from :", params["vocab_path"])
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the embedding_matrix from:", params["vector_path"])
    embeddings_matrix = get_embedding(params["vocab_size"], params["embed_size"],
                                      vocab, params['vector_path'])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    b = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}".format(params["checkpoint_dir"])
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=11)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    tf.compat.v1.logging.info("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager, "output.txt")
def __init__(self):
    self.DEVICE = config.device
    # self.dataset = SamplesDataset(config.train_data_path)
    # self.vocab = self.dataset.vocab
    self.vocab = None
    if os.path.exists(config.vocab):
        # Reuse a previously pickled vocab when one is available.
        with open(config.vocab, 'rb') as f:
            self.vocab = pickle.load(f)
    self.dataset = SamplesDataset(config.train_data_path, vocab=self.vocab)
    self.vocab = self.dataset.vocab
    self.model = PGN(self.vocab)
    self.stop_word = list(set(
        self.vocab[x.strip()]
        for x in open(config.stop_word_file).readlines()
    ))
    self.model.load_model()
    self.model.to(self.DEVICE)
def main(args):
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    train_set = TrainImageFolder(args.train_dir)
    data_loader = torch.utils.data.DataLoader(train_set,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers)
    model = nn.DataParallel(PGN()).cuda()
    # `reduce=False` is deprecated; `reduction='none'` is the equivalent option.
    criterion = nn.CrossEntropyLoss(reduction='none').cuda()
    params = list(model.parameters())
    total_step = len(data_loader)
    # Starts at epoch 134, i.e. this script resumes an earlier run.
    for epoch in range(134, args.num_epochs):
        # Polynomial learning-rate decay; the optimizer is rebuilt each epoch
        # so SGD picks up the new rate.
        lr_ = lr_poly(args.learning_rate, epoch * total_step,
                      args.num_epochs * total_step, 0.9)
        optimizer = torch.optim.SGD(params, lr=lr_, momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        for i, (images, parse) in enumerate(data_loader):
            images = images.cuda()
            parse = parse.long().cuda()
            (parsing_out1, parsing_out2, edge_out1_final, edge_out_res5,
             edge_out_res4, edge_out_res3, edge_out2_final) = model(images)
            # parsing_out1 = model(images)
            loss = criterion(parsing_out1, parse).mean()
            model.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))
            if (i + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path,
                                        'model-{}-{}.ckpt'.format(epoch + 1, i + 1)))
def predict():
    model = nn.DataParallel(PGN()).cuda()
    # model.load_state_dict(torch.load('models/model-134-2539.ckpt'))
    model.eval()  # inference mode: disable dropout / batch-norm updates
    data_dir = 'LIP/testing_images'
    dirs = os.listdir(data_dir)
    for file in dirs:
        image = Image.open(data_dir + '/' + file).convert('RGB')
        a, b = image.size[0], image.size[1]
        image = torch.Tensor(
            np.array(image).astype(np.float32).transpose(
                (2, 0, 1))).unsqueeze(0).cuda()
        with torch.no_grad():  # no gradients needed at inference time
            output = model(image).cpu().numpy()[0]
        # pre_image = Image.fromarray(model(image).cpu().detach().numpy()[0])
        # Per-pixel argmax over class channels gives the predicted parsing map.
        c = Image.fromarray(np.argmax(output, axis=0).astype(np.uint8))
        c = c.resize((a, b), Image.NEAREST)
        # print(np.array(c))
        # save_image = pre_image.resize((a, b), Image.NEAREST)
        c.save('LIP/test_save/' + file[:-4] + '.png', quality=95, subsampling=0)
def train(dataset, val_dataset, v, start_epoch=0):
    """Train the model, evaluate it and store it.

    Args:
        dataset (dataset.PairDataset): The training dataset.
        val_dataset (dataset.PairDataset): The evaluation dataset.
        v (vocab.Vocab): The vocabulary built from the training dataset.
        start_epoch (int, optional): The starting epoch number. Defaults to 0.
    """
    DEVICE = torch.device("cuda" if config.is_cuda else "cpu")

    model = PGN(v)
    model.load_model()
    model.to(DEVICE)
    if config.fine_tune:
        # In fine-tuning mode, we fix the weights of all parameters except attention.wc.
        print('Fine-tuning mode.')
        for name, params in model.named_parameters():
            if name != 'attention.wc.weight':
                params.requires_grad = False

    print("loading data")
    train_data = SampleDataset(dataset.pairs, v)
    val_data = SampleDataset(val_dataset.pairs, v)

    print("initializing optimizer")
    # Define the optimizer.
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    # Best validation loss so far; replaced by the stored value if one exists.
    val_losses = np.inf
    if os.path.exists(config.losses_path):
        with open(config.losses_path, 'rb') as f:
            val_losses = pickle.load(f)

    # torch.cuda.empty_cache()
    # SummaryWriter: Log writer used for TensorboardX visualization.
    writer = SummaryWriter(config.log_path)
    # tqdm: A tool for drawing progress bars during training.
    # scheduled_sampler: A tool for choosing teacher_forcing or not.
    num_epochs = len(range(start_epoch, config.epochs))
    scheduled_sampler = ScheduledSampler(num_epochs)
    if config.scheduled_sampling:
        print('scheduled_sampling mode.')

    with tqdm(total=config.epochs) as epoch_progress:
        for epoch in range(start_epoch, config.epochs):
            print(config_info(config))
            batch_losses = []  # Get loss of each batch.
            num_batches = len(train_dataloader)
            # Set a teacher_forcing signal.
            if config.scheduled_sampling:
                teacher_forcing = scheduled_sampler.teacher_forcing(epoch - start_epoch)
            else:
                teacher_forcing = True
            print('teacher_forcing = {}'.format(teacher_forcing))
            with tqdm(total=num_batches) as batch_progress:
                for batch, data in enumerate(tqdm(train_dataloader)):
                    x, y, x_len, y_len, oov, len_oovs = data
                    assert not np.any(np.isnan(x.numpy()))
                    if config.is_cuda:  # Training with GPUs.
                        x = x.to(DEVICE)
                        y = y.to(DEVICE)
                        x_len = x_len.to(DEVICE)
                        len_oovs = len_oovs.to(DEVICE)

                    model.train()  # Sets the module in training mode.
                    optimizer.zero_grad()  # Clear gradients.
                    # Calculate loss: call model forward propagation.
                    loss = model(x, x_len, y, len_oovs, batch=batch,
                                 num_batches=num_batches,
                                 teacher_forcing=teacher_forcing)
                    batch_losses.append(loss.item())
                    loss.backward()  # Backpropagation.

                    # Do gradient clipping to prevent gradient explosion.
                    clip_grad_norm_(model.encoder.parameters(), config.max_grad_norm)
                    clip_grad_norm_(model.decoder.parameters(), config.max_grad_norm)
                    clip_grad_norm_(model.attention.parameters(), config.max_grad_norm)
                    optimizer.step()  # Update weights.

                    # Output and record the running loss every 32 batches.
                    if (batch % 32) == 0:
                        batch_progress.set_description(f'Epoch {epoch}')
                        batch_progress.set_postfix(Batch=batch, Loss=loss.item())
                        batch_progress.update()
                        # Write loss for tensorboard.
                        writer.add_scalar(f'Average loss for epoch {epoch}',
                                          np.mean(batch_losses),
                                          global_step=batch)

            # Calculate average loss over all batches in an epoch.
            epoch_loss = np.mean(batch_losses)
            epoch_progress.set_description(f'Epoch {epoch}')
            epoch_progress.set_postfix(Loss=epoch_loss)
            epoch_progress.update()

            avg_val_loss = evaluate(model, val_data, epoch)
            print('training loss:{}'.format(epoch_loss),
                  'validation loss:{}'.format(avg_val_loss))

            # Update minimum evaluating loss.
            if avg_val_loss < val_losses:
                torch.save(model.encoder, config.encoder_save_name)
                torch.save(model.decoder, config.decoder_save_name)
                torch.save(model.attention, config.attention_save_name)
                torch.save(model.reduce_state, config.reduce_state_save_name)
                val_losses = avg_val_loss
                with open(config.losses_path, 'wb') as f:
                    pickle.dump(val_losses, f)

    writer.close()
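# A minimal sketch of how train() above could be invoked, based on the types
# named in its docstring (dataset.PairDataset and vocab.Vocab). The PairDataset
# keyword arguments mirror the ones used in the Predict class below;
# config.val_data_path is an assumption, not confirmed by this file.
if __name__ == '__main__':
    train_dataset = PairDataset(config.data_path,
                                max_src_len=config.max_src_len,
                                max_tgt_len=config.max_tgt_len,
                                truncate_src=config.truncate_src,
                                truncate_tgt=config.truncate_tgt)
    val_dataset = PairDataset(config.val_data_path,  # assumed config field
                              max_src_len=config.max_src_len,
                              max_tgt_len=config.max_tgt_len,
                              truncate_src=config.truncate_src,
                              truncate_tgt=config.truncate_tgt)
    # Build the vocabulary from the training pairs, as the docstring specifies.
    vocab = train_dataset.build_vocab(embed_file=config.embed_file)
    train(train_dataset, val_dataset, vocab, start_epoch=0)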
class Predict():
    @timer(module='initialize predicter')
    def __init__(self):
        self.DEVICE = config.DEVICE

        dataset = PairDataset(config.data_path,
                              max_src_len=config.max_src_len,
                              max_tgt_len=config.max_tgt_len,
                              truncate_src=config.truncate_src,
                              truncate_tgt=config.truncate_tgt)

        self.vocab = dataset.build_vocab(embed_file=config.embed_file)

        self.model = PGN(self.vocab)
        self.stop_word = list(set(
            self.vocab[x.strip()]
            for x in open(config.stop_word_file, encoding='utf-8').readlines()
        ))
        self.model.load_model()
        self.model.to(self.DEVICE)

    def greedy_search(self, x, max_sum_len, len_oovs, x_padding_masks):
        """Function which returns a summary by always picking the highest
        probability option conditioned on the previous word.

        Args:
            x (Tensor): Input sequence as the source.
            max_sum_len (int): The maximum length a summary can have.
            len_oovs (Tensor): Numbers of out-of-vocabulary tokens.
            x_padding_masks (Tensor):
                The padding masks for the input sequences
                with shape (batch_size, seq_len).

        Returns:
            summary (list): The token list of the result summary.
        """
        # Get encoder output and states: call encoder forward propagation.
        ###########################################
        #          TODO: module 4 task 2          #
        ###########################################
        encoder_output, encoder_states = self.model.encoder(
            replace_oovs(x, self.vocab), self.model.decoder.embedding)

        # Initialize decoder's hidden states with encoder's hidden states.
        decoder_states = self.model.reduce_state(encoder_states)

        # Initialize decoder's input at time step 0 with the SOS token.
        x_t = torch.ones(1) * self.vocab.SOS
        x_t = x_t.to(self.DEVICE, dtype=torch.int64)
        summary = [self.vocab.SOS]
        coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)

        # Generate the hypothesis for at most max_sum_len decode steps,
        # using the decoder to produce the vocab distribution for the next token.
        while int(x_t.item()) != self.vocab.EOS and len(summary) < max_sum_len:
            context_vector, attention_weights, coverage_vector = \
                self.model.attention(decoder_states,
                                     encoder_output,
                                     x_padding_masks,
                                     coverage_vector)
            p_vocab, decoder_states, p_gen = \
                self.model.decoder(x_t.unsqueeze(1), decoder_states, context_vector)
            final_dist = self.model.get_final_distribution(
                x, p_gen, p_vocab, attention_weights, torch.max(len_oovs))
            # Get next token with maximum probability.
            x_t = torch.argmax(final_dist, dim=1).to(self.DEVICE)
            decoder_word_idx = x_t.item()
            summary.append(decoder_word_idx)
            x_t = replace_oovs(x_t, self.vocab)

        return summary

    # @timer('best k')
    def best_k(self, beam, k, encoder_output, x_padding_masks, x, len_oovs):
        """Get best k tokens to extend the current sequence at the current time step.

        Args:
            beam (utils.Beam): The candidate beam to be extended.
            k (int): Beam size.
            encoder_output (Tensor): The lstm output from the encoder.
            x_padding_masks (Tensor): The padding masks for the input sequences.
            x (Tensor): Source token ids.
            len_oovs (Tensor): Number of oov tokens in a batch.

        Returns:
            best_k (list(Beam)): The list of best k candidates.
        """
        # Use the decoder to generate the vocab distribution for the next token.
        x_t = torch.tensor(beam.tokens[-1]).reshape(1, 1)
        x_t = x_t.to(self.DEVICE)

        # Get context vector from attention network.
        context_vector, attention_weights, coverage_vector = \
            self.model.attention(beam.decoder_states,
                                 encoder_output,
                                 x_padding_masks,
                                 beam.coverage_vector)

        # Replace the indexes of OOV words with the index of the OOV token
        # to prevent index-out-of-bound errors in the decoder.
        p_vocab, decoder_states, p_gen = \
            self.model.decoder(replace_oovs(x_t, self.vocab),
                               beam.decoder_states,
                               context_vector)
        final_dist = self.model.get_final_distribution(
            x, p_gen, p_vocab, attention_weights, torch.max(len_oovs))

        # Calculate log probabilities.
        log_probs = torch.log(final_dist.squeeze())
        # Filter forbidden tokens.
        if len(beam.tokens) == 1:
            forbidden_ids = [
                self.vocab[u"台独"],
                self.vocab[u"吸毒"],
                self.vocab[u"黄赌毒"]
            ]
            log_probs[forbidden_ids] = -float('inf')
        # EOS token penalty. Follow the definition in
        # https://opennmt.net/OpenNMT/translation/beam_search/.
        log_probs[self.vocab.EOS] *= \
            config.gamma * x.size()[1] / len(beam.tokens)
        log_probs[self.vocab.UNK] = -float('inf')

        # Get top k tokens and the corresponding logprob.
        topk_probs, topk_idx = torch.topk(log_probs, k)

        # Extend the current hypothesis with the top k tokens, producing k new
        # hypotheses. (The loop variable is named `tok` so it does not shadow
        # the source tensor x.)
        best_k = [
            beam.extend(tok, log_probs[tok], decoder_states, coverage_vector)
            for tok in topk_idx.tolist()
        ]
        return best_k

    def beam_search(self, x, max_sum_len, beam_width, len_oovs, x_padding_masks):
        """Using beam search to generate summary.

        Args:
            x (Tensor): Input sequence as the source.
            max_sum_len (int): The maximum length a summary can have.
            beam_width (int): Beam size.
            len_oovs (Tensor): Number of out-of-vocabulary tokens.
            x_padding_masks (Tensor): The padding masks for the input sequences.

        Returns:
            result (list(int)): The token ids of the best hypothesis.
        """
        # Run the source sequence through the encoder (encoder forward propagation).
        ###########################################
        #          TODO: module 4 task 2          #
        ###########################################
        encoder_output, encoder_states = self.model.encoder(
            replace_oovs(x, self.vocab), self.model.decoder.embedding)
        coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)
        # Initialize decoder states with encoder forward states.
        decoder_states = self.model.reduce_state(encoder_states)

        # Initialize the hypothesis with a Beam instance.
        init_beam = Beam([self.vocab.SOS], [0], decoder_states, coverage_vector)

        # Get the beam size and create a list for the current candidates
        # and a list for completed hypotheses.
        k = beam_width
        curr, completed = [init_beam], []

        # Run beam search for at most max_sum_len steps.
        for _ in range(max_sum_len):
            # Get the k best hypotheses when adding a new token.
            topk = []
            for beam in curr:
                # When an EOS token is generated, add the hypothesis to the
                # completed list and decrease the beam size.
                if beam.tokens[-1] == self.vocab.EOS:
                    completed.append(beam)
                    k -= 1
                    continue
                for can in self.best_k(beam, k, encoder_output, x_padding_masks,
                                       x, torch.max(len_oovs)):
                    # Use topk as a heap to keep track of the top k candidates.
                    # Use the sequence scores of the hypotheses to compare
                    # and object ids to break ties.
                    add2heap(topk, (can.seq_score(), id(can), can), k)

            curr = [items[2] for items in topk]
            # Stop when there are enough completed hypotheses.
            if len(completed) == beam_width:
                break
        # When there are not enough completed hypotheses,
        # take whatever we have in the current best k as the final candidates.
        completed += curr
        # Sort the hypotheses by normalized probability and choose the best one.
        result = sorted(completed,
                        key=lambda x: x.seq_score(),
                        reverse=True)[0].tokens
        return result

    @timer(module='doing prediction')
    def predict(self, text, tokenize=True, beam_search=True):
        """Generate summary.

        Args:
            text (str or list): Source.
            tokenize (bool, optional): Whether to do tokenize or not. Defaults to True.
            beam_search (bool, optional):
                Whether to use beam search or not. Defaults to True
                (False means using greedy search).

        Returns:
            str: The final summary.
        """
        if isinstance(text, str) and tokenize:
            text = list(jieba.cut(text))
        x, oov = source2ids(text, self.vocab)
        x = torch.tensor(x).to(self.DEVICE)
        len_oovs = torch.tensor([len(oov)]).to(self.DEVICE)
        x_padding_masks = torch.ne(x, 0).byte().float()
        if beam_search:
            summary = self.beam_search(x.unsqueeze(0),
                                       max_sum_len=config.max_dec_steps,
                                       beam_width=config.beam_size,
                                       len_oovs=len_oovs,
                                       x_padding_masks=x_padding_masks)
        else:
            summary = self.greedy_search(x.unsqueeze(0),
                                         max_sum_len=config.max_dec_steps,
                                         len_oovs=len_oovs,
                                         x_padding_masks=x_padding_masks)
        summary = outputids2words(summary, oov, self.vocab)
        return summary.replace('<SOS>', '').replace('<EOS>', '').strip()
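# A short usage sketch for the Predict class above. The sample sentence is a
# placeholder; predict() tokenizes it with jieba and, by default, decodes with
# beam search.
if __name__ == '__main__':
    predictor = Predict()
    source_text = "这是一个用于演示的占位输入文本。"  # placeholder input
    print(predictor.predict(source_text, beam_search=True))   # beam search
    print(predictor.predict(source_text, beam_search=False))  # greedy search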
def train(dataset, val_dataset, v, start_epoch=0):
    """Train the model, evaluate it and store it.

    Args:
        dataset (dataset.PairDataset): The training dataset.
        val_dataset (dataset.PairDataset): The evaluation dataset.
        v (vocab.Vocab): The vocabulary built from the training dataset.
        start_epoch (int, optional): The starting epoch number. Defaults to 0.
    """
    torch.autograd.set_detect_anomaly(True)
    DEVICE = torch.device("cuda" if config.is_cuda else "cpu")

    model = PGN(v)
    model.load_model()
    model.to(DEVICE)
    if config.fine_tune:
        # In fine-tuning mode, we fix the weights of all parameters except attention.wc.
        logging.info('Fine-tuning mode.')
        for name, params in model.named_parameters():
            if name != 'attention.wc.weight':
                params.requires_grad = False

    logging.info("loading data")
    train_data = dataset
    val_data = val_dataset

    logging.info("initializing optimizer")
    # Define the optimizer.
    # optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    optimizer = optim.Adagrad(
        model.parameters(),
        lr=config.learning_rate,
        initial_accumulator_value=config.initial_accumulator_value)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.2)  # learning-rate schedule
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    val_loss = np.inf
    if os.path.exists(config.losses_path):
        with open(config.losses_path, 'r') as f:
            val_loss = float(f.readlines()[-1].split("=")[-1])
            logging.info("the last best val loss is: " + str(val_loss))

    # torch.cuda.empty_cache()
    # SummaryWriter: Log writer used for TensorboardX visualization.
    writer = SummaryWriter(config.log_path)
    # tqdm: A tool for drawing progress bars during training.
    early_stopping_count = 0
    logging.info("start training model {}, ".format(config.model_name) +
                 "epoch : {}, ".format(config.epochs) +
                 "batch_size : {}, ".format(config.batch_size) +
                 "num batches: {}, ".format(len(train_dataloader)))
    for epoch in range(start_epoch, config.epochs):
        batch_losses = []  # Get loss of each batch.
        num_batches = len(train_dataloader)
        # with tqdm(total=num_batches//100) as batch_progress:
        for batch, data in enumerate(train_dataloader):
            x, y, x_len, y_len, oov, len_oovs, img_vec = data
            assert not np.any(np.isnan(x.numpy()))
            if config.is_cuda:  # Training with GPUs.
                x = x.to(DEVICE)
                y = y.to(DEVICE)
                x_len = x_len.to(DEVICE)
                len_oovs = len_oovs.to(DEVICE)
                img_vec = img_vec.to(DEVICE)
            if batch == 0:
                logging.info("x: %s, shape: %s" % (x, x.shape))
                logging.info("y: %s, shape: %s" % (y, y.shape))
                logging.info("oov: %s" % oov)
                logging.info("img_vec: %s, shape: %s" % (img_vec, img_vec.shape))

            model.train()  # Sets the module in training mode.
            optimizer.zero_grad()  # Clear gradients.
            loss = model(x, y, len_oovs, img_vec, batch=batch, num_batches=num_batches)
            batch_losses.append(loss.item())
            loss.backward()  # Backpropagation.

            # Do gradient clipping to prevent gradient explosion.
            clip_grad_norm_(model.encoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.decoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.attention.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.reduce_state.parameters(), config.max_grad_norm)
            optimizer.step()  # Update weights.
            # scheduler.step()

            # Output and record the running loss every 100 batches.
            if (batch % 100) == 0:
                # batch_progress.set_description(f'Epoch {epoch}')
                # batch_progress.set_postfix(Batch=batch, Loss=loss.item())
                # batch_progress.update()
                # Write loss for tensorboard.
                writer.add_scalar(f'Average_loss_for_epoch_{epoch}',
                                  np.mean(batch_losses),
                                  global_step=batch)
                logging.info('epoch: {}, batch:{}, training loss:{}'.format(
                    epoch, batch, np.mean(batch_losses)))

        # Calculate average loss over all batches in an epoch.
        epoch_loss = np.mean(batch_losses)
        # epoch_progress.set_description(f'Epoch {epoch}')
        # epoch_progress.set_postfix(Loss=epoch_loss)
        # epoch_progress.update()

        avg_val_loss = evaluate(model, val_data, epoch)
        logging.info('epoch: {} '.format(epoch) +
                     'training loss:{} '.format(epoch_loss) +
                     'validation loss:{} '.format(avg_val_loss))

        # Update minimum evaluating loss.
        if not os.path.exists(os.path.dirname(config.encoder_save_name)):
            os.mkdir(os.path.dirname(config.encoder_save_name))
        if avg_val_loss < val_loss:
            logging.info("saving model to ../saved_model/ %s" % config.model_name)
            torch.save(model.encoder, config.encoder_save_name)
            torch.save(model.decoder, config.decoder_save_name)
            torch.save(model.attention, config.attention_save_name)
            torch.save(model.reduce_state, config.reduce_state_save_name)
            val_loss = avg_val_loss
            with open(config.losses_path, 'a') as f:
                f.write(f"best val loss={val_loss}\n")
        else:
            early_stopping_count += 1
            if early_stopping_count >= config.patience:
                logging.info(f'Validation loss did not decrease for '
                             f'{config.patience} epochs, stop training.')
                break
    writer.close()
val_tf_dataset = tf.data.Dataset.from_tensor_slices(
    (val_extended_input_tokens, val_extended_gt_tokens,
     val_loss_mask, val_index)).batch(int(global_batch_size))
val_dist_dataset = train_strategy.experimental_distribute_dataset(val_tf_dataset)

max_oovs_in_text = max(0,
                       np.max(extended_input_tokens) - vocab.size() + 1,
                       np.max(val_extended_input_tokens) - vocab.size() + 1)
print('Max oovs in text :', max_oovs_in_text)

#################################################################################################
# Create the model and the loss layers, define the function for distributed training
#################################################################################################
with train_strategy.scope():
    model = PGN(vocab=vocab, max_oovs_in_text=max_oovs_in_text)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)
    ce_loss = CELoss(alpha=1.)

    # def train_step(inputs):
    def pretrain_step(extended_input_tokens, extended_gt_tokens, loss_mask, idx):
        model.switch_decoding_mode('cross_entropy')
        with tf.GradientTape() as tape:
            gt_probs, greedy_seqs, coverage_losses = model(extended_input_tokens,
                                                           extended_gt_tokens,
                                                           training=True)
with tf.device('CPU'):
    val_tf_dataset = tf.data.Dataset.from_tensor_slices(
        (val_extended_input_tokens, val_extended_gt_tokens,
         val_loss_mask, val_tensor_oovs, val_index)).batch(int(global_batch_size))
val_dist_dataset = train_strategy.experimental_distribute_dataset(val_tf_dataset)

max_oovs_in_text = max(0, np.max(val_extended_input_tokens) - vocab.size() + 1)
print('Max oovs in text :', max_oovs_in_text)

#################################################################################################
# Create the model, define the function for distributed summary generation
#################################################################################################
with train_strategy.scope():
    model = PGN(vocab=vocab, max_oovs_in_text=max_oovs_in_text)
    model.load_weights(load_model_path)

    def eval_step(extended_input_tokens, extended_gt_tokens, loss_mask, oovs, idx):
        model.switch_decoding_mode('evaluate')
        _, _, greedy_seqs, _, _ = model(extended_input_tokens,
                                        extended_gt_tokens,
                                        training=False)
        return greedy_seqs

    @tf.function
    def distributed_step(dist_inputs):
        # `dist_inputs` is a tuple of per-replica tensors; strategy.run unpacks
        # it as the positional arguments of eval_step.
        greedy_seqs = train_strategy.run(eval_step, args=dist_inputs)
        return greedy_seqs
class Reinforce(object):
    def __init__(self, env):
        self.env = env
        self.num_obs = env.observation_space.shape[0]
        self.num_actions = env.action_space.n
        self.network = PGN(self.num_obs, self.num_actions)
        self.gamma = 0.99
        self.lr = 1e-3
        self.train_episodes = 4
        # `lr` is deprecated in tf.keras optimizers; use `learning_rate`.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)
        self.print_every = 10

    def discounted_rewards(self, rewards):
        # Accumulate returns from the end of the episode backwards:
        # G_t = r_t + gamma * G_{t+1}.
        discounted_rewards = []
        sum_reward = 0
        for reward in reversed(rewards):
            sum_reward = reward + self.gamma * sum_reward
            discounted_rewards.append(sum_reward)
        # list.reverse() is in-place and returns None, so reverse first,
        # then return the list.
        discounted_rewards.reverse()
        return discounted_rewards

    def loss_fn(self, prob, action, reward):
        dist = tfp.distributions.Categorical(probs=prob, dtype=tf.float32)
        log_prob = dist.log_prob(action)
        return -log_prob * reward  # -G_t * log pi(a|s)

    def update(self, states, actions, rewards):
        # REINFORCE weights each log-probability by the discounted return,
        # not the raw per-step reward.
        discounted_rewards = self.discounted_rewards(rewards)
        for state, action, reward in zip(states, actions, discounted_rewards):
            with tf.GradientTape() as tape:
                prob = self.network([state], training=True)
                loss = self.loss_fn(prob, action, reward)
            grads = tape.gradient(loss, self.network.trainable_variables)
            self.optimizer.apply_gradients(
                zip(grads, self.network.trainable_variables))

    def train(self, max_episodes, max_steps):
        scores = []
        for ep in range(max_episodes):
            states = []
            actions = []
            rewards = []
            state = self.env.reset()
            for t in range(max_steps):
                action = self.network.take_action(state)
                next_state, reward, done, _ = self.env.step(action)
                states.append(state)
                actions.append(action)
                rewards.append(reward)
                state = next_state
                if done:
                    break
            scores.append(sum(rewards))
            self.update(states, actions, rewards)
            if ep % self.print_every == 0:
                print("Episode {}\tAverage Score: {:.2f}".format(ep, np.mean(scores)))
            if np.mean(scores) >= 195.0:
                print("Environment solved in {} episodes!\tAverage Score: {:.2f}"
                      .format(ep, np.mean(scores)))
                break
        return scores

    def play(self, episodes=100, steps=200):
        for ep in range(episodes):
            state = self.env.reset()
            for t in range(steps):
                self.env.render()  # render each frame while playing
                action = self.network.take_action(state)
                next_state, reward, done, _ = self.env.step(action)
                state = next_state
                if done:
                    break
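# A minimal sketch of running the Reinforce agent above on CartPole. It assumes
# the classic `gym` API (reset() returning the observation, step() returning a
# 4-tuple), which matches how the class itself calls the environment, and that
# PGN here is the small policy network this snippet expects.
import gym

env = gym.make('CartPole-v0')
agent = Reinforce(env)
scores = agent.train(max_episodes=1000, max_steps=200)
# Sanity check for discounted_rewards: with gamma = 0.99,
# rewards [1, 1, 1] -> [1 + 0.99 * 1.99, 1 + 0.99, 1] = [2.9701, 1.99, 1].
print(agent.discounted_rewards([1.0, 1.0, 1.0]))
agent.play(episodes=3, steps=200)
env.close()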