logging.debug('Initialized') try: checkpoint_path = tf.train.latest_checkpoint(FLAGS.output_dir) saver.restore(sess, checkpoint_path) logging.debug('restore from [{0}]'.format(checkpoint_path)) except Exception: logging.debug('no check point found....') exit(0) for title in titles: state = sess.run(model.state_tensor) # feed title for head in title: input = utils.index_data(np.array([[head]]), dictionary) feed_dict = {model.X: input, model.state_tensor: state, model.keep_prob: 1.0} pred, state = sess.run( [model.predictions, model.outputs_state_tensor], feed_dict=feed_dict) sentence = title word_index = pred[0].argsort()[-1] # generate sample for i in range(64): feed_dict = {model.X: [[word_index]], model.state_tensor: state,
state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## # Your Code here ################## # 批量读取的数据进行训练 # 这里处理feed_dict? self.X,self.Y,self.keep_prob需要赋值 x = dl[0] y = dl[1] #logging.debug('x:{0} x:{1}'.format(x.shape,x[0])) #logging.debug('y:{0} y:{1}'.format(y.shape,y[0])) input_x = utils.index_data(x, dictionary) #input_x = tf.squeeze(input_x,[0,1]) lable_y = utils.index_data(y, dictionary) #lable_y = tf.squeeze(lable_y,[0,1]) #logging.debug('input_x:{0}'.format(input_x.shape)) #logging.debug('lable_y:{0}'.format(lable_y.shape)) feed_dict = { model.X: input_x, model.Y: lable_y, model.keep_prob: 0.7 } gs, _, state, l, summary_string = sess.run([ model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op
sess.run(tf.local_variables_initializer()) logging.debug('Initialized') try: checkpoint_path = tf.train.latest_checkpoint(FLAGS.output_dir) saver.restore(sess, checkpoint_path) logging.debug('restore from [{0}]'.format(checkpoint_path)) except Exception: logging.debug('no check point found....') exit(0) for title in titles: #input = utils.index_data(np.array([[title[0]]]), dictionary) input = utils.index_data(np.array([[title[0], title[1], title[2]]]), dictionary) state = sess.run(model.initial_state, {model.X: input}) # feed title # for head in title: # if head == 0: # continue # input = utils.index_data(np.array([[head]]), dictionary) # feed_dict = {model.X: input, # model.initial_state: state, # model.keep_prob: 1.0} # pred, state = sess.run( # [model.predictions, model.final_state], feed_dict=feed_dict)
checkpoint_path = tf.train.latest_checkpoint(FLAGS.output_dir) saver.restore(sess, checkpoint_path) logging.debug('restore from [{0}]'.format(checkpoint_path)) except Exception: logging.debug('no check point found....') for x in range(1): logging.debug('epoch [{0}]....'.format(x)) state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## X = utils.index_data(dl[0], dictionary) Y = utils.index_data(dl[1], dictionary) feed_dict = { model.X: X, model.Y: Y, model.state_tensor: state, model.keep_prob: 0.7, model.is_training: 1 } ################## gs, _, state, l, summary_string = sess.run([ model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op ], feed_dict=feed_dict)
def _parse_col_indices(cols):
    """Turn a column specification into a list of integer column indices.

    ``cols`` may be ``None`` (no columns), a comma-separated string such as
    ``"0,3,5"``, or an iterable of integer-like values. Blank tokens in a
    string (``""``, ``"1,2,"``) are skipped instead of failing in ``int("")``.
    """
    if cols is None:
        return []
    if isinstance(cols, str):
        return [int(token) for token in cols.split(",") if token.strip()]
    return [int(col) for col in cols]


def transform_csv(data_path=None, train_path=None, test_path=None,
                  train_output_path=None, test_output_path=None,
                  header="infer", train_frac=0.8, implicit_threshold=0,
                  sep=",", label_col=0, cat_cols=None, num_cols=None,
                  normalize=False, num_neg=None, ffm=True, seed=2020):
    """Load a delimited data set, convert it and write train/test outputs.

    The pipeline is: ``read_data`` -> optional ``normalize_data`` ->
    ``filter_data`` -> ``index_data`` -> ``convert_normal`` (or
    ``convert_neg`` when negative sampling is requested) -> one output file
    for the train split and one for the test split.

    Args:
        data_path, train_path, test_path: input locations, forwarded to
            ``read_data`` (presumably either a single file to split or
            separate train/test files -- confirm against ``read_data``).
        train_output_path, test_output_path: destinations for the converted
            train and test rows, written one row per line without index or
            header.
        header, sep, train_frac, seed, implicit_threshold: forwarded
            unchanged to ``read_data``.
        label_col: index of the label column, forwarded to ``read_data`` and
            to the converters.
        cat_cols: categorical column indices, as a comma-separated string
            (``"1,2,3"``) or an iterable of ints; ``None`` or ``""`` means
            no categorical columns.
        num_cols: numerical column indices, same accepted forms as
            ``cat_cols``.
        normalize: when true and ``num_cols`` is non-empty, the numerical
            columns are passed through ``normalize_data``.
        num_neg: when a positive number, ``convert_neg`` is used instead of
            ``convert_normal`` and receives this value.
        ffm: forwarded to the converters (presumably selects the libffm
            output layout -- confirm against ``convert_normal``).

    Returns:
        None. The results are written to the two output paths.
    """
    neg_sample = num_neg is not None and num_neg > 0
    cat_cols = _parse_col_indices(cat_cols)
    num_cols = _parse_col_indices(num_cols)

    train_data, test_data = read_data(
        data_path, train_path, test_path, sep, header, label_col,
        train_frac, seed, implicit_threshold, neg_sample)

    if normalize and num_cols:
        train_data, test_data = normalize_data(train_data, test_data,
                                               num_cols)

    train_data, test_data = filter_data(train_data, test_data, cat_cols)
    # The value indices are built from the training split only and reused
    # for the test split below.
    cat_unique_vals, num_unique_vals = index_data(train_data, cat_cols,
                                                  num_cols)

    if neg_sample:
        transformed_train_data = convert_neg(
            train_data, label_col, cat_cols, num_cols, cat_unique_vals,
            num_unique_vals, num_neg, ffm, train=True)
        transformed_test_data = convert_neg(
            test_data, label_col, cat_cols, num_cols, cat_unique_vals,
            num_unique_vals, num_neg, ffm, train=False)
    else:
        transformed_train_data = convert_normal(
            train_data, label_col, cat_cols, num_cols, cat_unique_vals,
            num_unique_vals, ffm)
        transformed_test_data = convert_normal(
            test_data, label_col, cat_cols, num_cols, cat_unique_vals,
            num_unique_vals, ffm)

    # NOTE(review): with an output path of None, ``Series.to_csv`` returns
    # the CSV text instead of writing a file, so the result is discarded.
    pd.Series(transformed_train_data).to_csv(
        train_output_path, index=False, header=False)
    pd.Series(transformed_test_data).to_csv(
        test_output_path, index=False, header=False)
logging.debug('no check point found....') for x in range(1): logging.debug('epoch [{0}]....'.format(x)) state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## # Your Code here ################## ##print (dl[0]) ##print (type(dl)) ##print (dl[1]) input_x = utils.index_data(dl[0], dictionary) input_y = utils.index_data(dl[1], dictionary) #构建feed_dict feed_dict = { model.X: input_x, model.Y: input_y, model.state_tensor: state, model.keep_prob: 1.0 } gs, _, state, l, summary_string = sess.run([ model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op ], feed_dict=feed_dict) summary_string_writer.add_summary(summary_string, gs)
except Exception: logging.debug('no check point found....') for x in range(1): logging.debug('epoch [{0}]....'.format(x)) state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## # Your Code here ################## feed_dict = { model.X: utils.index_data(dl[0], dictionary), model.Y: utils.index_data(dl[1], dictionary), model.keep_prob: 0.5, } for i, (c, h) in enumerate(model.state_tensor): feed_dict[c] = state[i].c feed_dict[h] = state[i].h ################# gs, _, state, l, summary_string = sess.run([ model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op ], feed_dict=feed_dict) summary_string_writer.add_summary(summary_string, gs) if gs % ckpt_steps == 0:
def train_on_data(
        self,
        data_batch: DataBatch,
        step: int = 0,
        writer: Optional[SummaryWriter] = None) -> Dict[str, float]:
    """
    Run the PPO update for every agent on the given batch of data.

    For each agent, ``config["ppo_steps"]`` shuffled passes are made over the
    agent's data. Every minibatch takes one optimizer step on the clipped
    surrogate policy loss, plus ``config["value_loss_coeff"]`` times the
    squared-error value loss, minus ``config["entropy_coeff"]`` times the
    mean entropy. Gradients are clipped to ``config["max_grad_norm"]`` when
    that setting is not None.

    Args:
        data_batch: DataBatch, dictionary; it is passed through
            ``transpose_batch`` and then indexed by agent id.
        step: step index forwarded to ``write_dict`` with the metrics.
        writer: summary writer forwarded to ``write_dict``.

    Returns:
        Metrics keyed as ``"{agent_id}/<name>"``: episode reward statistics
        (mean, median, min, max, std), ``time_update``, ``kl_divergence``,
        ``ppo_steps_made``, ``policy_loss``, ``value_loss``, ``total_loss``
        and ``mean_std``. The three loss entries describe the last minibatch
        that was processed.
    """
    metrics: Dict[str, float] = {}
    timer = Timer()

    entropy_coeff = self.config["entropy_coeff"]

    # Bound to a new name: the transposed batch has a different type from
    # the ``data_batch`` parameter.
    agent_batches: DataBatchT = transpose_batch(data_batch)

    for agent_id in self.agents:
        agent = self.agents[agent_id]
        agent_batch = agent_batches[agent_id]
        optimizer = self.optimizers[agent_id]

        # ---------------- Unpack and prepare the data ----------------
        if self.config["use_gpu"]:
            agent_batch = batch_to_gpu(agent_batch)
            agent.cuda()

        # Values reported when no minibatch gets processed (for example
        # ppo_steps == 0). The tensors must be floating point because
        # torch.mean rejects integer tensors.
        # kl_divergence is never updated below (no KL-based early stopping
        # is performed), so it is always reported as 0.
        kl_divergence = 0.
        ppo_step = -1
        value_loss = torch.tensor(0.)
        policy_loss = torch.tensor(0.)
        loss = torch.tensor(0.)

        num_samples = agent_batch['dones'].size(0)
        # NOTE(review): the batch size is 0 when there are fewer samples
        # than minibatches -- confirm Batcher copes with that.
        batcher = Batcher(num_samples // self.config["minibatches"],
                          [np.arange(num_samples)])

        # Start a timer
        timer.checkpoint()

        for ppo_step in range(self.config["ppo_steps"]):
            batcher.shuffle()

            while not batcher.end():
                batch_indices = batcher.next_batch()[0]
                batch_indices = torch.tensor(batch_indices).long()

                agent_minibatch = index_data(agent_batch, batch_indices)

                # Re-evaluate with the current parameters so the outputs
                # carry gradients for this update.
                logprob_batch, value_batch, entropy_batch = \
                    agent.evaluate_actions(agent_minibatch)

                advantages_batch = agent_minibatch['advantages']
                # logprobs of taken actions
                old_logprobs_minibatch = agent_minibatch['logprobs']
                discounted_batch = agent_minibatch['rewards_to_go']

                # ------------------ Compute the loss ------------------
                # Surrogate loss
                prob_ratio = torch.exp(logprob_batch
                                       - old_logprobs_minibatch)
                surr1 = prob_ratio * advantages_batch
                surr2 = prob_ratio.clamp(
                    1. - self.eps, 1 + self.eps) * advantages_batch

                policy_loss = -torch.min(surr1, surr2)
                value_loss = 0.5 * (value_batch - discounted_batch) ** 2

                loss = (torch.mean(policy_loss)
                        + (self.config["value_loss_coeff"]
                           * torch.mean(value_loss))
                        - (entropy_coeff * torch.mean(entropy_batch)))

                # -------------------- Update step --------------------
                optimizer.zero_grad()
                loss.backward()
                if self.config["max_grad_norm"] is not None:
                    nn.utils.clip_grad_norm_(agent.model.parameters(),
                                             self.config["max_grad_norm"])
                optimizer.step()

        agent.cpu()

        # Episode statistics: split the rewards at episode boundaries and
        # sum each piece.
        ep_lens = get_episode_lens(agent_batch['dones'].cpu())
        ep_rewards = torch.tensor([
            torch.sum(rewards)
            for rewards in torch.split(agent_batch['rewards'], ep_lens)
        ])

        metrics[f"{agent_id}/episode_reward_mean"] = torch.mean(
            ep_rewards).item()
        metrics[f"{agent_id}/episode_reward_median"] = torch.median(
            ep_rewards).item()
        metrics[f"{agent_id}/episode_reward_min"] = torch.min(
            ep_rewards).item()
        metrics[f"{agent_id}/episode_reward_max"] = torch.max(
            ep_rewards).item()
        metrics[f"{agent_id}/episode_reward_std"] = torch.std(
            ep_rewards).item()

        # Training-related metrics
        metrics[f"{agent_id}/time_update"] = timer.checkpoint()
        metrics[f"{agent_id}/kl_divergence"] = kl_divergence
        metrics[f"{agent_id}/ppo_steps_made"] = ppo_step + 1
        metrics[f"{agent_id}/policy_loss"] = torch.mean(
            policy_loss).cpu().item()
        metrics[f"{agent_id}/value_loss"] = torch.mean(
            value_loss).cpu().item()
        metrics[f"{agent_id}/total_loss"] = loss.detach().cpu().item()
        metrics[f"{agent_id}/mean_std"] = agent.model.std.mean().item()

    # Write the metrics to tensorboard once, after every agent has added
    # its entries, so each value is logged a single time per call.
    write_dict(metrics, step, writer)

    return metrics
logging.debug('no check point found....') for x in range(1): logging.debug('epoch [{0}]....'.format(x)) state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## # Your Code here ################## # dl 是一个元组(data,label) ,data 和 label 的元素是单词列表 #logging.debug('dl = {0}'.format(dl)) # logging.debug('here {0}....'.format(x)) # 把单词转换为字典索引,才能用于训练 data_batch = [utils.index_data(ch, dictionary) for ch in dl[0]] label_batch = [utils.index_data(ch, dictionary) for ch in dl[1]] #print('data_batch =', data_batch) #print('label_batch =', label_batch) feed_dict = {model.X:data_batch, model.Y:label_batch, model.state_tensor:state, model.keep_prob:FLAGS.keep_prob} gs, _, state, l, summary_string = sess.run( [model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op], feed_dict=feed_dict) summary_string_writer.add_summary(summary_string, gs) if gs % 10 == 0: logging.debug('step [{0}] loss [{1}]'.format(gs, l)) save_path = saver.save(sess, os.path.join( FLAGS.output_dir, "model.ckpt"), global_step=gs) summary_string_writer.close()
logging.debug('restore from [{0}]'.format(checkpoint_path)) except Exception: logging.debug('no check point found....') for x in range(1): logging.debug('epoch [{0}]....'.format(x)) state = sess.run(model.state_tensor) for dl in utils.get_train_data(vocabulary, batch_size=FLAGS.batch_size, num_steps=FLAGS.num_steps): ################## # Your Code here ################## input_data = utils.index_data(dl[0], dictionary) input_label = utils.index_data(dl[1], dictionary) # print(input_data.shape) # print(input_label.shape) # feed_dict = dic() feed_dict = { model.X: input_data, model.Y: input_label, model.state_tensor: state, model.keep_prob: 0.8 } gs, _, state, l, summary_string = sess.run([ model.global_step, model.optimizer, model.outputs_state_tensor, model.loss, model.merged_summary_op