def train(self, batch):
    """Run one training step on ``batch`` and return the resulting error.

    Args:
        batch: Mapping read here with the keys ``'batch_size'``,
            ``'actions'`` and ``'states_1'``; it is also handed whole to
            ``self.get_target``.

    Returns:
        The value ``self.session.run`` produces for ``self.error``.
    """
    batch_size = batch['batch_size']
    target = self.get_target(batch)

    # Build a per-sample one-hot action mask: row i gets a 1 only in the
    # column of the action recorded for sample i. Indexing with
    # ``[:, indices]`` instead would set those columns in *every* row, so
    # each row would carry the union of all actions in the batch.
    # NOTE(review): assumes batch['actions'] holds one integer action index
    # per sample (batch_size entries in total) - confirm against the caller.
    action_indices = np.asarray(
        list(batch['actions']), dtype=np.intp
    ).reshape(-1)
    actions = np.zeros([batch_size, self.num_actions])
    actions[np.arange(batch_size), action_indices] = 1

    # len(self.gpus) is forwarded to get_feed_dict unchanged; presumably it
    # controls how the values are spread over the placeholder lists - verify
    # against get_feed_dict.
    feed_dict = get_feed_dict(
        [
            (self.target_pl_list, target),
            (self.actions_pl_list, actions),
            (self.input_pl_list, batch['states_1']),
        ],
        len(self.gpus),
    )

    # The first fetch (train_step) is run only for its effect; its result
    # is discarded and only the error value is returned.
    _, error_val = self.session.run(
        [self.train_step, self.error],
        feed_dict=feed_dict,
    )
    return error_val
def evaluate_target(self, batch):
    """Evaluate ``self.target_out`` for ``batch`` and return the result.

    Args:
        batch: Value paired with ``self.input_pl_list`` when building the
            feed dict.

    Returns:
        Whatever ``self.session.run`` yields for ``self.target_out``.
    """
    gpu_count = len(self.gpus)
    placeholder_inputs = [(self.input_pl_list, batch)]
    feeds = get_feed_dict(placeholder_inputs, gpu_count)
    return self.session.run(self.target_out, feed_dict=feeds)