Example #1
0
 def train(self, batch):
     """Run one training step on ``batch`` and return the evaluated error.

     Args:
         batch: Mapping read through the keys ``'batch_size'``,
             ``'actions'`` and ``'states_1'``; it is also passed whole to
             ``self.get_target``.
             NOTE(review): assumes ``batch['actions']`` holds one integer
             action index per sample (length ``batch_size``) -- confirm
             against the caller.

     Returns:
         The value of ``self.error`` fetched by the same ``session.run``
         call that executes ``self.train_step``.
     """
     batch_size = batch['batch_size']
     target = self.get_target(batch)

     # Build a per-sample one-hot action mask: row i gets a 1 only in the
     # column of the action recorded for sample i.  Row indices must be
     # paired with the action indices; indexing with ``[:, indices]`` would
     # instead set those columns in *every* row of the mask.
     actions = np.zeros([batch_size, self.num_actions])
     actions[np.arange(batch_size), list(batch['actions'])] = 1

     # get_feed_dict is defined outside this block; it receives
     # (placeholder list, value) pairs plus the number of GPUs.
     feed_dict = get_feed_dict(
         [
             (self.target_pl_list, target),
             (self.actions_pl_list, actions),
             (self.input_pl_list, batch['states_1']),
         ],
         len(self.gpus),
     )
     _, error_val = self.session.run(
         [self.train_step, self.error],
         feed_dict=feed_dict,
     )
     return error_val
Example #2
0
 def evaluate_target(self, batch):
     """Evaluate ``self.target_out`` with ``batch`` fed to the input placeholders.

     ``batch`` is paired with ``self.input_pl_list`` and handed to
     ``get_feed_dict`` together with the number of entries in ``self.gpus``;
     the resulting feed dict is used for a single ``session.run`` call whose
     result is returned unchanged.
     """
     placeholder_feeds = [(self.input_pl_list, batch)]
     feeds = get_feed_dict(placeholder_feeds, len(self.gpus))
     return self.session.run(self.target_out, feed_dict=feeds)