Esempio n. 1
0
 def make_feed_dict(self, xs, advantage, returns, mask, train=True):
     """Assemble the feed dict for one actor-critic evaluation.

     The same inputs ``xs`` are mapped onto both the policy input
     variables and the value input variables of ``self.ac_model``.
     ``advantage`` and ``returns`` are paired with ``self.advantage`` and
     ``self.returns`` through ``create_y_feed_dict``, and ``mask`` is fed
     under ``self.mask``.

     ``train`` is accepted for interface compatibility but has no effect:
     no supplementary train/test feeds are added here.
     """
     policy_vars = self.ac_model.policy_input_vars
     feeds = create_x_feed_dict(policy_vars, xs)

     value_vars = self.ac_model.value_input_vars
     feeds.update(create_x_feed_dict(value_vars, xs))

     advantage_feed = create_y_feed_dict(self.advantage, advantage)
     feeds.update(advantage_feed)
     returns_feed = create_y_feed_dict(self.returns, returns)
     feeds.update(returns_feed)

     feeds[self.mask] = mask
     return feeds
Esempio n. 2
0
 def make_feed_dict(self, xs, value, returns, mask, train=True):
     """Build the feed dict from inputs, predicted values and returns.

     ``xs`` feeds both the policy and the value input variables of
     ``self.ac_model``. ``value`` is paired with ``self.pred_value`` and
     ``returns`` with ``self.returns`` via ``create_y_feed_dict``;
     ``mask`` is fed under ``self.mask``. No advantage entry is added.

     ``train`` is currently ignored: supplementary train/test feeds are
     not added by this method.
     """
     ac_model = self.ac_model
     feeds = create_x_feed_dict(ac_model.policy_input_vars, xs)
     feeds.update(create_x_feed_dict(ac_model.value_input_vars, xs))

     target_feeds = (
         (self.pred_value, value),
         (self.returns, returns),
     )
     for target_var, target_val in target_feeds:
         feeds.update(create_y_feed_dict(target_var, target_val))

     feeds.update({self.mask: mask})
     return feeds
Esempio n. 3
0
 def get_single_state_feed_dict(self, single_state):
     """Return the feed dict for evaluating the action model on one state.

     The state is first converted with ``self.get_single_state_dict``,
     then mapped onto the action model's input variables. The stored
     ``self.hidden_state_vals`` and the model's supplementary test-time
     feeds are merged in afterwards, in that order.
     """
     state_inputs = self.get_single_state_dict(single_state)
     input_vars = self.action_model.get_input_vars()
     feeds = create_x_feed_dict(input_vars, state_inputs)

     feeds.update(self.hidden_state_vals)
     supp_feeds = create_supp_test_feed_dict(self.action_model)
     feeds.update(supp_feeds)
     return feeds
Esempio n. 4
0
 def compute_preds(self, xs, sess):
     """Evaluate ``self.y_hat`` on ``xs`` in the session ``sess``.

     ``xs`` is run through ``self.pred_xs_preprocessor`` before being
     mapped onto ``self.input_vars``; the supplementary test-time feeds
     of this object are added before evaluation.
     """
     prepared = self.pred_xs_preprocessor(xs)
     feeds = create_x_feed_dict(self.input_vars, prepared)
     feeds.update(create_supp_test_feed_dict(self))
     return self.y_hat.eval(feed_dict=feeds, session=sess)
Esempio n. 5
0
 def make_single_feed_dict(self, model, xs, y_var, y_val, train=True):
     """Build the feed dict for a single model and target.

     Combines the input feeds for ``model``, the target feed pairing
     ``y_var`` with ``y_val``, and the model's supplementary feeds: the
     training variant when ``train`` is truthy, otherwise the test one.
     """
     supp_builder = (create_supp_train_feed_dict if train
                     else create_supp_test_feed_dict)

     feeds = create_x_feed_dict(model.get_input_vars(), xs)
     feeds.update(create_y_feed_dict(y_var, y_val))
     feeds.update(supp_builder(model))
     return feeds
Esempio n. 6
0
 def make_feed_dict(self, xs, y, mask, train=True):
     """Assemble the feed dict for ``self.q_model``.

     Contains the input feeds for ``xs``, the target feed pairing
     ``self.y`` with ``y``, ``mask`` under ``self.mask``, and the
     supplementary feeds of the model (training variant when ``train``
     is truthy, test variant otherwise).
     """
     q_model = self.q_model
     feeds = create_x_feed_dict(q_model.get_input_vars(), xs)
     feeds.update(create_y_feed_dict(self.y, y))
     feeds[self.mask] = mask

     supp_builder = (create_supp_train_feed_dict if train
                     else create_supp_test_feed_dict)
     feeds.update(supp_builder(q_model))
     return feeds
Esempio n. 7
0
 def make_feed_dict(self, xs, y, mask, train=True):
     """Return the feed dict used to run ``self.q_model`` on a batch.

     The dict holds the input feeds built from ``xs``, the target feed
     for ``self.y``, the ``mask`` entry keyed by ``self.mask``, and the
     model's supplementary feeds selected by ``train``.
     """
     input_vars = self.q_model.get_input_vars()
     result = create_x_feed_dict(input_vars, xs)

     target_feed = create_y_feed_dict(self.y, y)
     result.update(target_feed)
     result.update({self.mask: mask})

     # Evaluation mode: attach the test-time supplementary feeds and stop.
     if not train:
         result.update(create_supp_test_feed_dict(self.q_model))
         return result

     result.update(create_supp_train_feed_dict(self.q_model))
     return result
Esempio n. 8
0
 def make_feed_dict(self, models, y_vars, inputs, train=True):
     """Build one feed dict covering several named models.

     ``models`` and ``y_vars`` are keyed by model name. ``inputs[0]``
     holds the per-model inputs and ``inputs[1]`` the per-model targets,
     both keyed by the same names.

     Input and target feeds for every model are added first; the
     supplementary feeds (training or test variant, chosen by ``train``)
     are added in a second pass so they are applied last.
     """
     xs_by_model, ys_by_model = inputs[0], inputs[1]

     feeds = {}
     for name, net in models.items():
         x_feed = create_x_feed_dict(net.get_input_vars(),
                                     xs_by_model[name])
         feeds.update(x_feed)
         y_feed = create_y_feed_dict(y_vars[name], ys_by_model[name])
         feeds.update(y_feed)

     supp_builder = (create_supp_train_feed_dict if train
                     else create_supp_test_feed_dict)
     for _name, net in models.items():
         feeds.update(supp_builder(net))
     return feeds
Esempio n. 9
0
    def start_episode(self):
        """Flush the previous episode to replay and reset hidden states.

        The buffered partial experiences are walked from last to first.
        Each one is stored in ``self.experience_replay`` with an extra
        fifth element: the undiscounted sum of the rewards of the steps
        that came after it (its own reward is not included). The buffer
        is then emptied.

        Afterwards every entry of ``self.hidden_state_vals`` is replaced
        by a zero array with the shape of the matching initial hidden
        state, which is obtained by evaluating ``self.init_hidden`` on an
        all-zero state input.
        """
        later_rewards = 0
        for state, action, reward, next_state in reversed(
                self.part_experiences):
            self.experience_replay.add_experience(
                (state, action, reward, next_state, later_rewards))
            later_rewards += reward
        self.part_experiences = []

        # A dummy all-zero state is only needed to evaluate the shapes of
        # the initial hidden states.
        input_vars = self.action_model.get_input_vars()
        dummy_inputs = {'state': np.zeros((1, 1) + self.state_shape)}
        shape_probe_feed = create_x_feed_dict(input_vars, dummy_inputs)

        for name, hidden_var in self.hidden_states.items():
            initial_value = self.init_hidden[name].eval(
                session=self.sess, feed_dict=shape_probe_feed)
            self.hidden_state_vals[hidden_var] = np.zeros(
                initial_value.shape)
Esempio n. 10
0
 def compute_preds(self, xs):
     """Evaluate ``self.y_hat`` on ``xs`` using ``self.sess``.

     Only the input feeds built from ``self.input_vars`` are supplied;
     no supplementary test-time feeds are added.
     """
     feeds = create_x_feed_dict(self.input_vars, xs)
     return self.y_hat.eval(feed_dict=feeds, session=self.sess)
Esempio n. 11
0
 def perform_update(self, batch):
     """Run one training step of ``self.train_step`` on ``batch``.

     ``batch`` supplies the model inputs and, under the key ``'y'``, the
     targets fed for ``self.y``. The model's supplementary training
     feeds are included. Nothing is returned.
     """
     feeds = create_x_feed_dict(self.model.get_input_vars(), batch)
     for extra in (create_y_feed_dict(self.y, batch['y']),
                   create_supp_train_feed_dict(self.model)):
         feeds.update(extra)
     self.train_step.run(feed_dict=feeds)
Esempio n. 12
0
 def perform_update(self, batch):
     """Execute a single optimisation step on the given batch.

     The feed dict combines the input feeds built from ``batch``, the
     target feed pairing ``self.y`` with ``batch['y']``, and the
     supplementary training feeds of ``self.model``; it is then passed
     to ``self.train_step.run``.
     """
     input_vars = self.model.get_input_vars()
     step_feeds = create_x_feed_dict(input_vars, batch)

     target_feed = create_y_feed_dict(self.y, batch['y'])
     step_feeds.update(target_feed)

     train_feed = create_supp_train_feed_dict(self.model)
     step_feeds.update(train_feed)

     self.train_step.run(feed_dict=step_feeds)
Esempio n. 13
0
 def compute_preds(self, xs):
     """Return the value of ``self.y_hat`` for the inputs ``xs``.

     The inputs are mapped onto ``self.input_vars`` and the evaluation
     runs in ``self.sess``. Supplementary test-time feeds are not part
     of the feed dict.
     """
     input_feeds = create_x_feed_dict(self.input_vars, xs)
     predictions = self.y_hat.eval(session=self.sess,
                                   feed_dict=input_feeds)
     return predictions
Esempio n. 14
0
 def compute(self, model, xs, y_var, y_val):
     """Evaluate ``self.loss`` for ``model`` on the given inputs/targets.

     The feed dict holds the input feeds built from ``model.input_vars``
     and ``xs``, the target feed pairing ``y_var`` with ``y_val``, and
     the model's supplementary test-time feeds. No session is passed to
     ``eval`` explicitly.
     """
     feeds = create_x_feed_dict(model.input_vars, xs)
     feeds.update(create_y_feed_dict(y_var, y_val))
     feeds.update(create_supp_test_feed_dict(model))
     return self.loss.eval(feed_dict=feeds)
Esempio n. 15
0
 def compute(self, model, xs, y_var, y_val):
     """Return the value of ``self.loss`` in test mode.

     Inputs ``xs`` are mapped onto ``model.input_vars``, the target
     ``y_val`` is paired with ``y_var``, and the supplementary test-time
     feeds of ``model`` are merged in before ``self.loss`` is evaluated.
     ``eval`` is called without an explicit session argument.
     """
     eval_feeds = create_x_feed_dict(model.input_vars, xs)

     target_feed = create_y_feed_dict(y_var, y_val)
     eval_feeds.update(target_feed)

     test_feed = create_supp_test_feed_dict(model)
     eval_feeds.update(test_feed)

     loss_value = self.loss.eval(feed_dict=eval_feeds)
     return loss_value