Esempio n. 1
0
    def train(self, experiences, batch_steps=1, **kwargs):
        """Run ``batch_steps`` training passes over one batch of experiences.

        The experiences are merged with ``util.deepZip``, converted to numpy
        arrays, and paired with the ``self.experience`` structure to build the
        feed dict that is reused for every ``sess.run`` call. After each pass
        the fetched summary is written to ``self.writer``.

        Args:
            experiences: Iterable of experience structures. It is unpacked
                into ``util.deepZip``, so presumably every element shares the
                nesting of ``self.experience`` -- TODO confirm with callers.
            batch_steps: Number of times ``self.run_dict`` is run on the same
                feed dict.
            **kwargs: Accepted for caller compatibility; not used here.
        """
        # Merge the per-item structures, then turn every leaf into an array.
        batch = util.deepMap(np.array, util.deepZip(*experiences))

        # Pair each leaf of self.experience with the matching batch array.
        feed_dict = dict(
            util.deepValues(util.deepZip(self.experience, batch)))

        if self.debug:
            self.debugGrads(feed_dict)

        for _ in range(batch_steps):
            results = self.sess.run(self.run_dict, feed_dict)
            # run_dict is expected to fetch both 'summary' and 'global_step'.
            self.writer.add_summary(
                results['summary'], results['global_step'])
Esempio n. 2
0
 def act(self, history, verbose=False):
     """Evaluate the policy on ``history`` and let the model choose an action.

     ``history`` is vectorized as ``ssbm.SimpleStateAction`` data, paired
     with the ``self.input`` structure to form the feed dict, and the
     evaluated ``self.policy`` is handed to ``self.model.act``.

     Args:
         history: State/action history passed to ``ct.vectorizeCTypes``.
         verbose: Forwarded unchanged to ``self.model.act``.

     Returns:
         Whatever ``self.model.act`` returns for the evaluated policy.
     """
     vectorized = ct.vectorizeCTypes(ssbm.SimpleStateAction, history)
     paired = util.deepZip(self.input, vectorized)
     feed_dict = dict(util.deepValues(paired))
     policy_out = self.sess.run(self.policy, feed_dict)
     return self.model.act(policy_out, verbose)