def train(self, experiences, batch_steps=1, **kwargs):
    """Run ``batch_steps`` training steps on a batch of experiences.

    The experiences are combined with ``util.deepZip``, converted leaf-wise
    with ``np.array`` via ``util.deepMap``, and then paired with
    ``self.experience`` to build the feed dictionary handed to
    ``self.sess.run``.

    Args:
        experiences: Iterable of experience structures; it is unpacked
            into ``util.deepZip``. Presumably every element shares the
            nested layout of ``self.experience`` -- confirm with callers.
        batch_steps: Number of times ``self.run_dict`` is executed on the
            same feed dictionary.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        None. The summary from each step is written to ``self.writer``.
    """
    # Experiences arrive already loaded; an earlier variant read them from
    # file(s) at this point (ssbm.readStateActions / readStateActions_pickle).
    stacked = util.deepMap(np.array, util.deepZip(*experiences))
    feed_pairs = util.deepValues(util.deepZip(self.experience, stacked))
    input_dict = dict(feed_pairs)

    # A disabled variant first ran self.saved_data with this feed and mapped
    # self.placeholders to the resulting tensor handles; the feed dictionary
    # is used directly instead.

    if self.debug:
        self.debugGrads(input_dict)

    # NOTE(review): the summary is assumed to be logged once per step (inside
    # the loop) -- confirm against the original layout of this method.
    for _ in range(batch_steps):
        results = self.sess.run(self.run_dict, input_dict)
        self.writer.add_summary(results['summary'], results['global_step'])
def act(self, history, verbose=False):
    """Choose an action for the given history.

    ``history`` is vectorized as ``ssbm.SimpleStateAction`` data, paired
    with ``self.input`` to form a feed dictionary, and ``self.policy`` is
    evaluated in ``self.sess``. The evaluated policy is then passed to
    ``self.model.act``.

    Args:
        history: Value passed to ``ct.vectorizeCTypes`` together with
            ``ssbm.SimpleStateAction``.
        verbose: Forwarded unchanged to ``self.model.act``.

    Returns:
        Whatever ``self.model.act`` returns for the evaluated policy.
    """
    vectorized = ct.vectorizeCTypes(ssbm.SimpleStateAction, history)
    feed_dict = dict(util.deepValues(util.deepZip(self.input, vectorized)))
    policy_out = self.sess.run(self.policy, feed_dict)
    return self.model.act(policy_out, verbose)