def saveModel(self, s_batch, a_batch, r_batch, entropy_record, end_of_video=False):
    """Run one master-side training step on a worker's collected experience.

    Computes actor/critic gradients from the episode batch, logs metrics to
    stdout and TensorBoard, applies accumulated gradients once enough batches
    have been collected, periodically checkpoints the model, and returns the
    current network parameters for the caller (worker) to adopt.

    Args:
        s_batch: list of state observations for one episode segment.
        a_batch: list of action vectors, stacked row-wise.
        r_batch: list of rewards, stacked row-wise.
        entropy_record: per-step policy entropies (for monitoring only).
        end_of_video: whether this segment ends the video (terminal flag).

    Returns:
        Whatever self.getParams() yields — the current actor/critic params.
    """
    grads_actor, grads_critic, td_batch = a3c.compute_gradients(
        s_batch=np.stack(s_batch, axis=0),  # ignore the first chunk
        a_batch=np.vstack(a_batch),         # since we don't have the
        r_batch=np.vstack(r_batch),         # control over it
        terminal=end_of_video,
        actor=self.actor,
        critic=self.critic)
    td_loss = np.mean(td_batch)

    # Accumulate gradients; they are applied in bulk below.
    self.actor_gradient_batch.append(grads_actor)
    self.critic_gradient_batch.append(grads_critic)

    myprint("====")
    myprint("Master: Quality: Epoch", self.epoch)
    myprint("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
            "Avg_entropy", np.mean(entropy_record))
    myprint("====")

    # Push scalar summaries to TensorBoard.
    summary_str = self.sess.run(self.summary_ops, feed_dict={
        self.summary_vars[0]: td_loss,
        self.summary_vars[1]: np.mean(r_batch),
        self.summary_vars[2]: np.mean(entropy_record),
    })
    self.writer.add_summary(summary_str, self.epoch)
    self.writer.flush()

    self.entropy_record = []

    # Apply gradients only after GRADIENT_BATCH_SIZE batches have piled up.
    if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
        assert len(self.actor_gradient_batch) == len(self.critic_gradient_batch)
        for a_grad, c_grad in zip(self.actor_gradient_batch,
                                  self.critic_gradient_batch):
            self.actor.apply_gradients(a_grad)
            self.critic.apply_gradients(c_grad)
        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.epoch += 1
        if self.epoch % MODEL_SAVE_INTERVAL == 0:
            # Save the neural net parameters to disk.
            save_path = self.saver.save(
                self.sess,
                self.summary_dir + "/nn_model_ep_" + str(self.epoch) + ".ckpt")
            myprint("Model saved in file: %s" % save_path)

    return self.getParams()
def saveModel(self, end_of_video=False):
    """Worker-side model update using the experience buffered on self.

    Two modes:
      * IPC mode (self.ipcQueue set): ship the buffered batches to the master
        process, wait for the reply carrying fresh network parameters, adopt
        them, and clear the buffers.
      * Local mode: train in-process, mirroring the master-side step
        (gradient computation, logging, batched application, checkpointing).

    Args:
        end_of_video: terminal flag for the last chunk of this segment.
    """
    if self._vReadOnly:
        # Read-only agent: never trains or mutates the model.
        return

    if self.ipcQueue:
        # Hand the experience to the master over the request queue.
        self.ipcQueue[0].put({
            "id": self.ipcId,
            "cmd": IPC_CMD_UPDATE,
            "pid": self.pid,
            "data": [self.s_batch, self.a_batch, self.r_batch,
                     self.entropy_record, end_of_video],
        })
        # Block until the reply addressed to this pid shows up.
        # NOTE(review): replies for other pids are consumed and dropped
        # here — confirm the master responds on per-worker queues, or
        # other workers' results could be lost.
        while True:
            reply = self.ipcQueue[1].get()
            sender = reply["pid"]
            payload = reply["res"]
            if sender == self.pid:
                break

        # Adopt the parameters the master sent back.
        actor_net_params, critic_net_params = payload
        self.actor.set_network_params(actor_net_params)
        self.critic.set_network_params(critic_net_params)

        # Clear all experience buffers in place.
        del self.s_batch[:]
        del self.a_batch[:]
        del self.r_batch[:]
        del self.entropy_record[:]
        return

    # --- Local (no-IPC) training path ---
    grads_actor, grads_critic, td_batch = a3c.compute_gradients(
        s_batch=np.stack(self.s_batch, axis=0),  # ignore the first chunk
        a_batch=np.vstack(self.a_batch),         # since we don't have the
        r_batch=np.vstack(self.r_batch),         # control over it
        terminal=end_of_video,
        actor=self.actor,
        critic=self.critic)
    td_loss = np.mean(td_batch)

    # Accumulate gradients; they are applied in bulk below.
    self.actor_gradient_batch.append(grads_actor)
    self.critic_gradient_batch.append(grads_critic)

    myprint("====")
    myprint("Quality: Epoch", self.epoch)
    myprint("TD_loss", td_loss, "Avg_reward", np.mean(self.r_batch),
            "Avg_entropy", np.mean(self.entropy_record))
    myprint("====")

    # Push scalar summaries to TensorBoard.
    summary_str = self.sess.run(self.summary_ops, feed_dict={
        self.summary_vars[0]: td_loss,
        self.summary_vars[1]: np.mean(self.r_batch),
        self.summary_vars[2]: np.mean(self.entropy_record),
    })
    self.writer.add_summary(summary_str, self.epoch)
    self.writer.flush()

    self.entropy_record = []

    # Apply gradients only after GRADIENT_BATCH_SIZE batches have piled up.
    if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
        assert len(self.actor_gradient_batch) == len(self.critic_gradient_batch)
        for a_grad, c_grad in zip(self.actor_gradient_batch,
                                  self.critic_gradient_batch):
            self.actor.apply_gradients(a_grad)
            self.critic.apply_gradients(c_grad)
        self.actor_gradient_batch = []
        self.critic_gradient_batch = []

        self.epoch += 1
        if self.epoch % MODEL_SAVE_INTERVAL == 0:
            # Save the neural net parameters to disk.
            save_path = self.saver.save(
                self.sess,
                self.summary_dir + "/nn_model_ep_" + str(self.epoch) + ".ckpt")
            myprint("Model saved in file: %s" % save_path)

    # Clear the experience buffers for the next segment (entropy_record
    # was already reset above).
    del self.s_batch[:]
    del self.a_batch[:]
    del self.r_batch[:]