def __init__(self, A_DIM, **kws):
    """Build an actor/critic pair for an action space of size *A_DIM*.

    Creates a TF session, constructs the a3c actor and critic networks
    with the module-level state dimensions (S_INFO x S_LEN), initializes
    all variables, and restores weights from NN_MODEL when it is set.

    Args:
        A_DIM: number of discrete actions (bitrate levels).
        **kws: accepted and ignored (kept for caller compatibility).
    """
    #=====================================
    # SETUP
    #=====================================
    self.sess = tf.Session()
    self.A_DIM = A_DIM
    self.actor = a3c.ActorNetwork(self.sess,
                                  state_dim=[S_INFO, S_LEN],
                                  action_dim=A_DIM,
                                  learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(self.sess,
                                    state_dim=[S_INFO, S_LEN],
                                    action_dim=A_DIM,
                                    learning_rate=CRITIC_LR_RATE)
    # tf.initialize_all_variables() was removed in TF >= 1.0; use the
    # replacement already used by the other __init__ variants in this file.
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()  # save neural net parameters

    # restore neural net parameters
    nn_model = NN_MODEL
    if nn_model is not None:  # nn_model is the path to file
        self.saver.restore(self.sess, nn_model)
        print("Model restored.")
def __init__(self, actionset=None, infoDept=S_LEN, infoDim=S_INFO,
             log_path=None, summary_dir=None, nn_model=None):
    """Central learner setup: build networks, summaries, and checkpointing.

    Args:
        actionset: list of available actions; its length sets the action
            dimension. Defaults to an empty list. (Default changed from a
            mutable ``[]`` literal to ``None`` to avoid the shared-default
            pitfall; callers see identical behavior.)
        infoDept: history depth of the state tensor (columns).
        infoDim: number of state features (rows).
        log_path: unused here beyond logging; kept for caller compatibility.
        summary_dir: required base directory; checkpoints/summaries go
            under ``<summary_dir>/rnnQuality``.
        nn_model: explicit checkpoint path; when None, the most recent
            saved session under summary_dir is auto-detected.
    """
    assert summary_dir
    actionset = [] if actionset is None else actionset
    myprint("Central init Params:", actionset, infoDept, log_path, summary_dir, nn_model)
    self.summary_dir = os.path.join(summary_dir, "rnnQuality")
    self.nn_model = nn_model
    self.a_dim = len(actionset)
    self._vActionset = actionset
    self._vInfoDim = infoDim
    self._vInfoDept = infoDept
    if not os.path.exists(self.summary_dir):
        os.makedirs(self.summary_dir)

    self.sess = tf.Session()
    # log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))
    self.actor = a3c.ActorNetwork(self.sess,
                                  state_dim=[self._vInfoDim, self._vInfoDept],
                                  action_dim=self.a_dim,
                                  learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(self.sess,
                                    state_dim=[self._vInfoDim, self._vInfoDept],
                                    action_dim=self.a_dim,
                                    learning_rate=CRITIC_LR_RATE)
    self.summary_ops, self.summary_vars = a3c.build_summaries()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(self.summary_dir, self.sess.graph)  # training monitor
    self.saver = tf.train.Saver()  # save neural net parameters
    self.epoch = 0

    # restore neural net parameters: fall back to the latest checkpoint
    # found in summary_dir when no explicit model path was given
    if self.nn_model is None:
        nn_model, epoch = guessSavedSession(self.summary_dir)
        if nn_model:
            self.nn_model = nn_model
            self.epoch = epoch
    # nn_model = NN_MODEL
    if self.nn_model is not None:  # nn_model is the path to file
        self.saver.restore(self.sess, self.nn_model)
        myprint("Model restored.")

    self.actor_gradient_batch = []
    self.critic_gradient_batch = []
def __init__(self, actionset=None, infoDept=S_LEN, infoDim=S_INFO,
             log_path=None, summary_dir=None, nn_model=None,
             ipcQueue=None, ipcId=None, readOnly=False):
    """Per-process learner setup with optional IPC parameter sync.

    Builds the actor/critic pair under ``<summary_dir>/rnnBuffer``. In
    standalone mode (no ipcQueue) it restores weights from disk; in IPC
    mode it requests the current network parameters from a master process
    and blocks until they arrive.

    Args:
        actionset: list of available actions; its length sets the action
            dimension. Defaults to an empty list. (Default changed from a
            mutable ``[]`` literal to ``None``; callers see identical
            behavior.)
        infoDept: history depth of the state tensor (columns).
        infoDim: number of state features (rows).
        log_path: logged only; kept for caller compatibility.
        summary_dir: required base directory for checkpoints/summaries.
        nn_model: checkpoint file name, resolved relative to summary_dir.
        ipcQueue: pair of queues [request, response] to a master process;
            must be given together with ipcId.
        ipcId: identifier used when talking to the master over ipcQueue.
        readOnly: stored flag; not otherwise used in this constructor.
    """
    assert summary_dir
    assert (not ipcQueue and not ipcId) or (ipcQueue and ipcId)
    actionset = [] if actionset is None else actionset
    myprint("Pensieproc init Params:", actionset, infoDept, log_path, summary_dir, nn_model)
    self.ipcQueue = ipcQueue
    self.pid = os.getpid()
    self.ipcId = ipcId
    self.summary_dir = os.path.join(summary_dir, "rnnBuffer")
    self.nn_model = None if not nn_model else os.path.join(
        self.summary_dir, nn_model)
    self.a_dim = len(actionset)
    self._vActionset = actionset
    self._vInfoDim = infoDim
    self._vInfoDept = infoDept
    self._vReadOnly = readOnly
    if not os.path.exists(self.summary_dir):
        os.makedirs(self.summary_dir)

    self.sess = tf.Session()
    # log_file = open(os.path.join(log_path, "PensiveLearner", "wb"))
    self.actor = a3c.ActorNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=CRITIC_LR_RATE)
    self.summary_ops, self.summary_vars = a3c.build_summaries()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(
        self.summary_dir, self.sess.graph)  # training monitor
    self.saver = tf.train.Saver()  # save neural net parameters

    # restore neural net parameters (disk restore only in standalone mode;
    # in IPC mode the parameters come from the master below)
    self.epoch = 0
    if self.nn_model is None and not self.ipcQueue:
        nn_model, epoch = guessSavedSession(self.summary_dir)
        if nn_model:
            self.nn_model = nn_model
            self.epoch = epoch
    # nn_model = NN_MODEL
    if self.nn_model is not None and not self.ipcQueue:  # nn_model is the path to file
        self.saver.restore(self.sess, self.nn_model)
        myprint("Model restored with `" + self.nn_model + "'")

    if self.ipcQueue:
        # ask the master for the current actor/critic parameters
        self.ipcQueue[0].put({
            "id": self.ipcId,
            "pid": self.pid,
            "cmd": IPC_CMD_PARAM
        })
        myprint("=" * 50)
        myprint(self.ipcId, ": waiting for ipc")
        myprint("=" * 50)
        res = None
        while True:
            res = self.ipcQueue[1].get()
            pid = res["pid"]
            res = res["res"]
            if pid == self.pid:
                break
            # NOTE(review): replies addressed to other pids are consumed
            # and dropped here, not re-queued — confirm each consumer has
            # its own response queue, otherwise messages can be lost.
        actor_net_params, critic_net_params = res
        self.actor.set_network_params(actor_net_params)
        self.critic.set_network_params(critic_net_params)
        myprint("=" * 50)
        myprint(self.ipcId, ": ipcOver")
        myprint("=" * 50)

    # rollout storage
    self.s_batch = []
    self.a_batch = []
    self.r_batch = []
    self.entropy_record = []
    self.actor_gradient_batch = []
    self.critic_gradient_batch = []
    # per-request bookkeeping keyed by an external request id
    self.keyedSBatch = {}
    self.keyedActionProb = {}
    self.keyedAction = {}
    self.keyedInputParam = {}
def __init__(self, actionset=None, infoDept=S_LEN, infoDim=S_INFO,
             log_path=None, summary_dir=None, nn_model=None,
             readOnly=False, master=False, ipcQueues=None):
    """Master/slave learner bootstrap.

    As a *slave* (master=False): spawns a child process running
    PensiveLearner in master mode, wires a queue pair to it, waits for
    its "ready" handshake, and returns — the slave object only proxies.

    As a *master* (master=True, run inside the child): builds the
    actor/critic networks, restores the latest checkpoint, then enters
    the command loop (``runCmd``), logging any escaping exception.

    Args:
        actionset: list of available actions; its length sets the action
            dimension. Defaults to an empty list. (Default changed from a
            mutable ``[]`` literal to ``None``; callers see identical
            behavior.)
        infoDept: history depth of the state tensor (columns).
        infoDim: number of state features (rows).
        log_path: forwarded to the child; not otherwise used here.
        summary_dir: required checkpoint/summary directory.
        nn_model: checkpoint file name, resolved relative to summary_dir.
        readOnly: must be truthy (asserted); stored on the instance.
        master: True only for the in-child master instance.
        ipcQueues: [recv, send] queue pair; required iff master is True.
    """
    assert summary_dir
    assert readOnly
    assert master == (ipcQueues is not None)
    actionset = [] if actionset is None else actionset
    self.master = master
    cprint.cyan("!!CREATING OBJECT!!")
    if not master:
        cprint.blue("!!SLAVE!!")
        self.recv, self.send = [mp.Queue(), mp.Queue()]
        # child gets the queue pair swapped: our send is its recv
        self.proc = mp.Process(
            target=PensiveLearner,
            args=(actionset, infoDept, infoDim, log_path, summary_dir,
                  nn_model, readOnly, True, [self.send, self.recv]))
        self.proc.start()
        # block until the child signals it finished its own setup
        while True:
            cmd = self.recv.get()
            if cmd == "ready":
                break
        return

    cprint.red("!!MASTER!!")
    self.recv, self.send = ipcQueues
    # myprint("Pensieproc init Params:", actionset, infoDept, log_path, summary_dir, nn_model)
    self.pid = os.getpid()
    self.summary_dir = summary_dir
    self.nn_model = None if not nn_model else os.path.join(
        self.summary_dir, nn_model)
    self.a_dim = len(actionset)
    self._vActionset = actionset
    self._vInfoDim = infoDim
    self._vInfoDept = infoDept
    self._vReadOnly = readOnly
    if not os.path.exists(self.summary_dir):
        os.makedirs(self.summary_dir)

    self.sess = tf.Session()
    self.actor = a3c.ActorNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=ACTOR_LR_RATE)
    self.critic = a3c.CriticNetwork(
        self.sess,
        state_dim=[self._vInfoDim, self._vInfoDept],
        action_dim=self.a_dim,
        learning_rate=CRITIC_LR_RATE)
    self.summary_ops, self.summary_vars = a3c.build_summaries()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(
        self.summary_dir, self.sess.graph)  # training monitor
    self.saver = tf.train.Saver()  # save neural net parameters

    # restore neural net parameters: auto-detect the latest checkpoint
    # when none was named explicitly
    self.epoch = 0
    if self.nn_model is None and self.master:
        nn_model, epoch = guessSavedSession(self.summary_dir)
        if nn_model:
            self.nn_model = nn_model
            self.epoch = epoch
    if self.nn_model is not None:  # nn_model is the path to file
        self.saver.restore(self.sess, self.nn_model)
        cprint.red("Model restored with `" + self.nn_model + "'")

    # command loop runs for the life of the child process; broad catch is
    # deliberate — the traceback is logged before the process dies
    try:
        self.runCmd()
    except Exception:
        track = tb.format_exc()
        cprint.red(track)
def __init__(self, videoInfo, agent, log_file_path=LOG_FILE, *kw, **kws):
    """Agent-facing setup: build networks sized to the video's bitrate set.

    Creates the TF session, actor/critic networks, optional log file and
    checkpoint restore, and seeds the state/action/reward batches with a
    default-quality first action. Everything is collected in
    ``self.input_dict`` and mirrored onto instance attributes.

    Args:
        videoInfo: object exposing ``bitratesKbps``; its length sets the
            action dimension.
        agent: accepted and ignored here (kept for caller compatibility).
        log_file_path: path opened for binary writing, or falsy for no log.
        *kw, **kws: accepted and ignored (kept for caller compatibility).
    """
    self.video = None
    #=====================================
    # SETUP
    #=====================================
    log_file = None if not log_file_path else open(log_file_path, 'wb')
    sess = tf.Session()
    A_DIM = len(videoInfo.bitratesKbps)
    actor = a3c.ActorNetwork(sess,
                             state_dim=[S_INFO, S_LEN],
                             action_dim=A_DIM,
                             learning_rate=ACTOR_LR_RATE)
    critic = a3c.CriticNetwork(sess,
                               state_dim=[S_INFO, S_LEN],
                               action_dim=A_DIM,
                               learning_rate=CRITIC_LR_RATE)
    # tf.initialize_all_variables() was removed in TF >= 1.0; use the
    # replacement already used by the other __init__ variants in this file.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()  # save neural net parameters

    # restore neural net parameters
    nn_model = NN_MODEL
    if nn_model is not None:  # nn_model is the path to file
        saver.restore(sess, nn_model)
        print("Model restored.")

    # seed the batches with the default-quality action as a one-hot vector
    init_action = np.zeros(A_DIM)
    init_action[DEFAULT_QUALITY] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [init_action]
    r_batch = []

    train_counter = 0

    # need this storage, because observation only contains total
    # rebuffering time; we compute the difference to get per-step values
    input_dict = {
        'sess': sess,
        'log_file': log_file,
        'actor': actor,
        'critic': critic,
        'saver': saver,
        'train_counter': train_counter,
        's_batch': s_batch,
        'a_batch': a_batch,
        'r_batch': r_batch,
    }

    #=====================================
    # INIT
    #=====================================
    self.input_dict = input_dict
    self.sess = input_dict['sess']
    self.log_file = input_dict['log_file']
    self.actor = input_dict['actor']
    self.critic = input_dict['critic']
    self.saver = input_dict['saver']
    self.s_batch = input_dict['s_batch']
    self.a_batch = input_dict['a_batch']
    self.r_batch = input_dict['r_batch']