def _sample_observation_window(agent, n_frames=10, frame_width=10):
    """Sample ``n_frames`` consecutive states from ``agent`` into one array.

    Each row holds ``agent.unwrap_state(agent.observestate())`` for one
    sampling step.

    Args:
        agent: Object exposing ``observestate()`` and ``unwrap_state()``.
        n_frames: Number of consecutive observations to take (rows).
        frame_width: Row length of the buffer; presumably the length of one
            unwrapped state -- TODO confirm against ``agent.unwrap_state``.

    Returns:
        Array of shape ``(1, n_frames, frame_width)``; the leading axis is
        added with ``np.newaxis``.
    """
    window = np.empty(shape=(n_frames, frame_width))
    for row in range(n_frames):
        state = agent.unwrap_state(agent.observestate())
        window[row] = np.array(state)
    return window[np.newaxis, :]


def run_DQN():
    """Connect to the simulator, set up the agent and run the DQN loop.

    Every iteration samples a window of observations, encodes it through
    ``Robot.rnn_out``, picks and executes an action, reads the reward,
    samples and encodes a second window, and stores the transition with
    ``Robot.store``. ``Robot.learn()`` is called on every 10th step once
    more than 200 steps have elapsed.

    Relies on the module-level names ``np`` and ``Robot``.
    """
    import time

    import agent
    # Not referenced in this function; kept in case importing it has side
    # effects -- TODO confirm and drop if it does not.
    import exp  # noqa: F401
    import rl_vrep

    n_episodes = 30000
    learn_after = 200   # no learning until strictly more steps than this
    learn_every = 10    # afterwards, learn on every 10th step

    rl_vrep.connect()
    rl_vrep.start()
    time.sleep(0.5)
    agent.setup_task()
    time.sleep(0.5)
    agent.setup()
    time.sleep(0.5)

    step = 0
    for _ in range(n_episodes):
        print('step:', step)

        # Encode the window of observations taken before acting.
        observation = Robot.sess.run(
            Robot.rnn_out,
            feed_dict={Robot.rnn_in: _sample_observation_window(agent)},
        )

        action = Robot.choose_action(observation)
        agent.execute_action(action)
        print('action:', action)

        r = agent.get_reward()
        print('reward:', r)

        # Encode the window of observations taken after acting.
        observation_ = Robot.sess.run(
            Robot.rnn_out,
            feed_dict={Robot.rnn_in: _sample_observation_window(agent)},
        )

        Robot.store(observation, action, r, observation_)

        if step > learn_after and step % learn_every == 0:
            Robot.learn()

        # observation_ is not carried over: the next iteration samples a
        # fresh window before choosing its action.
        step += 1

    print('run over!')
def setup():
    """Set up the task by delegating to ``agent.setup_task()``."""
    agent.setup_task()
update_op = [] for g_v, l_v in zip(global_vars, local_vars): update_op.append(l_v.assign(g_v)) return update_op #def anneal_lr(self): #return tf.cond((self.lr > 0.0), #lambda:tf.assign_sub(self.lr,self.delta_lr), #lambda:tf.assign(self.lr,0.0)) if __name__ == "__main__": rl_vrep.connect() rl_vrep.start() time.sleep(0.5) agent.setup_task() time.sleep(0.5) agent.setup() time.sleep(0.5) global_steps_counter = itertools.count() global_net = ACNet('global') workers = [] for i in range(1, N_WORKERS + 1): worker = WORKER(i, global_steps_counter) workers.append(worker) with tf.Session() as sess: COORD = tf.train.Coordinator() print('Initializing\n') sess.run(tf.global_variables_initializer()) workers_threads = [] for worker in workers:
def setup():
    """Delegate setup to the agent module via ``agent.setup_task()``."""
    agent.setup_task()