Example #1
def run_DQN():
    import agent
    import time
    import exp
    import rl_vrep
    import numpy as np

    # "Robot" (the DQN agent and its recurrent state encoder) is assumed to
    # be defined at module level; it is not created inside this function.

    rl_vrep.connect()
    rl_vrep.start()
    time.sleep(0.5)
    agent.setup_task()
    time.sleep(0.5)
    agent.setup()
    time.sleep(0.5)
    step = 0
    for epi in range(30000):
        print('step:', step)
        # Collect a window of 10 consecutive sensor readings and encode the
        # window into a single state representation with the recurrent layer.
        observation = np.empty(shape=(10, 10))
        observation_ = np.empty(shape=(10, 10))
        for i in range(10):
            observation_i = agent.observestate()
            observation_i = agent.unwrap_state(observation_i)
            observation[i] = np.array(observation_i)
        observation = observation[np.newaxis, :]
        observation = Robot.sess.run(Robot.rnn_out,
                                     feed_dict={Robot.rnn_in: observation})

        #observation = np.mean(observation,axis= 0)
        action = Robot.choose_action(observation)

        agent.execute_action(action)
        print('action:', action)
        #time.sleep(0.5)

        # Reward for the executed action, then the next 10-reading window,
        # encoded the same way as the current state.
        r = agent.get_reward()
        print('reward:', r)
        for i in range(10):
            observation_i = agent.observestate()
            observation_i = agent.unwrap_state(observation_i)
            observation_[i] = np.array(observation_i)
        #observation_ = np.mean(observation_,axis= 0)
        observation_ = observation_[np.newaxis, :]
        observation_ = Robot.sess.run(Robot.rnn_out,
                                      feed_dict={Robot.rnn_in: observation_})
        # Store the transition (s, a, r, s') in the replay memory.
        Robot.store(observation, action, r, observation_)

        # Start learning after 200 warm-up steps, then train every 10 steps.
        if (step > 200) and (step % 10 == 0):
            Robot.learn()

        observation = observation_

        step += 1
    print('run over!')
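The loop above uses a module-level object named Robot that the listing does not show. The stub below only sketches the interface those calls assume (choose_action, store, learn, plus the sess / rnn_in / rnn_out attributes for the recurrent state encoder); its method bodies are illustrative placeholders, not the original implementation.

import random
from collections import deque

import numpy as np


class RobotStub:
    """Illustrative stand-in for the module-level Robot used in run_DQN()."""

    def __init__(self, n_actions=4, memory_size=2000):
        self.n_actions = n_actions            # assumed discrete action space
        self.epsilon = 0.1                    # assumed exploration rate
        self.memory = deque(maxlen=memory_size)
        # A real agent would also build its network here and expose
        # sess, rnn_in and rnn_out for the recurrent state encoder.

    def choose_action(self, observation):
        # Epsilon-greedy selection over placeholder Q-values.
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        q_values = np.zeros(self.n_actions)   # stands in for a forward pass
        return int(np.argmax(q_values))

    def store(self, observation, action, reward, observation_):
        # Append one (s, a, r, s') transition to the replay memory.
        self.memory.append((observation, action, reward, observation_))

    def learn(self):
        # Sample a minibatch; a real agent would run a Q-learning update here.
        batch = random.sample(self.memory, min(len(self.memory), 32))
        return len(batch)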
Example #2
def setup():
    """ task module setup is performed in agent """
    agent.setup_task()
Example #3
        # One assign op per variable pair: running update_op copies the
        # global network's parameters into this worker's local network.
        update_op = []
        for g_v, l_v in zip(global_vars, local_vars):
            update_op.append(l_v.assign(g_v))
        return update_op

    #def anneal_lr(self):
    #return tf.cond((self.lr > 0.0),
    #lambda:tf.assign_sub(self.lr,self.delta_lr),
    #lambda:tf.assign(self.lr,0.0))


if __name__ == "__main__":
    rl_vrep.connect()
    rl_vrep.start()
    time.sleep(0.5)
    agent.setup_task()
    time.sleep(0.5)
    agent.setup()
    time.sleep(0.5)
    # Shared step counter, the global actor-critic network, and one worker
    # per thread.
    global_steps_counter = itertools.count()
    global_net = ACNet('global')
    workers = []
    for i in range(1, N_WORKERS + 1):
        worker = WORKER(i, global_steps_counter)
        workers.append(worker)
    with tf.Session() as sess:
        COORD = tf.train.Coordinator()
        print('Initializing\n')
        sess.run(tf.global_variables_initializer())
        workers_threads = []
        for worker in workers:
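As a separate, self-contained sketch (TensorFlow 1.x, matching the snippets above), the following shows how a global-to-local parameter sync op like the update_op returned earlier in this example is typically built and run; the scope names and shapes are illustrative assumptions, not part of the original code.

import tensorflow as tf

# One global variable and one worker-local variable under separate scopes.
with tf.variable_scope('global'):
    g_w = tf.get_variable('w', initializer=tf.ones([3]))
with tf.variable_scope('worker_1'):
    l_w = tf.get_variable('w', initializer=tf.zeros([3]))

global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'worker_1')

# Same construction as in the example: one assign op per (global, local) pair.
pull_op = [l_v.assign(g_v) for g_v, l_v in zip(global_vars, local_vars)]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(pull_op)     # copy the global weights into the worker's copy
    print(sess.run(l_w))  # [1. 1. 1.]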
Example #4
def setup():
    agent.setup_task()