Example #1
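Note: these excerpts omit their module-level imports and the constants they reference (A_DIM, S_DIM, A_BOUND, MAX_EPISODES, MAX_EP_STEPS, the *_OFFSET values, etc.). A minimal sketch of the imports they appear to assume follows; the PA, PS and PC aliases are guesses inferred from how the messages are used, not taken from the original source.

import pickle
import numpy as np
import matplotlib.pyplot as plt
import rospy
from geometry_msgs.msg import Point, Vector3
from geometry_msgs.msg import PoseArray as PA      # 'robot_pose' messages
from geometry_msgs.msg import PoseStamped as PS    # single robot / target pose
from sensor_msgs.msg import PointCloud as PC       # 'state' / 'normalized_state' messages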
    def __init__(self):
        """ Initializing DDPG """
        self.ddpg = DDPG(a_dim=A_DIM,
                         s_dim=S_DIM,
                         batch_size=10,
                         memory_capacity=500)
        self.ep_reward = 0.0
        self.current_ep = 0
        self.current_step = 0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.var = 5.0
        print("Initialized DDPG")
        """ Setting communication"""
        self.sub1 = rospy.Subscriber('robot_pose',
                                     PA,
                                     self.callback1,
                                     queue_size=1)
        self.sub2 = rospy.Subscriber('robot_pose',
                                     PA,
                                     self.callback2,
                                     queue_size=1)
        self.pub1 = rospy.Publisher('Robot_5/pose', PS, queue_size=10)
        self.pub2 = rospy.Publisher('normalized_state', PC, queue_size=10)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.state = PA()
        self.updated = True  # if s_ is updated
        self.got_callback1 = False
        self.got_callback2 = False
        print("Initialized communication")
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        ends = pickle.load(open('ends.p', 'rb'))
        self.r_ends = ends['r_ends']
        self.rs_ends = ends['rs_ends']
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.scaler = 1 / 0.3
        self.sample_target()
        print("Read target data")

        #self.ddpg.restore_momery()
        #self.ddpg.restore_model()
        """
Example #2
def talker():
    pub1 = rospy.Publisher('state', PC, queue_size=10)
    rospy.init_node('state_pub', anonymous=True)
    rate = rospy.Rate(1)  # 1 Hz
    count = 0
    pc = PC()
    pc.header.frame_id = 'world'
    pts = list()

    for i in range(10):
        point = Point()
        point.x = 0
        point.y = 0
        point.z = i * 0.1
        pts.append(point)
    print([x.z for x in pts])
    pc.points = pts

    while not rospy.is_shutdown():
        rospy.loginfo('Publishing state...')
        pub1.publish(pc)
        rate.sleep()
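For completeness, the usual rospy entry point for this publisher would look like the following (standard boilerplate assumed here, not part of the original excerpt):

if __name__ == '__main__':
    try:
        talker()
    except rospy.ROSInterruptException:
        pass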
Example #3
    def __init__(self):
        self.pub = rospy.Publisher('state_sim', PC, queue_size=10)
        self.pc = PC()
        self.pc.header.frame_id = 'world'

        a_s = pickle.load(open('./data/action_state_data', 'rb'))
        as_array = np.zeros((len(a_s), 5, 3))
        for i, ans in enumerate(a_s):
            as_array[i, 4, :] = ans['action']
            as_array[i, :4, :] = ans['state'][:, :3]

        self.as_array_0 = np.zeros((len(a_s), 5, 3))
        for i, asn in enumerate(a_s):
            self.as_array_0[i, 4, :] = asn['action']
            self.as_array_0[
                i, :4, :] = asn['state'][:, :3] - asn['state'][0, :3]
            self.pub_state(self.as_array_0[i, :4, :])
            #print self.as_array_0[i,:4,:].shape
            #rospy.sleep(0.02)

        self.states = self.as_array_0[:, :4, :]
        self.actions = self.as_array_0[:, 4, :]
        self.current_pose = self.states[0, :, :]
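pub_state() is called here and in the later examples, but its body is not part of the excerpt. A plausible sketch, assuming it simply packs each (x, y, z) row of the given array into the PointCloud message and publishes it:

    def pub_state(self, state):
        # Hypothetical helper: publish an (N, 3) array of xyz rows as a point cloud.
        pts = []
        for row in state:
            p = Point()
            p.x, p.y, p.z = row[0], row[1], row[2]
            pts.append(p)
        self.pc.points = pts
        self.pc.header.stamp = rospy.Time.now()
        self.pub.publish(self.pc)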
Example #4
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.ddpg = DDPG(a_dim=A_DIM,
                         s_dim=S_DIM,
                         batch_size=64,
                         memory_capacity=100000,
                         gamma=GAMMA,  # gamma=0.98
                         lr_a=0.001)
        self.ep_reward = 0.0
        self.current_ep = 0
        self.current_step = 0
        self.current_action = np.array([.0, .0, .0])
        self.done = True # if the episode is done
        self.var = VAR_INIT
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized DDPG")

        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """
        self.sub = rospy.Subscriber('robot_pose', PA, self.callback, queue_size=1)
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.state = PA()
        self.updated = False # if s is updated
        self.got_callback1 = False
        self.got_callback2 = False
        print("Initialized communication")
        """

        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.scaler = 1/0.3
        self.sample_target()
        print("Read target data")

        #self.ddpg.restore_momery()
        #self.ddpg.restore_model()

        # Main loop: train for MAX_EPISODES episodes of at most MAX_EP_STEPS steps
        # in the simulator; exploration noise (self.var) decays linearly once the
        # replay buffer holds more than TRAIN_POINT transitions. After MAX_EPISODES
        # the else-branch below runs the learned policy for evaluation only.
        while not rospy.is_shutdown():
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    p = self.sim.current_pose
                    s = np.vstack((p, self.target))
                    s = s[3:,:]
                    s = self.normalize_state(s)
                    norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                    noise_a = np.random.normal(norm_a, self.var)
                    action = np.clip(noise_a, -1.0, 1.0)
                    self.current_action = action*A_BOUND + A_BOUND
                    try:
                        p_ = self.sim.update_pose(self.current_action)
                    except Exception:
                        print(self.ddpg.choose_action(s.reshape(6)[-S_DIM:]))
                        print(s.reshape(6)[-S_DIM:])
                    s_ = np.vstack((p_, self.target))

                    s_ = s_[3:,:]
                    s_ = self.normalize_state(s_)
                    self.compute_reward(s[0,:], s_[1,:])

                    self.current_step += 1

                    if self.reward > GOT_GOAL:
                        #self.reward += 1.0
                        self.ep_reward += self.reward
                        if self.current_ep % 10 == 0:
                            self.reward_record.append(self.ep_reward / self.current_step)
                            self.ep_record.append(self.current_ep)
                            plt.plot(self.ep_record, self.reward_record)
                            plt.ylim([-1.5,0.0])
                            self.fig.canvas.draw()
                            self.fig.savefig('learning.png')
                            print('\n')
                            print "Target Reached"
                            print("Normalized action:")
                            print norm_a
                            print("Noise action:")
                            print action
                            print("Output action:")
                            print self.current_action
                            print("Reward: %f" % self.reward)
                            print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                            print('*' * 40)
                            self.ddpg.save_model()
                            self.ddpg.save_memory()
                        self.done = True
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        #self.ddpg.save_memory()
                        #self.ddpg.save_model()
                        """
                        self.current_action = np.array([.0, .0, .0])
                        self.action_V3.x, self.action_V3.y, self.action_V3.z \
                            = self.current_action[0], self.current_action[1], self.current_action[2]
                        self.run_action(self.action_V3)
                        """

                    else:
                        self.ep_reward += self.reward
                        if self.current_step == MAX_EP_STEPS:
                            if self.current_ep % 10 == 0:
                                self.reward_record.append(self.ep_reward / self.current_step)
                                self.ep_record.append(self.current_ep)
                                plt.plot(self.ep_record, self.reward_record)
                                plt.ylim([-1.5, 0.0])
                                self.fig.canvas.draw()
                                self.fig.savefig('learning.png')
                                print('\n')
                                print "Target Failed"
                                print("Normalized action:")
                                print norm_a
                                print("Noise action:")
                                print action
                                print("Output action:")
                                print self.current_action
                                print("Reward: %f" % self.reward)
                                print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                                print('*' * 40)
                                self.ddpg.save_model()
                                self.ddpg.save_memory()
                            self.done = False
                            self.current_step = 0
                            self.current_ep += 1
                            self.sample_target()
                            self.ep_reward = 0
                            #self.ddpg.save_memory()
                            #self.ddpg.save_model()
                            """
                            self.current_action = np.array([.0, .0, .0])
                            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                                = self.current_action[0], self.current_action[1], self.current_action[2]
                            self.run_action(self.action_V3)
                            """
                    self.ddpg.store_transition(s.reshape(6)[-S_DIM:], action, self.reward, s_.reshape(6)[-S_DIM:])
                    if self.ddpg.pointer > TRAIN_POINT:
                        #if (self.current_step % 10 == 0):
                        #self.var *= VAR_DECAY
                        self.var = max(0.0, self.var - 1.02 / (MAX_EP_STEPS * MAX_EPISODES))
                        self.ddpg.learn()
                    self.pub_state(s_)

            else:
                p = self.sim.current_pose
                s = np.vstack((p, self.target))
                s = s[3:,:]
                s = self.normalize_state(s)
                norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                self.current_action = norm_a * A_BOUND + A_BOUND
                print("Normalized action:")
                print norm_a
                print("Current action:")
                print self.current_action

                p_ = self.sim.update_pose(self.current_action)
                s_ = np.vstack((p_, self.target))
                s_ = s_[3:,:]
                print("Distance: %f" % np.linalg.norm(s_[0,:]-s_[1,:]))
                s_ = self.normalize_state(s_)

                self.compute_reward(s_[0, :], s_[1, :])
                print('Explore: %.2f' % self.var,)
                print("Reward: %f" % self.reward)
                rospy.sleep(1)
                print(self.ddpg.get_value(s.reshape(6)[-S_DIM:], norm_a,
                                          self.reward.reshape((-1, 1)),
                                          s_.reshape(6)[-S_DIM:]))
                print('\n')
                self.pub_state(s_)

                if self.reward > GOT_GOAL:
                    self.done = True
                    self.current_step = 0
                    self.current_ep += 1
                    self.sample_target()
                    print "Target Reached"
                    print("Episode %i Ended" % self.current_ep)
                    print("Reward: %f" % self.reward)
                    print('Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                    print('*' * 40)
                    self.ep_reward = 0
                    # self.ddpg.save_memory()
                    # self.ddpg.save_model()
                    """
                    self.current_action = np.array([.0, .0, .0])
                    self.action_V3.x, self.action_V3.y, self.action_V3.z \
                        = self.current_action[0], self.current_action[1], self.current_action[2]
                    self.run_action(self.action_V3)
                    """

                else:
                    self.done = False
                    self.current_step += 1
                    if self.current_step == 2:
                        print "Target Failed"
                        print("Reward: %f" % self.reward)
                        print("Episode %i Ends" % self.current_ep)
                        print(
                            'Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                        print('*' * 40)
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        # self.ddpg.save_memory()
                        # self.ddpg.save_model()
                        """
Example #5
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.pfn = PFN(a_dim=A_DIM,
                       s_dim=S_DIM,
                       batch_size=8,
                       memory_capacity=MEMORY_NUM,
                       lr=0.001,
                       bound=A_BOUND)
        self.ep_reward = 0.0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized PFN")
        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)

        self.sub = rospy.Subscriber('Robot_1/pose',
                                    PS,
                                    self.callback,
                                    queue_size=1)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.updated = False  # if s is updated
        self.got_target = False
        print("Initialized communication")
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.sample_target()
        print("Read target data")

        #self.pfn.restore_momery()
        self.pfn.restore_model('model_pfn')

        memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100
        self.current_ep = 0
        self.current_step = 0
        while not rospy.is_shutdown():
            self.updated = False
            while not self.updated and not rospy.is_shutdown():
                rospy.sleep(0.1)
            real_target = self.real_target.copy()
            s = self.normalize_state(real_target)
            action, act_var = self.pfn.choose_action2(s)
            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                = action[0], action[1], action[2]
            self.run_action(self.action_V3)
            print('\n')
            #rospy.sleep(1.0)
Example #6
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.pfn = PFN(a_dim=A_DIM,
                       s_dim=S_DIM,
                       batch_size=8,
                       memory_capacity=MEMORY_NUM,
                       lr=0.001,
                       bound=A_BOUND)
        self.ep_reward = 0.0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized PFN")
        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.sample_target()
        print("Read target data")

        self.pfn.restore_momery()
        self.pfn.restore_model()

        # Episode buffer: one row per step, laid out as [target(3), action(3), reward(1), prob(1)];
        # rows still holding -100 have not been filled yet.
        memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100
        self.current_ep = 0
        self.current_step = 0
        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    s = self.normalize_state(self.target)
                    #print 'x'
                    #print s
                    action, prob = self.pfn.choose_action(s)
                    s_ = self.sim.update_pose(action)[-1, :]
                    self.compute_reward(self.target, s_)

                    #print action
                    #print self.pfn.raw_action(s)
                    #print self.target
                    #print s_
                    #print np.linalg.norm(self.target - s_)
                    #print self.reward
                    transition = np.hstack(
                        (self.target, action, self.reward, prob))
                    memory_ep[self.current_step] = transition
                    self.current_step += 1

                    state = np.array([s_, self.target])
                    self.pub_state(state)

                    if self.pfn.pointer > TRAIN_POINT:
                        self.pfn.learn()

                else:

                    best_i = np.argsort(memory_ep[:, -2])[-1]
                    best_action = memory_ep[best_i, 3:6]
                    best_s_ = self.sim.update_pose(best_action)[-1, :]
                    best_distance = np.linalg.norm(self.target - best_s_)
                    """ #best action
                    print '*'
                    j = np.argsort(memory_ep[:,-2])[0]
                    print memory_ep[best_i, :]
                    print memory_ep[j, :]
                    print best_s_
                    print self.target
                    print best_action
                    print best_distance
                    """

                    self.current_step = 0
                    mean_action, act_var = self.pfn.choose_action2(s)
                    mean_s_ = self.sim.update_pose(mean_action)[-1, :]
                    mean_distance = np.linalg.norm(mean_s_ - self.target)

                    target_action, w = self.compute_weighted_action(memory_ep)
                    s_ = self.sim.update_pose(target_action)[-1, :]
                    target_distance = np.linalg.norm(s_ - self.target)
                    self.compute_reward(self.target, s_)

                    self.pfn.store_transition(s, target_action, mean_distance,
                                              np.var(w))
                    self.pfn.store_transition(s, best_action, best_distance,
                                              w[best_i])
                    self.current_ep += 1
                    self.sample_target()
                    memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100

                    if self.current_ep % 10 == 0:
                        self.reward_record.append(mean_distance)
                        self.ep_record.append(self.current_ep)
                        plt.plot(self.ep_record, self.reward_record)
                        plt.ylim([0.0, 0.1])
                        self.fig.canvas.draw()
                        self.fig.savefig('learning.png')
                        print('\n')
                        #print s
                        print('Episode:', self.current_ep)
                        print("Mean Action:")
                        print mean_action
                        print("Mean Distance:")
                        print mean_distance
                        print("Action Variance:")
                        print act_var
                        print("Target Action:")
                        print target_action
                        print("Target Distance:")
                        print target_distance
                        print("Weights Variance:")
                        print np.var(w)
                        print('*' * 40)
                        self.pfn.save_model()
                        self.pfn.save_memory()

            else:
                s = self.normalize_state(self.target)
                action, act_var = self.pfn.choose_action2(s)
                s_ = self.sim.update_pose(action)[-1, :]
                self.compute_reward(self.target, s_)

                print("Mean Action:")
                print action
                print("Action Variance:")
                print act_var
                print("Distance: %f" % np.linalg.norm(self.target - s_))
                print("Reward: %f" % self.reward)
                print '\n'
                state = np.array([s_, self.target])
                self.pub_state(state)
                rospy.sleep(1)
                self.sample_target()
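compute_weighted_action() is also omitted from the excerpt. Judging from how its return values are used (a 3-vector action plus one weight per stored step), one possibility is a reward-weighted (softmax) average of the episode's actions; the sketch below is only an illustration of that idea, not the original implementation.

    def compute_weighted_action(self, memory_ep):
        # Hypothetical: softmax-weight each stored action by its reward, then average.
        rewards = memory_ep[:, -2]
        w = np.exp(rewards - np.max(rewards))
        w = w / np.sum(w)
        target_action = np.sum(memory_ep[:, 3:6] * w[:, None], axis=0)
        return target_action, w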
Example #7
    def __init__(self):
        """ Initializing DDPG """
        self.ddpg = DDPG(a_dim=A_DIM,
                         s_dim=S_DIM,
                         batch_size=10,
                         memory_capacity=500)
        self.ep_reward = 0.0
        self.current_ep = 0
        self.current_step = 0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.var = 5.0
        print("Initialized DDPG")
        """ Setting communication"""
        self.sub = rospy.Subscriber('robot_pose',
                                    PA,
                                    self.callback,
                                    queue_size=1)
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.state = PA()
        self.updated = False  # if s is updated
        self.got_callback1 = False
        self.got_callback2 = False
        print("Initialized communication")
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/ends.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.scaler = 1 / 0.3
        self.sample_target()
        print("Read target data")

        #self.ddpg.restore_momery()
        #self.ddpg.restore_model()

        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    while not self.updated and not rospy.is_shutdown():
                        rospy.sleep(0.1)
                    s = self.s.copy()

                    delta_a = self.ddpg.choose_action(s) * A_BOUND
                    print(delta_a / A_BOUND)
                    print("Delta action:")
                    print(delta_a)
                    delta_a = np.random.normal(delta_a, self.var)
                    print("Noise delta action:")
                    print(delta_a)
                    self.current_action += delta_a
                    self.current_action = np.clip(self.current_action, 0, 25)
                    self.action_V3.x, self.action_V3.y, self.action_V3.z \
                        = self.current_action[0], self.current_action[1], self.current_action[2]
                    self.run_action(self.action_V3)
                    rospy.sleep(2.5)
                    print "Current action:"
                    print self.current_action

                    self.updated = False
                    while not self.updated and not rospy.is_shutdown():
                        rospy.sleep(0.1)
                    s_ = self.s.copy()
                    self.compute_reward(self.end, self.n_t)

                    action = delta_a / A_BOUND
                    print(action)
                    self.ddpg.store_transition(s, action, self.reward, s_)
                    # print("Experience stored")

                    if self.ddpg.pointer > TRAIN_POINT:
                        if (self.current_step % 2 == 0):
                            self.var *= 0.992
                            self.ddpg.learn()

                    self.current_step += 1
                    self.ep_reward += self.reward

                    if self.reward > GOT_GOAL:
                        self.done = True
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        print "Target Reached"
                        print("Episode %i Ended" % self.current_ep)
                        print(
                            'Episode:',
                            self.current_ep,
                            ' Reward: %d' % self.ep_reward,
                            'Explore: %.2f' % self.var,
                        )
                        print('*' * 40)
                        self.ep_reward = 0
                        self.ddpg.save_memory()
                        self.ddpg.save_model()
                        """
                        self.current_action = np.array([.0, .0, .0])
                        self.action_V3.x, self.action_V3.y, self.action_V3.z \
                            = self.current_action[0], self.current_action[1], self.current_action[2]
                        self.run_action(self.action_V3)
                        """

                    else:
                        self.done = False
                        if self.current_step == MAX_EP_STEPS:
                            print "Target Failed"
                            print("Episode %i Ends" % self.current_ep)
                            print(
                                'Episode:',
                                self.current_ep,
                                ' Reward: %d' % self.ep_reward,
                                'Explore: %.2f' % self.var,
                            )
                            print('*' * 40)
                            self.current_step = 0
                            self.current_ep += 1
                            self.sample_target()
                            self.ep_reward = 0
                            self.ddpg.save_memory()
                            self.ddpg.save_model()
                            """
                            self.current_action = np.array([.0, .0, .0])
                            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                                = self.current_action[0], self.current_action[1], self.current_action[2]
                            self.run_action(self.action_V3)
                            """
                    self.updated = False
                    print('\n')
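A typical way to launch one of these controller classes as a ROS node would be the standard pattern below; the node and class names are placeholders, since the excerpts do not show them.

if __name__ == '__main__':
    rospy.init_node('ddpg_controller', anonymous=True)  # hypothetical node name
    try:
        Trainer()  # hypothetical name of the class these __init__ methods belong to
    except rospy.ROSInterruptException:
        pass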