Example #1
 def get_start_state(self):
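     # reset the robot to a random pole angle in [-3, 3] degrees (converted to radians) and return its state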
     angle = np.random.randint(-3, 4)
     angle = angle * np.pi / 180
     req = RobotActionRequest()
     req.reset_robot = True
     req.reset_pole_angle = angle
     resp = self.robot_state(req)
     return resp.robot_state
Example #2
 def get_state_reward(self, action_idx):
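     # apply one action; reward is 1 while the cart position and pole angle stay within bounds, otherwise 0 with a NaN terminal state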
     action = self.action_list[action_idx]
     req = RobotActionRequest()
     req.reset_robot = False
     req.action = [action]  # type is list
     resp = self.robot_state(req)
     state_new = resp.robot_state
     reward = 1
     if abs(state_new[0]) > self.x_max or abs(state_new[1]) > self.angle_max:
         state_new = [np.nan, np.nan, np.nan, np.nan]
         reward = 0
     return [state_new, reward]
Example #3
    def test(self, test_interval_min, test_interval_max):
        steps_score = np.zeros(self.num_random_tests)
        for k in range(0, self.num_random_tests):
            # Pick an initial state to start from
            req = RobotActionRequest()
            req.reset_robot = True
            req.reset_pole_angle = np.random.uniform(np.deg2rad(test_interval_min), np.deg2rad(test_interval_max)) * self.get_random_sign()
            # print("Initial pole angle is", np.rad2deg(req.reset_pole_angle), "degrees")
            response = self.cartpole_action_service(req)
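            # roll the learned policy forward until the cart-pole leaves its bounds or steps_max is reached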
            for n in range(0, self.steps_max):
                pol = RobotPolicyRequest()
                pol.robot_state = response.robot_state
                # print(pol)
                dqn_response = self.cartpole_policy_service(pol)
                req = RobotActionRequest()
                req.action = dqn_response.action
                req.reset_robot=False
                response = self.cartpole_action_service(req)
                if self.state_out_of_bounds(response.robot_state):
                    # print("Cartpole was balanced for", n, "steps")
                    steps_score[k] = n
                    time.sleep(1)
                    break
                if n == self.steps_max - 1:
                    # print("Episode ended successfully!")
                    steps_score[k] = self.steps_max
                    time.sleep(1)
                time.sleep(0.01)
        return steps_score
Example #4
 def learn(self):
     episode_reward = 0
     total_episodes = 512
     for episode in range(1, total_episodes + 1):
         episode_reward = 0
         s_1 = np.zeros((4, 1))
         s_1[1, 0] = np.random.uniform(-3, 3)
         response = self.reset_robot(s_1)
         print(response.robot_state)
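         # exploration rate decays linearly with the episode count, floored at 0.01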
         epsilon = max(1 - float(episode) / total_episodes, 0.01)
         for t in range(1, 1024):
             s_t = response.robot_state
             req = RobotActionRequest()
             req.reset_robot = False
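             # epsilon-greedy: take a random action with probability epsilon, otherwise the greedy network action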
             dice = np.random.uniform(0, 1)
             if dice < epsilon:
                 a_t = np.random.choice([10, -10])
             else:
                 index = self.act_network(torch.tensor(s_t)).max(0)[1]
                 a_t = self.action_space[index]
             req.action = [a_t]
             response = self.cartpole_action_service(req)
             s_t_1 = response.robot_state
             x = s_t_1[0]
             theta = s_t_1[1]
             if abs(x) > 1.2:
                 break
             elif abs(theta) > 3:
                 r_t_1 = 0
                 s_t_1 = None
                 transition = (s_t, a_t, r_t_1, s_t_1)
                 self.replaymemory.push(transition)
                 break
             else:
                 r_t_1 = 1
                 episode_reward += r_t_1
                 transition = (s_t, a_t, r_t_1, s_t_1)
                 self.replaymemory.push(transition)
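             # train the network every other simulation step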
             if t % 2 == 0:
                 self.train()
             # print('state:',response.robot_state,'action:',req.action)
         print('episode =', episode, 'episode reward =', episode_reward)
         print('loss =', self.loss)
         # report episode reward
         if episode > 5 and episode % 2 == 0:
             # print('=========update target network========')
             self.target_network.load_state_dict(
                 self.act_network.state_dict())
Example #5
    def data_acquisition(self, num_of_data_set, rep_data):
        print('Training Started')
        features = []
        labels = []
        for i in range(num_of_data_set):
            # sample a random constant action, with each component scaled to a different range
            action = np.random.rand(1, 3)
            action[0, 0] = (2 * action[0, 0] - 1.0) * 1.0
            action[0, 1] = (2 * action[0, 1] - 1.0) * 0.5
            action[0, 2] = (2 * action[0, 2] - 1.0) * 0.25
            print('Training for dataset', i + 1)
            real_robot_action = rospy.ServiceProxy('real_robot', RobotAction)
            req = RobotActionRequest()
            req.reset = True
            resp = real_robot_action(req)
            for j in range(rep_data):
                # print('Rep set', j + 1)
                f = np.append(np.array(resp.robot_state), action)
                features.append(f)
                req = RobotActionRequest()
                req.reset = False
                req.action = action.reshape((3))
                resp = real_robot_action(req)
                labels.append(resp.robot_state)
        features = np.asarray(features)
        labels = np.asarray(labels)
        print(features)
        print("Acquisition Completed")
        return features, labels
Example #6
def get_data(n):
    # get data set
    real_robot_action = rospy.ServiceProxy('real_robot', RobotAction)
    feature = []
    label = []
    #print(resp_real)
    #print('------')
    for k in range(n):
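        # reset the robot, apply one random constant action for 200 steps, and use the final state as the label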
        req = RobotActionRequest()
        req.reset = True
        resp_real = real_robot_action(req)
        action = numpy.random.rand(1, 3)
        action[0, 0] = (2 * action[0, 0] - 1.0) * 1.0
        action[0, 1] = (2 * action[0, 1] - 1.0) * 0.5
        action[0, 2] = (2 * action[0, 2] - 1.0) * 0.25
        robsta = resp_real.robot_state
        robsta = numpy.array(robsta)
        sfeature = [
            robsta[0], robsta[1], robsta[2], robsta[3], robsta[4], robsta[5],
            action[0, 0], action[0, 1], action[0, 2]
        ]
        for i in range(200):
            #print(sfeature)
            req = RobotActionRequest()
            req.reset = False
            req.action = action.reshape((3))
            resp_real = real_robot_action(req)
            #print(resp_real.robot_state)
            #print(action)
        feature.append(sfeature)
        print(resp_real)
        label.append(numpy.array(resp_real.robot_state))
    #print(label)
    #rospy.spin()
    return feature, label
Example #7
 def get_new_state(self, action, k):
     req = RobotActionRequest()
     req.reset = True
     resp = self.robot_new_state(req)
     for i in range(self.num_steps):
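         # apply the same action at every step and record the action and resulting state in the data matrix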
         req = RobotActionRequest()
         req.reset = False
         req.action = action.reshape((3))
         resp = self.robot_new_state(req)
         self.train_data_matrix[k, i, 6:9] = action.reshape((3))
         self.train_data_matrix[k, i, 9:15] = resp.robot_state
         time.sleep(0.01)
     # the input state for steps 1..num_steps-1 is the output state of the previous step
     self.train_data_matrix[k, 1:self.num_steps, 0:6] = \
         self.train_data_matrix[k, 0:self.num_steps - 1, 9:15]
Example #8
    def train(self):

        for i in range(0, self.num_random_trains):
            req = RobotActionRequest()
            req.reset_robot = True
            episode_reward = 0
            req.reset_pole_angle = np.random.uniform(
                0.0, np.deg2rad(3.0)) * self.get_random_sign()
            response = self.cartpole_action_service(req)
            state = list(response.robot_state)
            global eps
            eps = max(eps * 0.9, 0.2)  # max(1 - episode_num / self.num_random_trains, 0.01)

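            # run one episode of at most num_steps steps; stop early if the cart or pole leaves its bounds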
            for k in range(self.num_steps):  #right
                req = RobotActionRequest()
                req.reset_robot = False
                action = self.selection(state)
                req.action = action

                response = self.cartpole_action_service(req)
                state_next = list(response.robot_state)

                if abs(state_next[0]) > 1.2 or abs(state_next[1]) > (6 * np.pi / 180):
                    reward = [0]
                    state_next = [0, 0, 0, 0]
                    self.memory.push([state, action, reward, state_next])
                    break
                else:
                    episode_reward += 1
                    reward = [1]
                    self.memory.push([state, action, reward, state_next])

                self.optimize_model()
                state = state_next

            print(episode_reward)
            if i % self.episodes_step == 0:
                self.target_net.load_state_dict(self.action_net.state_dict())

        return self.target_net
Example #9
    def single_test(self, action):
        req = RobotActionRequest()
        req.reset = True
        # send request to reset
        resp = self.get_action(req)

        for i in range(self.num_steps):

            req = RobotActionRequest()
            req.reset = False
            req.action = action

            # get training data
            self.features.append((np.append(resp.robot_state,
                                            req.action)).tolist())
            # send request to move real_robot
            resp = self.get_action(req)
            self.labels.append(resp.robot_state)
Example #10
    def test(self, test_interval_min, test_interval_max):
        # same as executive.py
        for episode in range(300):
            # Pick an initial state to start from
            req = RobotActionRequest()
            req.reset_robot = True
            req.reset_pole_angle = np.random.uniform(np.deg2rad(test_interval_min), np.deg2rad(test_interval_max)) * self.get_random_sign()
            # print("Initial pole angle is", np.rad2deg(req.reset_pole_angle), "degrees")
            s_t = self.cartpole_action_service(req)
            final_reward = 0
            for n in range(0, self.steps_max):
                a_t = self.DQN.execute(s_t.robot_state)

                req = RobotActionRequest()
                req.action = [a_t]
                req.reset_robot = False

                s_t1 = self.cartpole_action_service(req)
                r_t1 = 1

                if self.state_out_of_bounds(s_t1.robot_state):
                    r_t1 = -1
                self.DQN.store(s_t.robot_state, a_t, r_t1, s_t1.robot_state)

                if self.state_out_of_bounds(s_t1.robot_state):
                    break
                final_reward += 1
                s_t = s_t1
                # store training data then learn from it
                self.DQN.learn()

            self.score.append(final_reward)
            if np.median(self.score[-21:]) > 195:
                # if the recent median score is good enough, stop
                break
Example #11
 def single_test(self, action):
     req = RobotActionRequest()
     req.reset = True
     resp = self.real_robot_action(req)
     resp = self.fake_robot_action(req)
     for i in range(self.num_steps):
         req = RobotActionRequest()
         req.reset = False
         req.action = action.reshape((3))
         resp_real = self.real_robot_action(req)
         message = RobotState()
         message.robot_name = str('real_robot')
         message.robot_state = resp_real.robot_state
         self.pub.publish(message)
         resp_fake = self.fake_robot_action(req)
         message = RobotState()
         message.robot_name = str('fake_robot')
         message.robot_state = resp_fake.robot_state
         self.pub.publish(message)
         #time.sleep(0.01)
     return self.score_state(resp_real.robot_state, resp_fake.robot_state)
Example #12
 def perturb(self, action):
     req_real = RobotActionRequest()
     req_real.reset = True
     # send request to reset real_robot config
     resp_real = self.real_robot_action(req_real)
     collect_interval = 1  # how many steps we skip
     # apply a constant action
     for j in range(self.perturb_steps):
         # create a new request
         req_real = RobotActionRequest()
         req_real.reset = False
         req_real.action = action
         is_collected = j % collect_interval == 0
         if is_collected:
             # collect feature
             self.features.append(
                 np.append(resp_real.robot_state, req_real.action).tolist())
         # send request to move real_robot
         resp_real = self.real_robot_action(req_real)
         if is_collected:
             # collect label
             self.labels.append(resp_real.robot_state)
Example #13
    def training_data(self):  #get the data to train fake robot
        n = 0
        for k in range(0, self.num_random_tests):
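            # sample one random constant action per rollout, with each component scaled to a different range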
            action = np.random.rand(1, 3)
            action[0, 0] = (2 * action[0, 0] - 1.0) * 1.0
            action[0, 1] = (2 * action[0, 1] - 1.0) * 0.5
            action[0, 2] = (2 * action[0, 2] - 1.0) * 0.25
            req_real = RobotActionRequest()
            req_real.reset = True
            resp = self.real_robot_action(req_real)
            self.data_fea[k * self.num_steps, 0:6] = np.array(resp.robot_state)

            print(k)
            for i in range(self.num_steps):
                req_real = RobotActionRequest()
                req_real.reset = False
                req_real.action = action.reshape((3))
                resp_real = self.real_robot_action(req_real)
                self.data_fea[n, 6:9] = req_real.action
                self.label[n] = resp_real.robot_state
                n += 1
                self.data_fea[n, 0:6] = resp_real.robot_state
        print('training finished')
        self.data_fea = np.delete(self.data_fea, -1, axis=0)
Example #14
 def __init__(self):
     self.cartpole_action_service = rospy.ServiceProxy('cartpole_robot', RobotAction)
     self.req = RobotActionRequest()
Example #15
 def reset_robot(self, state):
     req = RobotActionRequest()
     req.reset_robot = True
     req.reset_pole_angle = state[1, 0]
     response = self.cartpole_action_service(req)
     return response
Example #16
    def train(self):
        print('start training')
        start = time.time()
        done = False
        n_episodes = 800
        scores = deque(maxlen=20)
        batch_size = 64

        for e in range(n_episodes):
            # initial state
            # self.memory.clear()
            reward = 0

            # reset the robot for each episode
            req = RobotActionRequest()
            req.reset_robot = True
            # np.random.seed(0)
            req.reset_pole_angle = np.random.uniform(
                np.deg2rad(0.0), np.deg2rad(3.0)) * self.get_random_sign()
            # req.reset_pole_angle = np.deg2rad(2.0) * self.get_random_sign()
            state = self.probe_service(req).robot_state
            # print(state)

            i = 0
            done = False
            for j in range(1000):
                action, act_inx = self.act(state)
                # print('action', action, act_inx)
                req = RobotActionRequest()
                req.reset_robot = False
                req.action = [action]
                next_state = self.probe_service(req).robot_state

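                # reward is +10 while the pole stays in bounds, -10 when the state goes out of bounds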
                done = self.state_out_of_bounds(next_state)
                reward = 10 if not done else -10
                # print(state, action, act_inx, reward, next_state, done)
                self.remember(state, action, act_inx, reward, next_state, done)

                # print(state, action, reward, next_state, done)
                state = next_state
                i = i + 1

                # scores.append(i)
                # mean = np.mean(scores)
                if done:
                    episode_durations.append(i)
                    #self.plot_durations()
                    #print(('episode: {}/{}, score:{}, e:{:.2}').format(e, n_episodes, i, self.epsilon))
                    break

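            # after 100 episodes, stop once the recent average duration is high enough or the ~290 s time budget is nearly spent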
            if e > 100:
                if np.mean(episode_durations[-30:]) > 250 or (time.time() - start) >= 290:
                    self.plot_durations()
                    # torch.save(self.model.state_dict(), './x.pth')
                    return

            self.replay(batch_size)

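            # sync the target network with the online model every 10 episodes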
            if e % 10 == 0:
                self.target_net.load_state_dict(self.model.state_dict())
        self.plot_durations()