예제 #1
0
        # Accumulate this step's reward into the running episode total.
        episode_rew += reward

        # Add state transition pair to demo buffer
        # (one entry per step in each of the three FDM buffers:
        # prev_state, action, obs).
        FDM_buff_s.append(prev_state)
        FDM_buff_a.append(action)
        FDM_buff_ns.append(obs)
        # Debug dump switch: hard-coded off, so the branch below never runs
        # unless this literal is edited.
        verbose = False
        if verbose == True:
            print("Action                           ", action)
            print("Curr state                       ", prev_state)
            print("True next state                  ", obs)
            # NOTE(review): the disabled line below prints pred_ns[0] twice;
            # confirm the intended second index before re-enabling it.
            #print("AE pred Nstate                   ",pred_ns[0],pred_ns[0])
            # FDM_ns_both is read here but not assigned in this excerpt; it
            # must already be bound by earlier code in the enclosing scope.
            print("FDM both next state               {}\t{}".format(
                FDM_ns_both[0], FDM_ns_both[1]))
            # Kinematic_Model is evaluated on prev_state with action
            # arguments 0 and 2 and shown next to FDM_ns_both[0] / [1].
            print("True both next state              {}\t{}".format(
                Kinematic_Model(prev_state, 0), Kinematic_Model(prev_state,
                                                                2)))
            # Element-wise absolute difference between the Kinematic_Model
            # outputs and the corresponding FDM_ns_both entries.
            print("Diff                              {}\t{}".format(
                np.abs(Kinematic_Model(prev_state, 0) - FDM_ns_both[0]),
                np.abs(Kinematic_Model(prev_state, 2) - FDM_ns_both[1])))
            #print("state diff                       ",state_diff[0],state_diff[1])
            #print("cost                             ",cost)
            #print("partial cost                     ",p_cost)
            print("action from IDM  full cost       ", action_from_IDM)
            #print("action from IDM  p cost          ",action_from_pIDM)

            # Disabled left/right action label (0 = push cart to the left,
            # otherwise push cart to the right).
            #if action == 0:
            #    print("FDM left")        #0 Push cart to the left
            #else:
            #    print("FDM right")        #1 Push cart to the right
            print(
예제 #2
0
            print("Oracle")
            # Ask the oracle for an action, passing the current observation
            # reshaped to [1, state_dim].
            oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim]))
            # Shift the oracle's action by 3 to obtain the feedback code that
            # is later compared with H_LEFT / H_RIGHT.
            # NOTE(review): presumably the H_* constants are action index + 3;
            # confirm against their definitions.
            h_fb = oracle_action + 3
        
        
        # Relabel the transition only when the feedback code maps to a
        # non-zero entry in feedback_dict.
        # NOTE(review): if feedback_dict is a plain dict, an unknown h_fb
        # yields None from .get(), and None != 0 also passes this test.
        if (feedback_dict.get(h_fb) != 0):  # if feedback is not zero i.e. is valid
            # Update policy
            #oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim]))
            #h_fb = oracle_action + 3
            #print("Feedback", h_fb)

            # Get new state transition label using feedback
            # Default label is a deep copy of obs; it is kept as-is when h_fb
            # is neither H_LEFT nor H_RIGHT.
            state_corrected = copy.deepcopy(obs)
            # NOTE(review): both branches feed `state` to Kinematic_Model,
            # while the default label and the buffered input use `obs`;
            # `state` is not assigned in this excerpt. Confirm it is the
            # intended variable.
            if (h_fb == H_LEFT): # PUSH CART TO LEFT
                print("Move left")
                state_corrected = Kinematic_Model(state,0)
                #state_corrected[0] -= 0.01 # correction in pos
                #state_corrected[1] -= 0.2 # correction in vel
                #state_corrected[2] += 0.01 # correction in angle
                #state_corrected[3] += 0.27 # correction in angular vel
            elif (h_fb == H_RIGHT):# PUSH CART TO RIGHT
                print("Move right")
                state_corrected = Kinematic_Model(state,1)
                #state_corrected[0] += 0.01 # correction in pos
                #state_corrected[1] += 0.2 # correction in vel
                #state_corrected[2] -= 0.01 # correction in angle
                #state_corrected[3] -= 0.27 # correction in angular vel
            
            # Add state transition pair to demo buffer
            # (obs as the input, state_corrected as its target).
            AE_buff_s.append(obs)
            AE_buff_ns.append(state_corrected)
예제 #3
0
        # Keep the pre-step observation, then advance the environment by one
        # step; the fourth value returned by env.step is discarded.
        prev_state = obs
        obs, reward, terminal, _ = env.step(action)
        episode_rew += reward

        # Add state transition pair to demo buffer
        # (one entry per step in each buffer: prev_state, action, obs).
        FDM_buff_s.append(prev_state)
        FDM_buff_a.append(action)
        FDM_buff_ns.append(obs)

        # Debug dump switch: hard-coded off; edit this literal to enable the
        # per-step printout below.
        verbose = False
        if verbose:
            print("Action                           ", action)
            print("Curr state                       ", prev_state)
            print("True next state                  ", obs)
            # print("AE pred Nstate                   ", pred_ns[0], pred_ns[0])
            # FDM_ns_both and action_from_IDM are read but not assigned here;
            # they must be bound by earlier code in the enclosing scope.
            print("FDM both next state               {}\t{}".format(
                FDM_ns_both[0], FDM_ns_both[1]))
            # Kinematic_Model outputs for action arguments 0 and 2, shown
            # next to FDM_ns_both[0] / [1] and then differenced element-wise.
            print("True both next state              {}\t{}".format(
                Kinematic_Model(prev_state, 0),
                Kinematic_Model(prev_state, 2)))
            print("Diff                              {}\t{}".format(
                np.abs(Kinematic_Model(prev_state, 0) - FDM_ns_both[0]),
                np.abs(Kinematic_Model(prev_state, 2) - FDM_ns_both[1])))
            # print("state diff                       ", state_diff[0], state_diff[1])
            # print("cost                             ", cost)
            # print("partial cost                     ", p_cost)
            print("action from IDM  full cost       ", action_from_IDM)
            # print("action from IDM  p cost          ", action_from_pIDM)

            # if action == 0:
            #     print("FDM left")   # 0 Push cart to the left
            # else:
            #     print("FDM right")  # 1 Push cart to the right
            print("_____________________________________________________________________")

        # Advance both step counters once per environment step.
        steps += 1
        t_counter += 1
예제 #4
0
        # Keep the pre-step observation, then advance the environment by one
        # step; the fourth value returned by env.step is discarded.
        prev_state = obs
        obs, reward, terminal, _ = env.step(action)
        episode_rew += reward

        # Add state transition pair to demo buffer
        # (one entry per step in each buffer: prev_state, action, obs).
        FDM_buff_s.append(prev_state)
        FDM_buff_a.append(action)
        FDM_buff_ns.append(obs)

        # Debug dump switch: hard-coded on, so the printout below runs on
        # every step.
        verbose = True
        if verbose:
            print("Curr state                       ", prev_state)
            print("True next state                  ", obs)
            # pred_ns, FDM_ns_both, cost, p_cost and action_from_IDM are read
            # here but must be bound by earlier code in the enclosing scope.
            print("AE pred Nstate                   ", pred_ns)
            print("FDM both next state              ", FDM_ns_both)
            # NOTE(review): this rebinds FDM_ns_both to Kinematic_Model
            # outputs, so any later reader of FDM_ns_both sees these values
            # while verbose is on. It also feeds the post-step `obs` to the
            # model rather than `prev_state`; confirm both are intended.
            FDM_ns_l = Kinematic_Model(obs, 0)
            FDM_ns_r = Kinematic_Model(obs, 1)
            FDM_ns_both = np.array([FDM_ns_l, FDM_ns_r])
            print("True both next state              ", FDM_ns_both)
            print("cost                             ", cost)
            print("partial cost                     ", p_cost)
            print("action from IDM  full cost       ", action_from_IDM)

        # Printed on every step regardless of the verbose flag.
        if action == 0:
            print("FDM left")  # 0 Push cart to the left
        else:
            print("FDM right")  # 1 Push cart to the right
        print("_____________________________________________________________________")
예제 #5
0
        # Pause for 0.1 s before querying for feedback.
        time.sleep(0.1)
        # Get feedback signal
        # (the human-feedback source is disabled; the oracle is used instead).
        #h_fb = human_feedback.get_h()
        # Query the oracle with the observation reshaped to [1, state_dim] and
        # shift its action by 3 to obtain the feedback code.
        # NOTE(review): presumably the H_* constants are action index + 3;
        # confirm against their definitions.
        oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim]))
        h_fb = oracle_action + 3

        # NOTE(review): if feedback_dict is a plain dict, an unknown h_fb
        # yields None from .get(), and None != 0 also passes this test.
        if (feedback_dict.get(h_fb) !=
                0):  # if feedback is not zero i.e. is valid
            # Update policy
            print("Feedback", h_fb)

            # Get new state transition label using feedback
            # Default label is a deep copy of obs; it is kept as-is when h_fb
            # is neither H_LEFT nor H_RIGHT.
            state_corrected = copy.deepcopy(obs)
            # NOTE(review): both branches make the same call,
            # Kinematic_Model(state, oracle_action), and differ only in the
            # printed message. `state` is not assigned in this excerpt, while
            # the buffered input below is `obs`; confirm it is intended.
            if (h_fb == H_LEFT):  # PUSH CART TO LEFT
                print("Move left")
                state_corrected = Kinematic_Model(state, oracle_action)
                #state_corrected[0] -= 0.01 # correction in pos
                #state_corrected[1] -= 0.2 # correction in vel
                #state_corrected[2] += 0.01 # correction in angle
                #state_corrected[3] += 0.27 # correction in angular vel
            elif (h_fb == H_RIGHT):  # PUSH CART TO RIGHT
                print("Move right")
                state_corrected = Kinematic_Model(state, oracle_action)
                #state_corrected[0] += 0.01 # correction in pos
                #state_corrected[1] += 0.2 # correction in vel
                #state_corrected[2] -= 0.01 # correction in angle
                #state_corrected[3] -= 0.27 # correction in angular vel

            # Add state transition pair to demo buffer
            # (obs as the input, state_corrected as its target).
            AE_buff_s.append(obs)
            AE_buff_ns.append(state_corrected)
예제 #6
0
        # Keep the pre-step observation, then advance the environment by one
        # step; the fourth value returned by env.step is discarded.
        prev_state = obs
        obs, reward, terminal, _ = env.step(action)
        episode_rew += reward

        # Add state transition pair to demo buffer
        # (one entry per step in each buffer: prev_state, action, obs).
        FDM_buff_s.append(prev_state)
        FDM_buff_a.append(action)
        FDM_buff_ns.append(obs)

        # Debug dump switch: hard-coded on, so the printout below runs on
        # every step.
        verbose = True
        if verbose:
            print("Curr state                       ", prev_state)
            print("True next state                  ", obs)
            # NOTE(review): pred_ns[0] is printed twice; the second argument
            # may have been meant to be a different entry. Left unchanged
            # because the length of pred_ns is not visible here.
            print("AE pred Nstate                   ", pred_ns[0], pred_ns[0])
            print("FDM both next state              ", FDM_ns_both[0],
                  FDM_ns_both[1])
            # NOTE(review): this rebinds FDM_ns_both to the Kinematic_Model
            # outputs for action arguments 0 and 2, so any later reader of
            # FDM_ns_both sees these values while verbose is on.
            FDM_ns_l = Kinematic_Model(prev_state, 0)
            FDM_ns_r = Kinematic_Model(prev_state, 2)
            FDM_ns_both = np.array([FDM_ns_l, FDM_ns_r])
            print("True both next state             ", FDM_ns_both[0],
                  FDM_ns_both[1])
            # state_diff, cost, p_cost, action_from_IDM and action_from_pIDM
            # are read here but must be bound by earlier code in the
            # enclosing scope.
            print("state diff                       ", state_diff[0],
                  state_diff[1])
            print("cost                             ", cost)
            print("partial cost                     ", p_cost)
            print("action from IDM  full cost       ", action_from_IDM)
            print("action from IDM  p cost          ", action_from_pIDM)

        # Printed on every step regardless of the verbose flag.
        if action == 0:
            print("FDM left")  # 0 Push cart to the left
        else:
            print("FDM right")  # 1 Push cart to the right