# Post-step bookkeeping for one environment transition: accumulate the
# episode reward, record the (s, a, s') triple into the forward-dynamics-
# model (FDM) demo buffers, and optionally dump a per-step debug trace
# comparing the learned FDM's predictions against the analytic
# Kinematic_Model.
episode_rew += reward

# Add state transition pair to demo buffer
FDM_buff_s.append(prev_state)
FDM_buff_a.append(action)
FDM_buff_ns.append(obs)

verbose = False  # flip to True for the per-step debug dump below
if verbose:
    print("Action ", action)
    print("Curr state ", prev_state)
    print("True next state ", obs)
    #print("AE pred Nstate ",pred_ns[0],pred_ns[0])
    print("FDM both next state {}\t{}".format(
        FDM_ns_both[0], FDM_ns_both[1]))
    # NOTE(review): sibling blocks disagree on the second action index passed
    # to Kinematic_Model (0/2 here vs 0/1 elsewhere) -- confirm which
    # encoding the kinematic model expects.
    print("True both next state {}\t{}".format(
        Kinematic_Model(prev_state, 0), Kinematic_Model(prev_state, 2)))
    print("Diff {}\t{}".format(
        np.abs(Kinematic_Model(prev_state, 0) - FDM_ns_both[0]),
        np.abs(Kinematic_Model(prev_state, 2) - FDM_ns_both[1])))
    #print("state diff ",state_diff[0],state_diff[1])
    #print("cost ",cost)
    #print("partial cost ",p_cost)
    print("action from IDM full cost ", action_from_IDM)
    #print("action from IDM p cost ",action_from_pIDM)
    #if action == 0:
    #    print("FDM left") #0 Push cart to the left
    #else:
    #    print("FDM right") #1 Push cart to the right
    # The paste truncated this call at "print("; completed with the separator
    # line emitted by the identical sibling debug blocks.
    print("_____________________________________________________________________")
# Oracle-feedback correction step: query the scripted oracle for the action
# it would take in the current observation, shift it into the human-feedback
# key space (+3 offset), and -- when the feedback code is valid -- synthesize
# a "corrected" next state via the kinematic model to use as a training
# target for the autoencoder (AE) buffers.
print("Oracle")
oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim]))
h_fb = oracle_action + 3  # map oracle action into feedback-code space
if (feedback_dict.get(h_fb) != 0):  # if feedback is not zero i.e. is valid
    # Update policy
    #oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim]))
    #h_fb = oracle_action + 3
    #print("Feedback", h_fb)
    # Get new state transition label using feedback
    state_corrected = copy.deepcopy(obs)  # fallback if neither branch fires
    if (h_fb == H_LEFT):  # PUSH CART TO LEFT
        print("Move left")
        # NOTE(review): this uses `state`, while the rest of the block works
        # on `obs`/`prev_state` -- confirm `state` is the intended input and
        # is in scope here.
        state_corrected = Kinematic_Model(state,0)
        #state_corrected[0] -= 0.01 # correction in pos
        #state_corrected[1] -= 0.2 # correction in vel
        #state_corrected[2] += 0.01 # correction in angle
        #state_corrected[3] += 0.27 # correction in anglar vel
    elif (h_fb == H_RIGHT):  # PUSH CART TO RIGHT
        print("Move right")
        state_corrected = Kinematic_Model(state,1)
        #state_corrected[0] += 0.01 # correction in pos
        #state_corrected[1] += 0.2 # correction in vel
        #state_corrected[2] -= 0.01 # correction in angle
        #state_corrected[3] -= 0.27 # correction in anglar vel
    # Add state transition pair to demo buffer
    AE_buff_s.append(obs)
    AE_buff_ns.append(state_corrected)
# One environment step plus bookkeeping: advance the env with the chosen
# action, accumulate the episode reward, record the (s, a, s') triple into
# the FDM demo buffers, optionally dump a debug trace comparing the learned
# FDM against the analytic Kinematic_Model, and bump the step counters.
prev_state = obs
obs, reward, terminal, _ = env.step(action)
episode_rew += reward
# Add state transition pair to demo buffer
FDM_buff_s.append(prev_state)
FDM_buff_a.append(action)
FDM_buff_ns.append(obs)
verbose = False  # flip to True for the per-step debug dump below
if verbose ==True:
    print("Action ",action)
    print("Curr state ",prev_state)
    print("True next state ",obs)
    #print("AE pred Nstate ",pred_ns[0],pred_ns[0])
    print("FDM both next state {}\t{}".format(FDM_ns_both[0],FDM_ns_both[1]))
    # NOTE(review): action indices 0/2 here vs 0/1 in a sibling block --
    # confirm which encoding Kinematic_Model expects.
    print("True both next state {}\t{}".format(Kinematic_Model(prev_state,0),Kinematic_Model(prev_state,2)))
    print("Diff {}\t{}".format(np.abs(Kinematic_Model(prev_state,0)-FDM_ns_both[0]),np.abs(Kinematic_Model(prev_state,2)-FDM_ns_both[1])))
    #print("state diff ",state_diff[0],state_diff[1])
    #print("cost ",cost)
    #print("partial cost ",p_cost)
    print("action from IDM full cost ",action_from_IDM)
    #print("action from IDM p cost ",action_from_pIDM)
    #if action == 0:
    #    print("FDM left") #0 Push cart to the left
    #else:
    #    print("FDM right") #1 Push cart to the right
    print("_____________________________________________________________________")
# Advance per-episode and global step counters.
steps += 1
t_counter+=1
# One environment step plus bookkeeping (always-verbose variant): advance
# the env, accumulate reward, record the (s, a, s') triple into the FDM demo
# buffers, then print a debug trace of the learned-model predictions versus
# the analytic Kinematic_Model.
prev_state = obs
obs, reward, terminal, _ = env.step(action)
episode_rew += reward
# Add state transition pair to demo buffer
FDM_buff_s.append(prev_state)
FDM_buff_a.append(action)
FDM_buff_ns.append(obs)
verbose = True  # debug dump is unconditionally on in this variant
if verbose == True:
    print("Curr state ", prev_state)
    print("True next state ", obs)
    print("AE pred Nstate ", pred_ns)
    # NOTE(review): the learned-FDM prediction is printed here, then
    # FDM_ns_both is immediately overwritten below with the analytic values
    # -- confirm the clobbering is intentional.
    print("FDM both next state ", FDM_ns_both)
    # NOTE(review): this variant feeds `obs` (the NEW state) to
    # Kinematic_Model, while sibling blocks use `prev_state`, and uses
    # action indices 0/1 where siblings use 0/2 -- verify which is intended.
    FDM_ns_l = Kinematic_Model(obs, 0)
    FDM_ns_r = Kinematic_Model(obs, 1)
    FDM_ns_both = np.array([FDM_ns_l, FDM_ns_r])
    print("True both next state ", FDM_ns_both)
    print("cost ", cost)
    print("partial cost ", p_cost)
    print("action from IDM full cost ", action_from_IDM)
    if action == 0:
        print("FDM left") #0 Push cart to the left
    else:
        print("FDM right") #1 Push cart to the right
    print(
        "_____________________________________________________________________"
    )
time.sleep(0.1) # Get feedback signal #h_fb = human_feedback.get_h() oracle_action = oracle.test_action(np.reshape(obs, [1, state_dim])) h_fb = oracle_action + 3 if (feedback_dict.get(h_fb) != 0): # if feedback is not zero i.e. is valid # Update policy print("Feedback", h_fb) # Get new state transition label using feedback state_corrected = copy.deepcopy(obs) if (h_fb == H_LEFT): # PUSH CART TO LEFT print("Move left") state_corrected = Kinematic_Model(state, oracle_action) #state_corrected[0] -= 0.01 # correction in pos #state_corrected[1] -= 0.2 # correction in vel #state_corrected[2] += 0.01 # correction in angle #state_corrected[3] += 0.27 # correction in anglar vel elif (h_fb == H_RIGHT): # PUSH CART TO RIGHT print("Move right") state_corrected = Kinematic_Model(state, oracle_action) #state_corrected[0] += 0.01 # correction in pos #state_corrected[1] += 0.2 # correction in vel #state_corrected[2] -= 0.01 # correction in angle #state_corrected[3] -= 0.27 # correction in anglar vel # Add state transition pair to demo buffer AE_buff_s.append(obs) AE_buff_ns.append(state_corrected)
# One environment step plus bookkeeping (always-verbose variant): advance
# the env, accumulate reward, record the (s, a, s') triple into the FDM demo
# buffers, then print a debug trace including AE predictions, IDM actions,
# and analytic Kinematic_Model next states.
prev_state = obs
obs, reward, terminal, _ = env.step(action)
episode_rew += reward
# Add state transition pair to demo buffer
FDM_buff_s.append(prev_state)
FDM_buff_a.append(action)
FDM_buff_ns.append(obs)
verbose = True  # debug dump is unconditionally on in this variant
if verbose == True:
    print("Curr state ", prev_state)
    print("True next state ", obs)
    # NOTE(review): pred_ns[0] is printed twice -- the second argument was
    # probably meant to be pred_ns[1]; confirm.
    print("AE pred Nstate ", pred_ns[0], pred_ns[0])
    # NOTE(review): learned-FDM values printed here, then FDM_ns_both is
    # overwritten below with the analytic values -- confirm the clobbering
    # is intentional.
    print("FDM both next state ", FDM_ns_both[0], FDM_ns_both[1])
    FDM_ns_l = Kinematic_Model(prev_state, 0)
    # NOTE(review): action index 2 here vs 1 in a sibling block -- confirm
    # which encoding Kinematic_Model expects.
    FDM_ns_r = Kinematic_Model(prev_state, 2)
    FDM_ns_both = np.array([FDM_ns_l, FDM_ns_r])
    print("True both next state ", FDM_ns_both[0], FDM_ns_both[1])
    print("state diff ", state_diff[0], state_diff[1])
    print("cost ", cost)
    print("partial cost ", p_cost)
    print("action from IDM full cost ", action_from_IDM)
    print("action from IDM p cost ", action_from_pIDM)
    if action == 0:
        print("FDM left") #0 Push cart to the left
    else:
        print("FDM right") #1 Push cart to the right