def get_active_exp(env, threshold, ae, xm, xs, render, take_max=False, max_act_steps=20):
    """Collect one expert demonstration from a "novel" initial state.

    Novelty is measured by the autoencoder's reconstruction error on the
    normalized, flattened goal-conditioned state. Two modes:

    - take_max=False: keep resetting until the predicted error exceeds
      ``threshold * err_avg`` (err_avg estimated over 20 random resets),
      then ask the expert controller for a demo from that state.
    - take_max=True: sample ``max_act_steps`` candidate states, restore the
      one with the largest predicted error, and demo from there.

    Args:
        env: goal-conditioned environment (dict observations with
            "observation"/"achieved_goal"/"desired_goal" keys).
        threshold: multiplier on the average error used as novelty cutoff.
        ae: autoencoder exposing ``error(x)`` for a (1, D) input.
        xm, xs: normalization mean and scale for the state vector.
        render: forwarded to the expert controller.
        take_max: select the max-error candidate instead of thresholding.
        max_act_steps: number of candidates sampled when take_max is True.

    Returns:
        (states, actions) lists from the expert demonstration.
    """
    def _state_error(state):
        # AE reconstruction error of the normalized, flattened state —
        # this expression was previously duplicated four times inline.
        flat = np.concatenate((state["observation"],
                               state["achieved_goal"],
                               state["desired_goal"]))
        return ae.error((flat.reshape((1, -1)) - xm) / xs)

    # Estimate the average reconstruction error over 20 random initial states.
    err_avg = 0
    for _ in range(20):
        env.reset()
        state = robot_reset(env)
        err_avg += _state_error(state)
    err_avg /= 20

    # NOTE: this first candidate deliberately skips robot_reset (as in the
    # original code) — TODO confirm whether that asymmetry is intended.
    state = env.reset()
    error = _state_error(state)

    if not take_max:
        tried = 0
        # 'not error > x' (rather than 'error <= x') also exits on NaN errors.
        while not error > threshold * err_avg:
            tried += 1
            env.reset()
            state = robot_reset(env)
            error = _state_error(state)
        return man_controller.get_demo(env, state, CTRL_NORM, render)

    # take_max mode: sample candidates, remembering the saved env state
    # alongside each predicted error.
    errs_states = []
    for _ in range(max_act_steps):
        env.reset()
        state = robot_reset(env)
        error = _state_error(state)
        s, g = utils.save_state(env)
        errs_states.append([s, g, error])

    # Pick the candidate with the largest predicted error. (Fixes the old
    # manual scan whose sentinel was ``max_key = ()`` — a tuple — which
    # would crash obscurely if every error were <= -1000.)
    max_key = max(range(len(errs_states)), key=lambda i: errs_states[i][2])
    new_env = utils.set_state(env, errs_states[max_key][0], errs_states[max_key][1])
    state, *_ = new_env.step(np.zeros(4))
    return man_controller.get_demo(new_env, state, CTRL_NORM, render)
def get_active_exp(env, threshold, ae, xm, xs, render):
    """Reset the environment until the autoencoder's predicted error on the
    initial state exceeds ``threshold`` times the average error, then collect
    an expert demonstration from that state.

    Returns (states, actions) from the demonstration.
    """
    def _predicted_error(state):
        # AE reconstruction error on the normalized, flattened state vector.
        vec = np.concatenate((state["observation"],
                              state["achieved_goal"],
                              state["desired_goal"]))
        return ae.error((vec.reshape((1, -1)) - xm) / xs)

    # Baseline: average predicted error over 20 random resets.
    err_avg = 0
    for _ in range(20):
        err_avg += _predicted_error(env.reset())
    err_avg /= 20

    state = env.reset()
    error = _predicted_error(state)
    attempts = 0
    # Keep resampling initial states until one clears the novelty threshold
    # ('not >' form kept so a NaN error also terminates the loop).
    while not error > threshold * err_avg:
        attempts += 1
        state = env.reset()
        error = _predicted_error(state)

    new_states, new_acts = man_controller.get_demo(env, state, render)
    return new_states, new_acts
def get_experience(eps, env):
    """Collect expert demonstrations over ``eps`` episodes.

    Returns flat (states, actions) lists concatenated across all episodes.
    """
    states = []
    actions = []
    for _ in range(eps):
        initial = env.reset()
        demo_states, demo_actions = man_controller.get_demo(env, initial)
        states.extend(demo_states)
        actions.extend(demo_actions)
    return states, actions
def get_experience(eps, env, render=False):
    """Collect ``eps`` expert demonstrations, concatenating the resulting
    state and action sequences into flat lists.

    Args:
        eps: number of episodes to demonstrate.
        env: environment to reset before each demonstration.
        render: forwarded to the expert controller.

    Returns:
        (states, actions) lists accumulated across episodes.
    """
    all_states, all_actions = [], []
    for _ in range(eps):
        start = env.reset()
        ep_states, ep_actions = man_controller.get_demo(env, start, CTRL_NORM, render)
        all_states.extend(ep_states)
        all_actions.extend(ep_actions)
    return all_states, all_actions
def get_active_exp2(env, avg_error_trainset, model, ae, xm, xs, am, ast, render, take_max=False, max_act_steps=20):
    """Roll out the current policy until it fails, then collect an expert
    demonstration starting from the failure state.

    Demonstrations longer than 100 steps are treated as failed and retried
    (via recursive calls) until a short one is obtained.

    Returns (states, actions) from the successful demonstration.
    """
    env.reset()
    robot_reset(env)
    policy_succeeded = True
    # Keep attempting the task with the current policy until try_complete
    # reports a failure; env/state then describe the situation the robot
    # could not solve on its own.
    while policy_succeeded:
        env.reset()
        state = robot_reset(env)
        policy_succeeded, env, state, error = try_complete(
            model, ae, avg_error_trainset * ACTIVE_ERROR_THR,
            env, xm, xs, am, ast, render=RENDER_TEST)

    time.sleep(1.)  # brief pause before the expert takes over
    new_states, new_acts = man_controller.get_demo(env, state, CTRL_NORM, render)

    # A demo over 100 steps is considered a failed demonstration; retry with
    # a fresh failure state (each recursive call loops until it succeeds, so
    # this outer loop normally runs at most once per retry).
    while len(new_states) > 100:
        new_states, new_acts = get_active_exp2(
            env, avg_error_trainset, model, ae, xm, xs, am, ast,
            render, take_max=False, max_act_steps=20)
    return new_states, new_acts