def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, task_likelihoods, pred_high, pred_low, recorder):
    """Run one episode on ``env``, re-planning with ``RS_opt`` at every step.

    At each step a new ``Cost`` object is built from the current state, its
    ``cost_fn`` is stored in ``config["cost_fn"]``, a fresh ``RS_opt(config)``
    is asked for a solution, and only the first ``action_dim`` entries of that
    solution are applied to the environment.

    Args:
        env: Environment providing ``state``, ``reset()``, ``step(action)``
            and ``action_space.shape``; an optional ``goal`` attribute is
            copied into ``config["goal"]``.
        init_state: Unused; kept for interface compatibility.
        steps: Number of environment steps to execute.
        init_mean: Unused; kept for interface compatibility.
        init_var: Unused; kept for interface compatibility.
        model: Forwarded to ``Cost`` as ``model``.
        config: Settings dict. Reads ``"online"``, ``"horizon"``, ``"goal"``
            and ``"record_video"``; ``"goal"`` (when the env has one) and
            ``"cost_fn"`` are overwritten in place.
        last_action_seq: Unused; kept for interface compatibility.
        task_likelihoods: Forwarded to ``Cost``.
        pred_high: Forwarded to ``Cost``.
        pred_low: Forwarded to ``Cost``.
        recorder: Object with ``capture_frame()`` and ``close()``. It is only
            touched when ``config["record_video"]`` is truthy, so ``None`` is
            acceptable otherwise.

    Returns:
        A ``(trajectory, traject_cost)`` tuple. ``trajectory`` is a list with
        one ``[state, action, next_state - state, -reward]`` entry per step
        and ``traject_cost`` is the sum of ``-reward`` over the episode.
    """
    # In "online" mode continue from wherever the environment currently is
    # instead of resetting it.
    current_state = copy.copy(env.state) if config['online'] else env.reset()

    # Not every environment exposes a goal; when it does not, whatever is
    # already stored in config["goal"] is used.
    try:
        config["goal"] = env.goal
    except AttributeError:
        pass

    action_dim = env.action_space.shape[0]
    record_video = config["record_video"]
    trajectory = []
    traject_cost = 0
    bar = ProgBar(steps, track_time=True, title='\nExecuting....', bar_char='▒')

    for i in range(steps):
        cost_object = Cost(model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           task_likelihoods=task_likelihoods,
                           action_dim=action_dim,
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        # The optimizer picks its cost function up from the config dict.
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()

        # Apply only the first action of the planned sequence.
        a = sol[0:action_dim]
        if record_video:
            recorder.capture_frame()
        next_state, r, _, _ = env.step(a)

        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
        bar.update(item_id=" Step " + str(i) + " ")

    if record_video:
        # Grab the final frame before finalizing the video.
        recorder.capture_frame()
        recorder.close()
    return trajectory, traject_cost
def execute_random(env, steps, init_state):
    """Roll out ``steps`` actions sampled from ``env.action_space``.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        steps: Number of environment steps to execute.
        init_state: Unused; kept for interface compatibility.

    Returns:
        A ``(trajectory, traject_cost)`` tuple. ``trajectory`` is an
        object-dtype array with one
        ``[state, action, next_state - state, -reward]`` row per step and
        ``traject_cost`` is the sum of ``-reward`` over the rollout.
    """
    current_state = env.reset()
    trajectory = []
    traject_cost = 0
    for _ in range(steps):
        a = env.action_space.sample()
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
    # Each row mixes arrays of different lengths with a scalar, so the array
    # must be built with dtype=object; recent NumPy releases raise ValueError
    # for such ragged input otherwise.
    return np.array(trajectory, dtype=object), traject_cost
def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, pred_high, pred_low):
    """Run one episode on ``env``, re-planning with ``RS_opt`` at every step.

    At each step a new ``Cost`` object is built from the current state, its
    ``cost_fn`` is stored in ``config["cost_fn"]``, a fresh ``RS_opt(config)``
    is asked for a solution, and only the first ``action_dim`` entries of that
    solution are applied. The value returned by ``test_model`` for every
    observed transition is accumulated and printed at the end.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` and
            ``action_space.shape``; an optional ``goal`` attribute is copied
            into ``config["goal"]``.
        init_state: Unused; kept for interface compatibility.
        steps: Number of environment steps to execute.
        init_mean: Unused; kept for interface compatibility.
        init_var: Unused; kept for interface compatibility.
        model: Forwarded to ``Cost`` as ``ensemble_model`` and to
            ``test_model``.
        config: Settings dict. Reads ``"sol_dim"``, ``"horizon"`` and
            ``"goal"``; ``"goal"`` (when the env has one) and ``"cost_fn"``
            are overwritten in place.
        last_action_seq: Unused; kept for interface compatibility.
        pred_high: Forwarded to ``Cost``.
        pred_low: Forwarded to ``Cost``.

    Returns:
        A ``(trajectory, traject_cost)`` tuple. ``trajectory`` is an
        object-dtype array with one
        ``[state, action, next_state - state, -reward]`` row per step and
        ``traject_cost`` is the sum of ``-reward`` over the episode.
    """
    current_state = env.reset()

    # Not every environment exposes a goal; when it does not, whatever is
    # already stored in config["goal"] is used.
    try:
        config["goal"] = env.goal
    except AttributeError:
        pass

    trajectory = []
    traject_cost = 0
    model_error = 0
    sliding_mean = np.zeros(config["sol_dim"])
    # The drawn values are not used, but the call advances NumPy's global RNG;
    # it is kept so that seeded runs produce the same random stream as before.
    np.random.rand(config["sol_dim"])

    action_dim = env.action_space.shape[0]
    bar = ProgBar(steps, track_time=True, title='\nExecuting....', bar_char='▒')
    for i in range(steps):
        cost_object = Cost(ensemble_model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           action_dim=action_dim,
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        # The optimizer picks its cost function up from the config dict.
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()

        # Apply only the first action of the planned sequence.
        a = sol[0:action_dim]
        next_state, r, _, _ = env.step(a)

        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        model_error += test_model(model, current_state.copy(), a.copy(),
                                  next_state - current_state)
        current_state = next_state
        traject_cost += -r

        # Shift the remaining plan forward by one action. NOTE(review): the
        # optimizer call above takes no arguments, so nothing in this function
        # reads ``sliding_mean`` — confirm whether a warm start was intended.
        sliding_mean[0:-len(a)] = sol[len(a)::]
        bar.update(item_id=" Step " + str(i) + " ")

    print("Model error: ", model_error)
    # Each row mixes arrays of different lengths with a scalar, so the array
    # must be built with dtype=object; recent NumPy releases raise ValueError
    # for such ragged input otherwise.
    return np.array(trajectory, dtype=object), traject_cost
def main(gym_args, config, mismatch_fn, gym_kwargs=None):
    """Run the test-time adaptation experiment described by ``config``.

    The function
      1. creates a fresh result directory (with a ``videos`` sub-directory)
         and writes ``details.txt`` and ``config.json`` into it,
      2. builds the gym environment,
      3. loads the meta-model from ``<data_dir>/<model_name>.pt``, or trains
         it from ``<data_dir>/trajectories.npy`` and saves it when that file
         does not exist,
      4. for every iteration: applies a new mismatch to the environment, runs
         ``execute`` for one episode, recomputes the task likelihoods on all
         data gathered so far, retrains the model of the selected task, and
         appends the episode cost to ``costs.txt``.

    Args:
        gym_args: Positional arguments forwarded to ``gym.make``.
        config: Experiment settings dict. It is also passed to ``execute``,
            ``train_meta`` and ``train_model``.
        mismatch_fn: Callable taking ``config`` and returning the mismatch to
            apply through ``env.set_mismatch``; the returned value must
            support ``.tolist()``.
        gym_kwargs: Optional keyword arguments forwarded to ``gym.make``.
            ``None`` (the default) means no keyword arguments.

    Returns:
        None. All results are written below the result directory.
    """
    import json

    # A dict literal as default would be shared between calls.
    if gym_kwargs is None:
        gym_kwargs = {}

    # ---------Prepare the directories------------------
    now = datetime.now()
    timestamp = now.strftime("%d_%m_%Y_%H_%M_%S")
    experiment_name = timestamp + "_" + config["exp_suffix"]
    base_dir = os.path.join(os.getcwd(), config["result_dir"],
                            config["env_name"], experiment_name)
    res_dir = base_dir
    try:
        i = 0
        while True:
            # Build every candidate from the fixed base so the suffixes read
            # "_0", "_1", ... rather than piling up as "_0_1_2".
            res_dir = base_dir + "_" + str(i)
            i += 1
            if not os.path.isdir(res_dir):
                os.makedirs(res_dir)
                os.makedirs(res_dir + "/videos")
                break
    except OSError:
        print("Could not make the result directory!!!")

    with open(res_dir + "/details.txt", "w+") as f:
        f.write(config["exp_details"])
    with open(res_dir + '/config.json', 'w') as fp:
        json.dump(config, fp)

    # ---------Prepare the test environment---------------
    env = gym.make(*gym_args, **gym_kwargs)
    trained_mismatches = np.load(config["data_dir"] + "/mismatches.npy")
    n_training_tasks = len(trained_mismatches)
    try:
        # A missing DISPLAY variable raises KeyError and selects the
        # no-display branch, as does any failure while rendering.
        os.environ['DISPLAY']
        print("Display available")
        env.render(mode="human")
        env.reset()
    except Exception:
        print("Display not available")
        env.reset()
    print("\n\n\n")

    # ---------Initialize global variables------------------
    data = []
    best_action_seq = np.random.rand(config["sol_dim"]) * 2.0 - 1.0
    best_cost = 10000
    last_action_seq = None
    all_action_seq = []
    all_costs = []
    # Create / truncate the cost log; one line is appended per episode below.
    with open(res_dir + "/costs.txt", "w+") as f:
        f.write("")

    # --------------------Meta learn the models---------------------------
    meta_model = None
    model_path = config["data_dir"] + "/" + config["model_name"] + ".pt"
    if not path.exists(model_path):
        print("Model not found. Learning from data...")
        meta_data = np.load(config["data_dir"] + "/trajectories.npy",
                            allow_pickle=True)
        tasks_in, tasks_out = [], []
        for n in range(n_training_tasks):
            x, y, high, low = process_data(meta_data[n])
            tasks_in.append(x)
            tasks_out.append(y)
            print("task ", n, " data: ", len(tasks_in[n]), len(tasks_out[n]))
        meta_model = train_meta(tasks_in, tasks_out, config)
        meta_model.save(model_path)
    else:
        print("Model found. Loading from '.pt' file...")
        device = torch.device("cuda") if config["cuda"] else torch.device(
            "cpu")
        meta_model = nn_model.load_model(model_path, device)

    # ``raw_models`` stay untouched and are used for the likelihood
    # computation and as the starting point of every retraining; ``models``
    # holds the per-task models handed to ``execute``.
    raw_models = [copy.deepcopy(meta_model) for _ in range(n_training_tasks)]
    models = [copy.deepcopy(meta_model) for _ in range(n_training_tasks)]
    for task_id, m in enumerate(raw_models):
        m.fix_task(task_id)
    for task_id, m in enumerate(models):
        m.fix_task(task_id)

    # ------------------------Test time------------------------------------
    high = np.ones(config["dim_out"]) * 1000.
    low = -np.ones(config["dim_out"]) * 1000.
    task_likelihoods = np.random.rand(n_training_tasks)
    for index_iter in range(config["iterations"]):
        print("Episode: ", index_iter)
        new_mismatch = mismatch_fn(config)
        print("Mismatch: ", new_mismatch.tolist())
        env.set_mismatch(new_mismatch)
        recorder = VideoRecorder(
            env, res_dir + "/videos/" + str(index_iter) +
            ".mp4") if config["record_video"] else None
        trajectory, c = execute(env=env,
                                init_state=config["init_state"],
                                model=models,
                                steps=config["episode_length"],
                                init_mean=np.zeros(config["sol_dim"]),
                                init_var=0.01 * np.ones(config["sol_dim"]),
                                config=config,
                                last_action_seq=None,
                                task_likelihoods=task_likelihoods,
                                pred_high=high,
                                pred_low=low,
                                recorder=recorder)
        data += trajectory

        # -----------Compute likelihood before relearning the models-------
        task_likelihoods = compute_likelihood(data, raw_models,
                                              config['adapt_steps'])
        print("\nlikelihoods: ", task_likelihoods)
        x, y, high, low = process_data(data)
        task_index = sample_model_index(
            task_likelihoods) if config["sample_model"] else np.argmax(
                task_likelihoods)
        print("\nEstimated task-id: ", task_index)
        # Collapse the likelihoods to a one-hot vector on the selected task.
        task_likelihoods = task_likelihoods * 0
        task_likelihoods[task_index] = 1.0

        data_size = config['adapt_steps']
        if data_size is None:
            data_size = len(x)
        print("Learning model with recent ", data_size, " data")
        models[task_index] = train_model(
            model=copy.deepcopy(raw_models[task_index]),
            train_in=x[-data_size::],
            train_out=y[-data_size::],
            task_id=task_index,
            config=config)

        print("\nCost : ", c)
        with open(res_dir + "/costs.txt", "a+") as f:
            f.write(str(c) + "\n")

        if c < best_cost:
            best_cost = c
            best_action_seq = []
            for d in trajectory:
                best_action_seq += d[1].tolist()
            best_action_seq = np.array(best_action_seq)

        last_action_seq = extract_action_seq(trajectory)
        all_action_seq.append(extract_action_seq(trajectory))
        all_costs.append(c)

        # NOTE(review): ``data`` is a nested list whose rows mix arrays and a
        # scalar; recent NumPy releases refuse to convert such ragged input
        # without dtype=object — confirm against the NumPy version in use.
        np.save(res_dir + "/trajectories.npy", data)
        print("\n********************************************************\n")