Code Example #1
def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, task_likelihoods, pred_high, pred_low, recorder):
    """Run one MPC episode, re-planning from the current state at every step."""
    # In online mode, continue from the environment's current state;
    # otherwise start a fresh episode.
    current_state = copy.copy(env.state) if config['online'] else env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        pass  # The environment defines no explicit goal.
    trajectory = []
    traject_cost = 0
    bar = ProgBar(steps,
                  track_time=True,
                  title='\nExecuting....',
                  bar_char='▒')
    for i in range(steps):
        cost_object = Cost(model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           task_likelihoods=task_likelihoods,
                           action_dim=env.action_space.shape[0],
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()

        # Execute only the first action of the optimized sequence (MPC).
        a = sol[0:env.action_space.shape[0]]
        if config["record_video"]:
            recorder.capture_frame()
        next_state, r, _, _ = env.step(a)

        # Store (state, action, state change, cost) for model adaptation.
        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r

        bar.update(item_id=" Step " + str(i) + " ")

    if config["record_video"]:
        recorder.capture_frame()
        recorder.close()
    return trajectory, traject_cost
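
A minimal sketch of the configuration keys that this function reads directly (the key names come from the code above; the values are illustrative placeholders, and Cost and RS_opt will require further keys of their own):

config = {
    'online': False,        # continue from env.state instead of env.reset()
    'goal': None,           # overwritten with env.goal when available
    'horizon': 20,          # planning horizon passed to Cost (placeholder)
    'record_video': False,  # toggles recorder.capture_frame()
    'cost_fn': None,        # filled in each step with cost_object.cost_fn
}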
Code Example #2
def execute_random(env, steps, init_state):
    """Collect one trajectory by applying uniformly random actions."""
    current_state = env.reset()
    trajectory = []
    traject_cost = 0
    for i in range(steps):
        a = env.action_space.sample()
        next_state, r, _, _ = env.step(a)

        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
    return np.array(trajectory), traject_cost
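
A brief usage sketch, assuming a Gym environment has already been constructed (the environment id is a hypothetical placeholder; note that init_state is unused in the function body):

env = gym.make("YourEnv-v0")  # hypothetical environment id
random_trajectory, random_cost = execute_random(env, steps=100, init_state=None)
print("Random-policy cost:", random_cost)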
Code Example #3
def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, pred_high, pred_low):
    """Run one MPC episode with an ensemble model, tracking model error."""
    current_state = env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        pass  # The environment defines no explicit goal.
    trajectory = []
    traject_cost = 0
    model_error = 0
    # sliding_mean is shifted below for warm-starting, but RS_opt re-samples
    # from scratch, so it currently has no effect on the optimization.
    sliding_mean = np.zeros(config["sol_dim"])
    bar = ProgBar(steps,
                  track_time=True,
                  title='\nExecuting....',
                  bar_char='▒')
    for i in range(steps):
        cost_object = Cost(ensemble_model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           action_dim=env.action_space.shape[0],
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()
        # Take the first action of the optimized sequence; a soft blend
        # with the previous action is left disabled here.
        a = sol[0:env.action_space.shape[0]]
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
        # Accumulate the model's one-step prediction error for diagnostics.
        model_error += test_model(model, current_state.copy(), a.copy(),
                                  next_state - current_state)
        current_state = next_state
        traject_cost += -r
        # Shift the solution one action forward (unused warm start; see above).
        sliding_mean[0:-len(a)] = sol[len(a)::]
        bar.update(item_id=" Step " + str(i) + " ")

    print("Model error: ", model_error)
    return np.array(trajectory), traject_cost
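
test_model is referenced above but not included in these excerpts. A plausible sketch, assuming (hypothetically) that the model exposes a predict(state, action) method returning the predicted state change:

def test_model(model, state, action, actual_change):
    # Hypothetical sketch: mean squared error between the predicted one-step
    # state change and the observed change. The predict() signature is an
    # assumption, not taken from the excerpts above.
    predicted_change = model.predict(state, action)
    return float(np.mean((predicted_change - actual_change) ** 2))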
Code Example #4
def main(gym_args, config, mismatch_fn, gym_kwargs=None):
    # Avoid a mutable default argument.
    gym_kwargs = {} if gym_kwargs is None else gym_kwargs
    '''---------Prepare the directories------------------'''
    now = datetime.now()
    timestamp = now.strftime("%d_%m_%Y_%H_%M_%S")
    experiment_name = timestamp + "_" + config["exp_suffix"]
    res_dir = os.path.join(os.getcwd(), config["result_dir"],
                           config["env_name"], experiment_name)
    try:
        i = 0
        base_dir = res_dir
        while True:
            # Try base_dir_0, base_dir_1, ... rather than accumulating
            # suffixes onto the same string.
            res_dir = base_dir + "_" + str(i)
            i += 1
            if not os.path.isdir(res_dir):
                os.makedirs(res_dir)
                os.makedirs(res_dir + "/videos")
                break
    except OSError:
        print("Could not make the result directory!!!")

    with open(res_dir + "/details.txt", "w+") as f:
        f.write(config["exp_details"])

    import json
    with open(res_dir + '/config.json', 'w') as fp:
        json.dump(config, fp)
    '''---------Prepare the test environment---------------'''
    env = gym.make(*gym_args, **gym_kwargs)
    trained_mismatches = np.load(config["data_dir"] + "/mismatches.npy")
    n_training_tasks = len(trained_mismatches)
    try:
        os.environ['DISPLAY']  # raises KeyError when no display is attached
        print("Display available")
        env.render(mode="human")
        env.reset()
    except Exception:
        print("Display not available")
        env.reset()

    print("\n\n\n")
    '''---------Initialize global variables------------------'''
    data = []
    models = []
    best_action_seq = np.random.rand(config["sol_dim"]) * 2.0 - 1.0
    best_cost = 10000
    last_action_seq = None
    all_action_seq = []
    all_costs = []
    with open(res_dir + "/costs.txt", "w+") as f:
        f.write("")
    '''--------------------Meta learn the models---------------------------'''
    meta_model = None
    if not path.exists(config["data_dir"] + "/" + config["model_name"] +
                       ".pt"):
        print("Model not found. Learning from data...")
        meta_data = np.load(config["data_dir"] + "/trajectories.npy",
                            allow_pickle=True)
        tasks_in, tasks_out = [], []
        for n in range(n_training_tasks):
            x, y, high, low = process_data(meta_data[n])
            tasks_in.append(x)
            tasks_out.append(y)
            print("task ", n, " data: ", len(tasks_in[n]), len(tasks_out[n]))
        meta_model = train_meta(tasks_in, tasks_out, config)
        meta_model.save(config["data_dir"] + "/" + config["model_name"] +
                        ".pt")
    else:
        print("Model found. Loading from '.pt' file...")
        device = torch.device("cuda") if config["cuda"] else torch.device(
            "cpu")
        meta_model = nn_model.load_model(
            config["data_dir"] + "/" + config["model_name"] + ".pt", device)

    raw_models = [copy.deepcopy(meta_model) for _ in range(n_training_tasks)]
    models = [copy.deepcopy(meta_model) for _ in range(n_training_tasks)]
    for task_id, m in enumerate(raw_models):
        m.fix_task(task_id)

    for task_id, m in enumerate(models):
        m.fix_task(task_id)
    '''------------------------Test time------------------------------------'''

    high = np.ones(config["dim_out"]) * 1000.
    low = -np.ones(config["dim_out"]) * 1000.
    task_likelihoods = np.random.rand(n_training_tasks)

    for index_iter in range(config["iterations"]):
        print("Episode: ", index_iter)
        new_mismatch = mismatch_fn(config)
        print("Mismatch: ", new_mismatch.tolist())
        env.set_mismatch(new_mismatch)
        recorder = VideoRecorder(
            env, res_dir + "/videos/" + str(index_iter) +
            ".mp4") if config["record_video"] else None
        trajectory, c = execute(env=env,
                                init_state=config["init_state"],
                                model=models,
                                steps=config["episode_length"],
                                init_mean=np.zeros(config["sol_dim"]),
                                init_var=0.01 * np.ones(config["sol_dim"]),
                                config=config,
                                last_action_seq=None,
                                task_likelihoods=task_likelihoods,
                                pred_high=high,
                                pred_low=low,
                                recorder=recorder)

        data += trajectory
        '''-----------------Compute likelihood before relearning the models-------'''
        task_likelihoods = compute_likelihood(data, raw_models,
                                              config['adapt_steps'])
        print("\nlikelihoods: ", task_likelihoods)

        x, y, high, low = process_data(data)

        task_index = sample_model_index(
            task_likelihoods) if config["sample_model"] else np.argmax(
                task_likelihoods)
        print("\nEstimated task-id: ", task_index)
        # Collapse the belief to a one-hot distribution over training tasks.
        task_likelihoods = task_likelihoods * 0
        task_likelihoods[task_index] = 1.0
        data_size = config['adapt_steps']
        if data_size is None:
            data_size = len(x)
        print("Learning model with recent ", data_size, " data")
        models[task_index] = train_model(
            model=copy.deepcopy(raw_models[task_index]),
            train_in=x[-data_size::],
            train_out=y[-data_size::],
            task_id=task_index,
            config=config)

        print("\nCost : ", c)
        with open(res_dir + "/costs.txt", "a+") as f:
            f.write(str(c) + "\n")

        if c < best_cost:
            best_cost = c
            best_action_seq = []
            for d in trajectory:
                best_action_seq += d[1].tolist()
            best_action_seq = np.array(best_action_seq)
            last_action_seq = extract_action_seq(trajectory)

        all_action_seq.append(extract_action_seq(trajectory))
        all_costs.append(c)

        np.save(res_dir + "/trajectories.npy", data)
        print("\n********************************************************\n")