import copy

import numpy as np
from pyprind import ProgBar  # ProgBar is assumed to come from pyprind

# Cost, RS_opt and test_model are assumed to be provided by project-local modules.


def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, task_likelihoods, pred_high, pred_low, recorder):
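    """Run the controller on ``env`` for ``steps`` steps, re-planning at every step.

    Each iteration builds a Cost object from the learned model, solves it with
    random-shooting optimization (RS_opt) and applies the first action of the
    returned action sequence.
    """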
    # current_state = env.reset()
    current_state = copy.copy(env.state) if config['online'] else env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        # Not every environment exposes a goal.
        pass
    trajectory = []
    traject_cost = 0
    sliding_mean = init_mean  # np.zeros(config["sol_dim"])

    temp_config = copy.deepcopy(config)
    temp_config["popsize"] = 20000
    optimizer = None
    sol = None
    bar = ProgBar(steps,
                  track_time=True,
                  title='\nExecuting....',
                  bar_char='▒')
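    # Receding-horizon loop: re-plan from the current state at every step and
    # execute only the first action of the planned sequence.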
    for i in range(steps):
        cost_object = Cost(model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           task_likelihoods=task_likelihoods,
                           action_dim=env.action_space.shape[0],
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        # sol = optimizer.obtain_solution(sliding_mean, init_var)
        sol = optimizer.obtain_solution()

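        # First action of the optimized action sequence.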
        a = sol[0:env.action_space.shape[0]]
        if config["record_video"]:
            recorder.capture_frame()
        next_state, r, _, _ = env.step(a)

        # env.joint_reset()
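        # Store the transition as (state, action, state difference, cost).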
        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r

        # sliding_mean = last_action_seq[i*config["sol_dim"] : (i+1) * config["sol_dim"]]
        # sliding_mean[0:-len(a)] = sol[len(a)::]
        # sliding_mean[-len(a)::] = sol[-len(a)::]
        bar.update(item_id=" Step " + str(i) + " ")

    if config["record_video"]:
        recorder.capture_frame()
        recorder.close()
    return trajectory, traject_cost


def execute_random(env, steps, init_state):
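    """Roll out a uniformly random policy for ``steps`` steps and record the trajectory."""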
    current_state = env.reset()
    trajectory = []
    traject_cost = 0
    for i in range(steps):
        a = env.action_space.sample()
        next_state, r, _, _ = env.step(a)

        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
    return np.array(trajectory), traject_cost


def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, pred_high, pred_low):
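    """Variant of the execution loop that plans against an ensemble model.

    Re-plans with RS_opt at every step, applies the first action, and also
    accumulates the model's one-step prediction error via test_model.
    """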
    current_state = env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        # Not every environment exposes a goal.
        pass
    trajectory = []
    traject_cost = 0
    model_error = 0
    sliding_mean = np.zeros(config["sol_dim"])
    rand = np.random.rand(config["sol_dim"])
    bar = ProgBar(steps,
                  track_time=True,
                  title='\nExecuting....',
                  bar_char='▒')
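    # Same receding-horizon structure as above: re-plan every step and execute
    # only the first action of the planned sequence.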
    for i in range(steps):
        cost_object = Cost(ensemble_model=model,
                           init_state=current_state,
                           horizon=config["horizon"],
                           action_dim=env.action_space.shape[0],
                           goal=config["goal"],
                           pred_high=pred_high,
                           pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()
        # Take the first action of the planned sequence.
        a = sol[0:env.action_space.shape[0]]
        # Soft-action alternative (disabled):
        # a = sol[0:env.action_space.shape[0]] if i == 0 else sol[0:env.action_space.shape[0]] * 0.8 + a * 0.2
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(),
             a.copy(), next_state - current_state, -r])
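        # Accumulate the model's one-step prediction error on the observed transition.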
        model_error += test_model(model, current_state.copy(), a.copy(),
                                  next_state - current_state)
        current_state = next_state
        traject_cost += -r
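        # Shift the unexecuted tail of the plan into sliding_mean as a warm start
        # (not currently passed to obtain_solution above).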
        sliding_mean[0:-len(a)] = sol[len(a)::]
        bar.update(item_id=" Step " + str(i) + " ")

    print("Model error: ", model_error)
    return np.array(trajectory), traject_cost