Example #1
 def actor_spec(neurons: Sequence[int], num_risky: int)\
         -> Sequence[FuncApproxSpec]:
     alpha_beta_vars = [FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.softplus,
             output_activation_deriv=DNNSpec.softplus_deriv
         )
     ) for _ in range(num_risky + 2)]
     means = [FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.identity,
             output_activation_deriv=DNNSpec.identity_deriv
         )
     ) for _ in range(num_risky)]
     return alpha_beta_vars + means
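A note on the design above: the first num_risky + 2 approximators (alpha_beta_vars) use a softplus output activation, presumably so the parameters they emit are guaranteed to be strictly positive, while the mean approximators use an identity output and may take any sign. A standalone NumPy sketch of softplus's positivity property (illustration only, not library code):

import numpy as np

def softplus(x):
    # softplus(x) = log(1 + exp(x)) is smooth and strictly positive for all real x
    return np.log1p(np.exp(x))

print(softplus(np.array([-10.0, 0.0, 10.0])))  # every output is > 0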
Example #2
 def get_actor_nu_spec() -> FuncApproxSpec:
     return FuncApproxSpec(
         state_feature_funcs=[],
         sa_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=[],
             hidden_activation=DNNSpec.log_squish,
             hidden_activation_deriv=DNNSpec.log_squish_deriv,
             output_activation=DNNSpec.pos_log_squish,
             output_activation_deriv=DNNSpec.pos_log_squish_deriv))
 def actor_spec(self) -> Tuple[FuncApproxSpec, FuncApproxSpec]:
     ff = lambda s: (1. + self.r)**float(s[0])
     mean = FuncApproxSpec(state_feature_funcs=[ff],
                           sa_feature_funcs=[lambda x, ff=ff: ff(x[0])],
                           dnn_spec=None)
     variance = FuncApproxSpec(
         state_feature_funcs=[],
         sa_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=[],
             hidden_activation=DNNSpec.log_squish,
             hidden_activation_deriv=DNNSpec.log_squish_deriv,
             output_activation=DNNSpec.pos_log_squish,
             output_activation_deriv=DNNSpec.pos_log_squish_deriv))
     return mean, variance
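The sa_feature_funcs entry above uses the idiom lambda x, ff=ff: ff(x[0]): the default argument binds ff at definition time instead of relying on the enclosing closure. The same trick is the usual guard against Python's late-binding closures when lambdas are created in a loop, as this standalone illustration shows:

funcs_late = [lambda: i for i in range(3)]
print([f() for f in funcs_late])    # [2, 2, 2] -- each lambda sees the final i

funcs_bound = [lambda i=i: i for i in range(3)]
print([f() for f in funcs_bound])   # [0, 1, 2] -- the default argument freezes each i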
Example #4
 def critic_spec(neurons: Sequence[int]) -> FuncApproxSpec:
     return FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.identity,
             output_activation_deriv=DNNSpec.identity_deriv
         )
     )
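The two state feature functions in critic_spec assume states of the form (t, W), i.e. a time index followed by a scalar level such as wealth. A hypothetical state, for illustration only:

state = (3, 105.7)                        # hypothetical (time step, wealth) state
features = [float(state[0]), state[1]]    # what the two lambdas above extract
print(features)                           # [3.0, 105.7]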
Example #5
    def get_actor_mu_spec(self, time_steps: int) -> FuncApproxSpec:
        tnu = self.get_nu()

        # noinspection PyShadowingNames
        def state_ff(state: Tuple[int, float], tnu=tnu) -> float:
            tte = self.expiry * (1. - float(state[0]) / time_steps)
            if tnu == 0:
                ret = 1. / (tte + self.epsilon)
            else:
                ret = tnu / (1. +
                             (tnu * self.epsilon - 1.) * np.exp(-tnu * tte))
            return ret

        return FuncApproxSpec(
            state_feature_funcs=[state_ff],
            sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
            dnn_spec=DNNSpec(neurons=[],
                             hidden_activation=DNNSpec.log_squish,
                             hidden_activation_deriv=DNNSpec.log_squish_deriv,
                             output_activation=DNNSpec.sigmoid,
                             output_activation_deriv=DNNSpec.sigmoid_deriv))
    def critic_spec(self, neurons: Sequence[int]) -> FuncApproxSpec:
        def feature_func(state: StateType) -> float:
            t = float(state[0])
            # noinspection PyPep8Naming
            W = state[1]
            term1 = self.rho**(-t)
            term2 = np.exp((self.mu - self.r)**2 / (2 * self.sigma**2) * t)
            term3 = np.exp(-self.gamma * (1. + self.r)**(self.time_steps - t) *
                           W)
            return term1 * term2 * term3

        return FuncApproxSpec(
            state_feature_funcs=[feature_func],
            sa_feature_funcs=[
                lambda x, feature_func=feature_func: feature_func(x[0])
            ],
            dnn_spec=DNNSpec(neurons=neurons,
                             hidden_activation=DNNSpec.relu,
                             hidden_activation_deriv=DNNSpec.relu_deriv,
                             output_activation=DNNSpec.identity,
                             output_activation_deriv=DNNSpec.identity_deriv))
    def get_rl_fa_price(
        self, num_dt: int, method: str, exploring_start: bool,
        algorithm: TDAlgorithm, softmax: bool, epsilon: float,
        epsilon_half_life: float, lambd: float, num_paths: int,
        batch_size: int,
        feature_funcs: Sequence[Callable[[Tuple[StateType, ActionType]], float]],
        neurons: Optional[Sequence[int]], learning_rate: float,
        learning_rate_decay: float, adam: Tuple[bool, float, float],
        offline: bool
    ) -> float:
        dt = self.expiry / num_dt

        def sa_func(_: StateType) -> Set[ActionType]:
            return {True, False}

        # noinspection PyShadowingNames
        def terminal_state(s: StateType, num_dt=num_dt) -> bool:
            return s[0] > num_dt

        # noinspection PyShadowingNames
        def sr_func(s: StateType,
                    a: ActionType,
                    num_dt=num_dt) -> Tuple[StateType, float]:
            return self.state_reward_gen(s, a, num_dt)

        def init_s() -> StateType:
            return 0, np.array([self.spot_price])

        def init_sa() -> Tuple[StateType, ActionType]:
            return init_s(), choice([True, False])

        # noinspection PyShadowingNames
        mdp_rep_obj = MDPRepForRLFA(state_action_func=sa_func,
                                    gamma=1.,
                                    terminal_state_func=terminal_state,
                                    state_reward_gen_func=sr_func,
                                    init_state_gen=init_s,
                                    init_state_action_gen=init_sa)

        fa_spec = FuncApproxSpec(
            state_feature_funcs=[],
            sa_feature_funcs=feature_funcs,
            dnn_spec=(None if neurons is None else (DNNSpec(
                neurons=neurons,
                hidden_activation=DNNSpec.log_squish,
                hidden_activation_deriv=DNNSpec.log_squish_deriv,
                output_activation=DNNSpec.pos_log_squish,
                output_activation_deriv=DNNSpec.pos_log_squish_deriv))),
            learning_rate=learning_rate,
            adam_params=adam,
            add_unit_feature=False)

        if method == "MC":
            rl_fa_obj = MonteCarlo(mdp_rep_for_rl=mdp_rep_obj,
                                   exploring_start=exploring_start,
                                   softmax=softmax,
                                   epsilon=epsilon,
                                   epsilon_half_life=epsilon_half_life,
                                   num_episodes=num_paths,
                                   max_steps=num_dt + 2,
                                   fa_spec=fa_spec)
        elif method == "TD0":
            rl_fa_obj = TD0(mdp_rep_for_rl=mdp_rep_obj,
                            exploring_start=exploring_start,
                            algorithm=algorithm,
                            softmax=softmax,
                            epsilon=epsilon,
                            epsilon_half_life=epsilon_half_life,
                            num_episodes=num_paths,
                            max_steps=num_dt + 2,
                            fa_spec=fa_spec)
        elif method == "TDL":
            rl_fa_obj = TDLambda(mdp_rep_for_rl=mdp_rep_obj,
                                 exploring_start=exploring_start,
                                 algorithm=algorithm,
                                 softmax=softmax,
                                 epsilon=epsilon,
                                 epsilon_half_life=epsilon_half_life,
                                 lambd=lambd,
                                 num_episodes=num_paths,
                                 batch_size=batch_size,
                                 max_steps=num_dt + 2,
                                 fa_spec=fa_spec,
                                 offline=offline)
        else:
            rl_fa_obj = TDLambdaExact(mdp_rep_for_rl=mdp_rep_obj,
                                      exploring_start=exploring_start,
                                      algorithm=algorithm,
                                      softmax=softmax,
                                      epsilon=epsilon,
                                      epsilon_half_life=epsilon_half_life,
                                      lambd=lambd,
                                      num_episodes=num_paths,
                                      batch_size=batch_size,
                                      max_steps=num_dt + 2,
                                      state_feature_funcs=[],
                                      sa_feature_funcs=feature_funcs,
                                      learning_rate=learning_rate,
                                      learning_rate_decay=learning_rate_decay)

        qvf = rl_fa_obj.get_qv_func_fa(None)
        # init_s = (0, np.array([self.spot_price]))
        # val_exec = qvf(init_s)(True)
        # val_cont = qvf(init_s)(False)
        # true_false_spot_max = max(val_exec, val_cont)

        all_paths = self.get_all_paths(num_paths, num_dt + 1)
        prices = np.zeros(num_paths)

        for path_num, path in enumerate(all_paths):
            steps = 0
            price_seq = np.array([])
            while steps <= num_dt:
                price_seq = np.append(price_seq, path[steps])
                state = (steps, price_seq)
                exercise_price = np.exp(-self.ir(dt * steps)) *\
                    self.payoff(dt * steps, price_seq)
                continue_price = qvf(state)(False)
                steps += 1
                if exercise_price > continue_price:
                    prices[path_num] = exercise_price
                    steps = num_dt + 1
                    # print(state)
                    # print(exercise_price)
                    # print(continue_price)
                    # print(qvf(state)(True))

        return np.average(prices)
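The pricing loop at the end of get_rl_fa_price walks each simulated path, asks the learned Q-value function qvf for the continuation value, and records the discounted payoff at the first step where immediate exercise beats continuation; paths that are never exercised contribute zero. A stripped-down sketch of that stopping rule, with hypothetical payoff and continuation sequences standing in for the learned quantities:

def first_exercise_value(exercise_values, continuation_values):
    # exercise_values[t]: discounted payoff from exercising at step t
    # continuation_values[t]: estimated value of continuing at step t
    for ex, cont in zip(exercise_values, continuation_values):
        if ex > cont:
            return ex      # stop at the first step where exercising is better
    return 0.0             # never exercised on this path

print(first_exercise_value([0.0, 1.2, 2.0], [0.5, 1.5, 1.8]))  # prints 2.0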
Example #8
    gamma_val = 0.9
    mdp_ref_obj1 = MDPRefined(mdp_refined_data, gamma_val)
    mdp_rep_obj = mdp_ref_obj1.get_mdp_rep_for_adp()

    num_state_samples_val = 100
    num_action_samples_val = 100
    tol_val = 1e-4
    vf_fa_spec_val = FuncApproxSpec(
        state_feature_funcs=[
            lambda s: 1. if s == 1 else 0.,
            lambda s: 1. if s == 2 else 0.,
            lambda s: 1. if s == 3 else 0.
        ],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[2, 4],
                         hidden_activation=DNNSpec.relu,
                         hidden_activation_deriv=DNNSpec.relu_deriv,
                         output_activation=DNNSpec.identity,
                         output_activation_deriv=DNNSpec.identity_deriv))
    pol_fa_spec_val = [
        FuncApproxSpec(
            state_feature_funcs=[
                lambda s: 1. if s == 1 else 0.,
                lambda s: 1. if s == 2 else 0.,
                lambda s: 1. if s == 3 else 0.
            ],
            action_feature_funcs=[],
            dnn_spec=DNNSpec(
                neurons=[2, 4],
                hidden_activation=DNNSpec.relu,
                hidden_activation_deriv=DNNSpec.relu_deriv,
                output_activation=DNNSpec.sigmoid,
                output_activation_deriv=DNNSpec.sigmoid_deriv
            )
        )
    ]
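The three state feature functions above are simply indicators for states 1, 2 and 3, i.e. a one-hot encoding of a (presumed) three-state MDP. A quick standalone check of what they produce:

def one_hot(s):
    # same indicators as the three lambdas above
    return [1. if s == 1 else 0., 1. if s == 2 else 0., 1. if s == 3 else 0.]

for s in (1, 2, 3):
    print(s, one_hot(s))   # 1 -> [1.0, 0.0, 0.0], 2 -> [0.0, 1.0, 0.0], 3 -> [0.0, 0.0, 1.0]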
Example #9
        return get_generalized_back_prop(
            dnn_params=self.params,
            fwd_prop=fwd_prop,
            dObj_dOL=dObj_dOL,
            factors=factors,
            decay_param=gamma_lambda,
            hidden_activation_deriv=self.hidden_activation_deriv,
            output_activation_deriv=self.output_activation_deriv
        )


if __name__ == '__main__':
    this_dnn_obj = DNNSpec(
        neurons=[2],
        hidden_activation=DNNSpec.relu,
        hidden_activation_deriv=DNNSpec.relu_deriv,
        output_activation=DNNSpec.identity,
        output_activation_deriv=DNNSpec.identity_deriv
    )
    nn = DNN(
        feature_funcs=FuncApproxBase.get_identity_feature_funcs(3),
        dnn_obj=this_dnn_obj,
        reglr_coeff=0.,
        learning_rate=1.,
        adam=True,
        adam_decay1=0.9,
        adam_decay2=0.999
    )
    init_eval = nn.get_func_eval((2.0, 3.0, -4.0))
    print(init_eval)
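The DNN constructed above has three identity input features, one hidden layer of two ReLU units, and a linear (identity) output, so get_func_eval at (2.0, 3.0, -4.0) is simply a forward pass through an as-yet-untrained network. A plain-NumPy sketch of a forward pass with that shape, using made-up weights (the library initializes and manages its own parameters, so the printed number will differ):

import numpy as np

rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(2, 3)), np.zeros(2)   # 3 inputs -> 2 hidden ReLU units
W2, b2 = rng.normal(size=(1, 2)), np.zeros(1)   # 2 hidden units -> 1 linear output

x = np.array([2.0, 3.0, -4.0])
h = np.maximum(W1 @ x + b1, 0.0)                # hidden ReLU activation
print((W2 @ h + b2).item())                     # identity (linear) output activation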
Example #10
    this_epsilon = 0.05
    this_epsilon_half_life = 30
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 3000
    this_max_steps = 1000
    this_tdl_fa_offline = True
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_identity_feature_funcs(
            ic.lead_time + 1
        ),
        action_feature_funcs=[lambda x: x],
        dnn_spec=DNNSpec(
            neurons=[2, 4],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv
        )
    )

    raa = RunAllAlgorithms(
        mdp_refined=mdp_ref_obj,
        tolerance=this_tolerance,
        first_visit_mc=this_first_visit_mc,
        num_samples=num_samples,
        softmax=this_softmax,
        epsilon=this_epsilon,
        epsilon_half_life=this_epsilon_half_life,
        learning_rate=this_learning_rate,
        learning_rate_decay=this_learning_rate_decay,
        lambd=this_lambd,
Example #11
                                     discount_rate=rho)

    reinforce_val = True
    num_state_samples_val = 500
    num_next_state_samples_val = 30
    num_action_samples_val = 50
    num_batches_val = 3000
    actor_lambda_val = 0.99
    critic_lambda_val = 0.99

    actor_mu = FuncApproxSpec(
        state_feature_funcs=[],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[],
                         hidden_activation=DNNSpec.log_squish,
                         hidden_activation_deriv=DNNSpec.log_squish_deriv,
                         output_activation=DNNSpec.sigmoid,
                         output_activation_deriv=DNNSpec.sigmoid_deriv))
    actor_nu = FuncApproxSpec(
        state_feature_funcs=[],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[],
                         hidden_activation=DNNSpec.log_squish,
                         hidden_activation_deriv=DNNSpec.log_squish_deriv,
                         output_activation=DNNSpec.pos_log_squish,
                         output_activation_deriv=DNNSpec.pos_log_squish_deriv))
    actor_mean = FuncApproxSpec(state_feature_funcs=[],
                                action_feature_funcs=[],
                                dnn_spec=None)
    actor_variance = FuncApproxSpec(
        state_feature_funcs=[],
Example #12
        errors = np.array([x[-1][0] for x in all_fwd_prop]) - \
            np.array(supervisory_seq)
        return get_generalized_back_prop(
            dnn_params=self.params,
            layer_inputs=layer_inputs,
            factors=errors,
            dObj_dSL=np.ones_like(errors),
            decay_param=gamma_lambda,
            hidden_activation_deriv=self.hidden_activation_deriv
        )


if __name__ == '__main__':
    this_dnn_obj = DNNSpec(
        neurons=[2],
        hidden_activation=DNNSpec.relu,
        hidden_activation_deriv=DNNSpec.relu_deriv
    )
    nn = DNN(
        feature_funcs=FuncApproxBase.get_identity_feature_funcs(3),
        dnn_obj=this_dnn_obj,
        reglr_coeff=0.,
        learning_rate=1.,
        adam=True,
        adam_decay1=0.9,
        adam_decay2=0.999
    )
    init_eval = nn.get_func_eval((2.0, 3.0, -4.0))
    print(init_eval)

    x_pts = np.arange(-10.0, 10.0, 0.5)