Example #1
File: pg.py Project: vg02765/MDP-DP-RL
    def __init__(
        self,
        mdp_rep_for_rl_pg: MDPRepForRLPG,
        reinforce: bool,
        batch_size: int,
        num_batches: int,
        num_action_samples: int,
        max_steps: int,
        actor_lambda: float,
        critic_lambda: float,
        score_func: Callable[[A, Sequence[float]], Sequence[float]],
        sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
        fa_spec: FuncApproxSpec,
        pol_fa_spec: Sequence[FuncApproxSpec]
    ) -> None:
        self.mdp_rep: MDPRepForRLPG = mdp_rep_for_rl_pg
        self.reinforce: bool = reinforce
        self.batch_size: int = batch_size
        self.num_batches: int = num_batches
        self.num_action_samples: int = num_action_samples
        self.max_steps: int = max_steps
        self.actor_lambda: float = actor_lambda
        self.critic_lambda: float = critic_lambda
        self.score_func: Callable[[A, Sequence[float]], Sequence[float]] =\
            score_func
        self.sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]] =\
            sample_actions_gen_func
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
        self.pol_fa: Sequence[FuncApproxBase] =\
            [s.get_vf_func_approx_obj() for s in pol_fa_spec]
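The constructor above only declares the types of score_func and sample_actions_gen_func; none of these excerpts show concrete implementations. Below is a minimal, hypothetical sketch (names and policy form are assumptions, not code from the MDP-DP-RL repo) for a one-dimensional Gaussian policy whose function approximators output the parameters [mu, sigma]:

import numpy as np
from typing import Sequence

def gaussian_score_func(action: float, params: Sequence[float]) -> Sequence[float]:
    # gradient of log N(action | mu, sigma) with respect to (mu, sigma)
    mu, sigma = params
    return [(action - mu) / sigma ** 2,
            ((action - mu) ** 2 - sigma ** 2) / sigma ** 3]

def gaussian_sample_actions(params: Sequence[float], num_samples: int) -> Sequence[float]:
    # draw num_samples actions from the current Gaussian policy
    mu, sigma = params
    return list(np.random.normal(loc=mu, scale=sigma, size=num_samples))

These two callables match the Callable[[A, Sequence[float]], Sequence[float]] and Callable[[Sequence[float], int], Sequence[A]] signatures with A = float.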
Example #2
 def actor_spec(neurons: Sequence[int], num_risky: int)\
         -> Sequence[FuncApproxSpec]:
     alpha_beta_vars = [FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.softplus,
             output_activation_deriv=DNNSpec.softplus_deriv
         )
     ) for _ in range(num_risky + 2)]
     means = [FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.identity,
             output_activation_deriv=DNNSpec.identity_deriv
         )
     ) for _ in range(num_risky)]
     return alpha_beta_vars + means
Example #3
    def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, softmax: bool,
                 epsilon: float, num_episodes: int, max_steps: int,
                 fa_spec: FuncApproxSpec) -> None:

        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.softmax: bool = softmax
        self.epsilon: float = epsilon
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
        self.state_action_func = self.mdp_rep.state_action_func
Example #4
 def actor_spec(self) -> Tuple[FuncApproxSpec, FuncApproxSpec]:
     ff = lambda s: (1. + self.r)**float(s[0])
     mean = FuncApproxSpec(state_feature_funcs=[ff],
                           sa_feature_funcs=[lambda x, ff=ff: ff(x[0])],
                           dnn_spec=None)
     variance = FuncApproxSpec(
         state_feature_funcs=[],
         sa_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=[],
             hidden_activation=DNNSpec.log_squish,
             hidden_activation_deriv=DNNSpec.log_squish_deriv,
             output_activation=DNNSpec.pos_log_squish,
             output_activation_deriv=DNNSpec.pos_log_squish_deriv))
     return mean, variance
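The ff=ff default argument in the sa_feature_funcs lambda above (and the f=f / v=v pattern in several later examples) guards against Python's late binding of closures: a lambda created in a loop would otherwise see only the final value of the loop variable. A small standalone illustration, with hypothetical names:

# Late binding: both lambdas end up calling the last function of the loop.
funcs_late = [lambda x: f(x) for f in (abs, str)]
print([g(-3) for g in funcs_late])    # ['-3', '-3']

# Default-argument binding freezes f at definition time, as in ff=ff above.
funcs_bound = [lambda x, f=f: f(x) for f in (abs, str)]
print([g(-3) for g in funcs_bound])   # [3, '-3']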
Example #5
 def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, exploring_start: bool,
              algorithm: TDAlgorithm, softmax: bool, epsilon: float,
              epsilon_half_life: float, lambd: float, num_episodes: int,
              batch_size: int, max_steps: int,
              state_feature_funcs: Sequence[Callable[[S], float]],
              sa_feature_funcs: Sequence[Callable[[Tuple[S, A]], float]],
              learning_rate: float, learning_rate_decay: float) -> None:
     super().__init__(mdp_rep_for_rl=mdp_rep_for_rl,
                      exploring_start=exploring_start,
                      softmax=softmax,
                      epsilon=epsilon,
                      epsilon_half_life=epsilon_half_life,
                      num_episodes=num_episodes,
                      max_steps=max_steps,
                      fa_spec=FuncApproxSpec(
                          state_feature_funcs=state_feature_funcs,
                          sa_feature_funcs=sa_feature_funcs,
                          dnn_spec=None,
                          learning_rate=learning_rate,
                          add_unit_feature=False))
     self.vf_w: np.ndarray = np.zeros(self.vf_fa.num_features)
     self.qvf_w: np.ndarray = np.zeros(self.qvf_fa.num_features)
     self.vf_fa.params = [self.vf_w]
     self.qvf_fa.params = [self.qvf_w]
     self.algorithm: TDAlgorithm = algorithm
     self.gamma_lambda: float = self.mdp_rep.gamma * lambd
     self.batch_size: int = batch_size
     self.learning_rate_decay: float = learning_rate_decay
Example #6
File: merton.py Project: chsd001/CME241
 def get_actor_nu_spec() -> FuncApproxSpec:
     return FuncApproxSpec(
         state_feature_funcs=[],
         sa_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=[],
             hidden_activation=DNNSpec.log_squish,
             hidden_activation_deriv=DNNSpec.log_squish_deriv,
             output_activation=DNNSpec.pos_log_squish,
             output_activation_deriv=DNNSpec.pos_log_squish_deriv))
Example #7
 def __init__(self, mdp_rep_for_adp: MDPRepForADP, num_samples: int,
              softmax: bool, epsilon: float, epsilon_half_life: float,
              tol: float, fa_spec: FuncApproxSpec) -> None:
     self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
     self.num_samples: int = num_samples
     self.softmax: bool = softmax
     self.epsilon_func: Callable[[int], float] = get_epsilon_decay_func(
         epsilon, epsilon_half_life)
     self.tol: float = tol
     self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
     self.state_action_func: Callable[[S], Set[A]] =\
         self.mdp_rep.state_action_func
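get_epsilon_decay_func(epsilon, epsilon_half_life) is used here and in later examples but is not shown in these excerpts. Judging by the half-life naming, a plausible reading is an exploration rate that halves every epsilon_half_life episodes; the sketch below is that assumed form only, not the repo's actual helper:

from typing import Callable

def make_epsilon_decay(epsilon: float, epsilon_half_life: float) -> Callable[[int], float]:
    # exploration rate that halves every epsilon_half_life episodes
    def decay(episode: int) -> float:
        return epsilon * 0.5 ** (episode / epsilon_half_life)
    return decay

eps = make_epsilon_decay(0.1, 30.)
# eps(0) = 0.1, eps(30) = 0.05, eps(60) = 0.025 (up to floating point)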
Example #8
 def critic_spec(neurons: Sequence[int]) -> FuncApproxSpec:
     return FuncApproxSpec(
         state_feature_funcs=[
             lambda s: float(s[0]),
             lambda s: s[1]
         ],
         action_feature_funcs=[],
         dnn_spec=DNNSpec(
             neurons=neurons,
             hidden_activation=DNNSpec.relu,
             hidden_activation_deriv=DNNSpec.relu_deriv,
             output_activation=DNNSpec.identity,
             output_activation_deriv=DNNSpec.identity_deriv
         )
     )
Example #9
    def __init__(
        self,
        mdp_rep_for_rl: MDPRepForRLFA,
        exploring_start: bool,
        softmax: bool,
        epsilon: float,
        epsilon_half_life: float,
        num_episodes: int,
        max_steps: int,
        fa_spec: FuncApproxSpec
    ) -> None:

        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.exploring_start: bool = exploring_start
        self.softmax: bool = softmax
        self.epsilon_func: Callable[[int], float] = get_epsilon_decay_func(
            epsilon,
            epsilon_half_life
        )
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
        self.state_action_func = self.mdp_rep.state_action_func
Example #10
 def __init__(self, mdp_rep_for_adp_pg: MDPRepForADPPG,
              num_state_samples: int, num_action_samples: int, tol: float,
              score_func: Callable[[A, Sequence[float]], Sequence[float]],
              sample_actions_gen_func: Callable[[Sequence[float], int],
                                                Sequence[A]],
              vf_fa_spec: FuncApproxSpec,
              pol_fa_spec: Sequence[FuncApproxSpec]) -> None:
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
     self.num_state_samples: int = num_state_samples
     self.num_action_samples: int = num_action_samples
     self.tol: float = tol
     self.score_func: Callable[[A, Sequence[float]], Sequence[float]] =\
         score_func
     self.sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]] =\
         sample_actions_gen_func
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     self.pol_fa: Sequence[FuncApproxBase] =\
         [s.get_vf_func_approx_obj() for s in pol_fa_spec]
Example #11
    def get_critic_spec(self, time_steps: int) -> FuncApproxSpec:
        tnu = self.get_nu()
        gam = 1. - self.gamma

        # noinspection PyShadowingNames
        def state_ff(state: Tuple[int, float], tnu=tnu, gam=gam) -> float:
            t = float(state[0]) * self.expiry / time_steps
            tte = self.expiry - t
            if tnu == 0:
                ret = tte + self.epsilon
            else:
                ret = (1. +
                       (tnu * self.epsilon - 1.) * np.exp(-tnu * tte)) / tnu
            mult = state[1]**gam / gam if gam != 0 else np.log(state[1])
            return ret**self.gamma * mult / np.exp(self.rho * t)

        return FuncApproxSpec(state_feature_funcs=[state_ff],
                              action_feature_funcs=[],
                              dnn_spec=None)
Example #12
    def critic_spec(self, neurons: Sequence[int]) -> FuncApproxSpec:
        def feature_func(state: StateType) -> float:
            t = float(state[0])
            # noinspection PyPep8Naming
            W = state[1]
            term1 = self.rho**(-t)
            term2 = np.exp((self.mu - self.r)**2 / (2 * self.sigma**2) * t)
            term3 = np.exp(-self.gamma * (1. + self.r)**(self.time_steps - t) *
                           W)
            return term1 * term2 * term3

        return FuncApproxSpec(
            state_feature_funcs=[feature_func],
            sa_feature_funcs=[
                lambda x, feature_func=feature_func: feature_func(x[0])
            ],
            dnn_spec=DNNSpec(neurons=neurons,
                             hidden_activation=DNNSpec.relu,
                             hidden_activation_deriv=DNNSpec.relu_deriv,
                             output_activation=DNNSpec.identity,
                             output_activation_deriv=DNNSpec.identity_deriv))
Example #13
File: merton.py Project: chsd001/CME241
    def get_actor_mu_spec(self, time_steps: int) -> FuncApproxSpec:
        tnu = self.get_nu()

        # noinspection PyShadowingNames
        def state_ff(state: Tuple[int, float], tnu=tnu) -> float:
            tte = self.expiry * (1. - float(state[0]) / time_steps)
            if tnu == 0:
                ret = 1. / (tte + self.epsilon)
            else:
                ret = tnu / (1. +
                             (tnu * self.epsilon - 1.) * np.exp(-tnu * tte))
            return ret

        return FuncApproxSpec(
            state_feature_funcs=[state_ff],
            sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
            dnn_spec=DNNSpec(neurons=[],
                             hidden_activation=DNNSpec.log_squish,
                             hidden_activation_deriv=DNNSpec.log_squish_deriv,
                             output_activation=DNNSpec.sigmoid,
                             output_activation_deriv=DNNSpec.sigmoid_deriv))
Example #14
    def __init__(
            self, mdp_rep_for_rl: MDPRepForRLFA, exploring_start: bool,
            softmax: bool, epsilon: float, epsilon_half_life: float,
            num_episodes: int, batch_size: int, max_steps: int,
            state_feature_funcs: Sequence[Callable[[S], float]],
            sa_feature_funcs: Sequence[Callable[[Tuple[S, A]], float]]) -> None:

        super().__init__(mdp_rep_for_rl=mdp_rep_for_rl,
                         exploring_start=exploring_start,
                         softmax=softmax,
                         epsilon=epsilon,
                         epsilon_half_life=epsilon_half_life,
                         num_episodes=num_episodes,
                         max_steps=max_steps,
                         fa_spec=FuncApproxSpec(
                             state_feature_funcs=state_feature_funcs,
                             sa_feature_funcs=sa_feature_funcs,
                             dnn_spec=None,
                             reglr_coeff=0.,
                             learning_rate=0.,
                             adam_params=(False, 0., 0.),
                             add_unit_feature=True))
        self.batch_size: int = batch_size
Example #15
 def __init__(self, mdp_rep_for_adp_pg: MDPRepForADPPG,
              num_state_samples: int, num_next_state_samples: int,
              num_action_samples: int, num_batches: int, max_steps: int,
              actor_lambda: float, critic_lambda: float,
              score_func: Callable[[A, Sequence[float]], Sequence[float]],
              sample_actions_gen_func: Callable[[Sequence[float], int],
                                                Sequence[A]],
              vf_fa_spec: FuncApproxSpec,
              pol_fa_spec: Sequence[FuncApproxSpec]) -> None:
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
     self.num_state_samples: int = num_state_samples
     self.num_next_state_samples: int = num_next_state_samples
     self.num_action_samples: int = num_action_samples
     self.num_batches: int = num_batches
     self.max_steps: int = max_steps
     self.actor_lambda: float = actor_lambda
     self.critic_lambda: float = critic_lambda
     self.score_func: Callable[[A, Sequence[float]], Sequence[float]] =\
         score_func
     self.sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]] =\
         sample_actions_gen_func
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     self.pol_fa: Sequence[FuncApproxBase] =\
         [s.get_vf_func_approx_obj() for s in pol_fa_spec]
Example #16
        }
    }
    gamma_val = 1.0
    mdp_ref_obj1 = MDPRefined(mdp_refined_data, gamma_val)
    mdp_rep_obj = mdp_ref_obj1.get_mdp_rep_for_rl_tabular()

    first_visit_flag = True
    softmax_flag = False
    episodes_limit = 10000
    epsilon_val = 0.1
    epsilon_half_life_val = 1000
    max_steps_val = 1000
    fa_spec_val = FuncApproxSpec(state_feature_funcs=[lambda s: float(s)],
                                 action_feature_funcs=[
                                     lambda a: 1. if a == 'a' else 0.,
                                     lambda a: 1. if a == 'b' else 0.,
                                     lambda a: 1. if a == 'c' else 0.,
                                 ],
                                 dnn_spec=None)
    mc_obj = MonteCarlo(mdp_rep_obj, first_visit_flag, softmax_flag,
                        epsilon_val, epsilon_half_life_val, episodes_limit,
                        max_steps_val, fa_spec_val)

    def policy_func(i: int) -> Mapping[str, float]:
        if i == 1:
            ret = {'a': 0.4, 'b': 0.6}
        elif i == 2:
            ret = {'a': 0.7, 'c': 0.3}
        elif i == 3:
            ret = {'b': 1.0}
        else:
Example #17
    num_samples = 30
    this_softmax = True
    this_epsilon = 0.05
    this_epsilon_half_life = 30
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 3000
    this_max_steps = 1000
    this_tdl_fa_offline = True
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_identity_feature_funcs(
            ic.lead_time + 1
        ),
        action_feature_funcs=[lambda x: x],
        dnn_spec=DNNSpec(
            neurons=[2, 4],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv
        )
    )

    raa = RunAllAlgorithms(
        mdp_refined=mdp_ref_obj,
        tolerance=this_tolerance,
        first_visit_mc=this_first_visit_mc,
        num_samples=num_samples,
        softmax=this_softmax,
        epsilon=this_epsilon,
        epsilon_half_life=this_epsilon_half_life,
        learning_rate=this_learning_rate,
Example #18
    this_epsilon = 0.05
    this_epsilon_half_life = 100
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 1000
    this_max_steps = 1000
    this_td_offline = True
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            mdp_ref_obj.all_states),
        action_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            {m.name
             for m in Move}),
        dnn_spec=None
        # dnn_spec=DNNSpec(
        #     neurons=[2, 4],
        #     hidden_activation=DNNSpec.relu,
        #     hidden_activation_deriv=DNNSpec.relu_deriv,
        #     output_activation=DNNSpec.identity,
        #     output_activation_deriv=DNNSpec.identity_deriv
        # )
    )

    raa = RunAllAlgorithms(mdp_refined=mdp_ref_obj,
                           tolerance=this_tolerance,
                           first_visit_mc=this_first_visit_mc,
                           num_samples=this_num_samples,
                           softmax=this_softmax,
                           epsilon=this_epsilon,
                           epsilon_half_life=this_epsilon_half_life,
Example #19
    num_state_samples_val = 100
    num_next_state_samples_val = 25
    num_action_samples_val = 20
    num_batches_val = 100
    max_steps_val = 100
    actor_lambda_val = 0.95
    critic_lambda_val = 0.95
    state_ff = [
        lambda s: 1. if s == 1 else 0.,
        lambda s: 1. if s == 2 else 0.,
        lambda s: 1. if s == 3 else 0.
    ]
    vf_fa_spec_val = FuncApproxSpec(
        state_feature_funcs=state_ff,
        sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
        dnn_spec=DNNSpec(neurons=[2],
                         hidden_activation=DNNSpec.relu,
                         hidden_activation_deriv=DNNSpec.relu_deriv,
                         output_activation=DNNSpec.identity,
                         output_activation_deriv=DNNSpec.identity_deriv))
    pol_fa_spec_val = [
        FuncApproxSpec(
            state_feature_funcs=state_ff,
            sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
            dnn_spec=DNNSpec(neurons=[3],
                             hidden_activation=DNNSpec.relu,
                             hidden_activation_deriv=DNNSpec.relu_deriv,
                             output_activation=DNNSpec.sigmoid,
                             output_activation_deriv=DNNSpec.sigmoid_deriv))
    ]
    # noinspection PyPep8
    this_score_func = lambda a, p: [
Example #20
    gamma_val = 0.9
    mdp_ref_obj1 = MDPRefined(mdp_refined_data, gamma_val)
    mdp_rep_obj = mdp_ref_obj1.get_mdp_rep_for_adp()

    num_samples_val = 100
    softmax_flag = False
    epsilon_val = 0.0
    epsilon_half_life_val = 30
    tol_val = 1e-4
    fa_spec_val = FuncApproxSpec(
        state_feature_funcs=[
            lambda s: 1. if s == 1 else 0.,
            lambda s: 1. if s == 2 else 0.,
            lambda s: 1. if s == 3 else 0.
        ],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[2, 4],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv
        )
    )
    adp_obj = ADP(
        mdp_rep_for_adp=mdp_rep_obj,
        num_samples=num_samples_val,
        softmax=softmax_flag,
        epsilon=epsilon_val,
        epsilon_half_life=epsilon_half_life_val,
        tol=tol_val,
        fa_spec=fa_spec_val
    )
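The three one-hot lambdas in state_feature_funcs above follow the same indicator pattern that other examples obtain from FuncApproxBase.get_indicator_feature_funcs. A hypothetical standalone generator with equivalent behaviour (an assumption, not the repo's implementation), using the v=v default-argument guard noted earlier:

def indicator_feature_funcs(values):
    # one 0./1. indicator feature per distinct value
    return [lambda s, v=v: 1. if s == v else 0. for v in sorted(values)]

state_feature_funcs = indicator_feature_funcs({1, 2, 3})
# state_feature_funcs[1](2) == 1.0 and state_feature_funcs[1](3) == 0.0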
Example #21
    this_num_episodes = 1000
    this_batch_size = 10
    this_max_steps = 1000
    this_td_offline = True
    state_ffs = FuncApproxBase.get_indicator_feature_funcs(
        mdp_ref_obj.all_states)
    sa_ffs = [(lambda x, f=f: f(x[0])) for f in state_ffs] +\
        [(lambda x, f=f: f(x[1])) for f in FuncApproxBase.get_indicator_feature_funcs(
            {m.name for m in Move}
        )]
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=state_ffs,
        sa_feature_funcs=sa_ffs,
        dnn_spec=None
        # dnn_spec=DNNSpec(
        #     neurons=[2, 4],
        #     hidden_activation=DNNSpec.relu,
        #     hidden_activation_deriv=DNNSpec.relu_deriv,
        #     output_activation=DNNSpec.identity,
        #     output_activation_deriv=DNNSpec.identity_deriv
        # )
    )

    raa = RunAllAlgorithms(mdp_refined=mdp_ref_obj,
                           tolerance=this_tolerance,
                           exploring_start=exploring_start,
                           first_visit_mc=this_first_visit_mc,
                           num_samples=this_num_samples,
                           softmax=this_softmax,
                           epsilon=this_epsilon,
                           epsilon_half_life=this_epsilon_half_life,
                           learning_rate=this_learning_rate,
Example #22
                                     cons_util_func=util_func,
                                     beq_util_func=beq_util,
                                     discount_rate=rho)

    reinforce_val = True
    num_state_samples_val = 500
    num_next_state_samples_val = 30
    num_action_samples_val = 50
    num_batches_val = 3000
    actor_lambda_val = 0.99
    critic_lambda_val = 0.99

    actor_mu = FuncApproxSpec(
        state_feature_funcs=[],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[],
                         hidden_activation=DNNSpec.log_squish,
                         hidden_activation_deriv=DNNSpec.log_squish_deriv,
                         output_activation=DNNSpec.sigmoid,
                         output_activation_deriv=DNNSpec.sigmoid_deriv))
    actor_nu = FuncApproxSpec(
        state_feature_funcs=[],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[],
                         hidden_activation=DNNSpec.log_squish,
                         hidden_activation_deriv=DNNSpec.log_squish_deriv,
                         output_activation=DNNSpec.pos_log_squish,
                         output_activation_deriv=DNNSpec.pos_log_squish_deriv))
    actor_mean = FuncApproxSpec(state_feature_funcs=[],
                                action_feature_funcs=[],
                                dnn_spec=None)
    actor_variance = FuncApproxSpec(
Example #23
 learning_rate_val = 0.1
 lambda_val = 0.7
 episodes_limit = 10000
 batch_size_val = 20
 max_steps_val = 1000
 offline_val = True
 state_ff = [lambda s: float(s)]
 sa_ff = [
     lambda x: float(x[0]),
     lambda x: 1. if x[1] == 'a' else 0.,
     lambda x: 1. if x[1] == 'b' else 0.,
     lambda x: 1. if x[1] == 'c' else 0.,
 ]
 fa_spec_val = FuncApproxSpec(
     state_feature_funcs=state_ff,
     sa_feature_funcs=sa_ff,
     dnn_spec=None,
     learning_rate=learning_rate_val
 )
 esl_obj = TDLambda(
     mdp_rep_obj,
     exploring_start_val,
     algorithm_type,
     softmax_flag,
     epsilon_val,
     epsilon_half_life_val,
     lambda_val,
     episodes_limit,
     batch_size_val,
     max_steps_val,
     fa_spec_val,
     offline_val
Example #24
            }
        }
    }
    gamma_val = 0.9
    mdp_ref_obj1 = MDPRefined(mdp_refined_data, gamma_val)
    mdp_rep_obj = mdp_ref_obj1.get_mdp_rep_for_adp()

    num_state_samples_val = 100
    num_action_samples_val = 100
    tol_val = 1e-4
    vf_fa_spec_val = FuncApproxSpec(
        state_feature_funcs=[
            lambda s: 1. if s == 1 else 0.,
            lambda s: 1. if s == 2 else 0.,
            lambda s: 1. if s == 3 else 0.
        ],
        action_feature_funcs=[],
        dnn_spec=DNNSpec(neurons=[2, 4],
                         hidden_activation=DNNSpec.relu,
                         hidden_activation_deriv=DNNSpec.relu_deriv,
                         output_activation=DNNSpec.identity,
                         output_activation_deriv=DNNSpec.identity_deriv))
    pol_fa_spec_val = [
        FuncApproxSpec(state_feature_funcs=[
            lambda s: 1. if s == 1 else 0.,
            lambda s: 1. if s == 2 else 0.,
            lambda s: 1. if s == 3 else 0.
        ],
                       action_feature_funcs=[],
                       dnn_spec=DNNSpec(
                           neurons=[2, 4],
                           hidden_activation=DNNSpec.relu,
                           hidden_activation_deriv=DNNSpec.relu_deriv,
Example #25
    this_first_visit_mc = True
    this_num_samples = 30
    this_softmax = False
    this_epsilon = 0.05
    this_epsilon_half_life = 100
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 10000
    this_max_steps = 1000
    this_td_offline = True
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            mdp_ref_obj.all_states),
        action_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            {m.name
             for m in Move}),
        dnn_spec=DNNSpec(neurons=[2, 4],
                         hidden_activation=DNNSpec.relu,
                         hidden_activation_deriv=DNNSpec.relu_deriv))

    raa = RunAllAlgorithms(mdp_refined=mdp_ref_obj,
                           tolerance=this_tolerance,
                           first_visit_mc=this_first_visit_mc,
                           num_samples=this_num_samples,
                           softmax=this_softmax,
                           epsilon=this_epsilon,
                           epsilon_half_life=this_epsilon_half_life,
                           learning_rate=this_learning_rate,
                           learning_rate_decay=this_learning_rate_decay,
                           lambd=this_lambd,
Example #26
    def get_rl_fa_price(
        self, num_dt: int, method: str, exploring_start: bool,
        algorithm: TDAlgorithm, softmax: bool, epsilon: float,
        epsilon_half_life: float, lambd: float, num_paths: int, batch_size: int,
        feature_funcs: Sequence[Callable[[Tuple[StateType, ActionType]], float]],
        neurons: Optional[Sequence[int]],
        learning_rate: float, learning_rate_decay: float,
        adam: Tuple[bool, float, float], offline: bool
    ) -> float:
        dt = self.expiry / num_dt

        def sa_func(_: StateType) -> Set[ActionType]:
            return {True, False}

        # noinspection PyShadowingNames
        def terminal_state(s: StateType, num_dt=num_dt) -> bool:
            return s[0] > num_dt

        # noinspection PyShadowingNames
        def sr_func(s: StateType,
                    a: ActionType,
                    num_dt=num_dt) -> Tuple[StateType, float]:
            return self.state_reward_gen(s, a, num_dt)

        def init_s() -> StateType:
            return 0, np.array([self.spot_price])

        def init_sa() -> Tuple[StateType, ActionType]:
            return init_s(), choice([True, False])

        # noinspection PyShadowingNames
        mdp_rep_obj = MDPRepForRLFA(state_action_func=sa_func,
                                    gamma=1.,
                                    terminal_state_func=terminal_state,
                                    state_reward_gen_func=sr_func,
                                    init_state_gen=init_s,
                                    init_state_action_gen=init_sa)

        fa_spec = FuncApproxSpec(
            state_feature_funcs=[],
            sa_feature_funcs=feature_funcs,
            dnn_spec=(None if neurons is None else (DNNSpec(
                neurons=neurons,
                hidden_activation=DNNSpec.log_squish,
                hidden_activation_deriv=DNNSpec.log_squish_deriv,
                output_activation=DNNSpec.pos_log_squish,
                output_activation_deriv=DNNSpec.pos_log_squish_deriv))),
            learning_rate=learning_rate,
            adam_params=adam,
            add_unit_feature=False)

        if method == "MC":
            rl_fa_obj = MonteCarlo(mdp_rep_for_rl=mdp_rep_obj,
                                   exploring_start=exploring_start,
                                   softmax=softmax,
                                   epsilon=epsilon,
                                   epsilon_half_life=epsilon_half_life,
                                   num_episodes=num_paths,
                                   max_steps=num_dt + 2,
                                   fa_spec=fa_spec)
        elif method == "TD0":
            rl_fa_obj = TD0(mdp_rep_for_rl=mdp_rep_obj,
                            exploring_start=exploring_start,
                            algorithm=algorithm,
                            softmax=softmax,
                            epsilon=epsilon,
                            epsilon_half_life=epsilon_half_life,
                            num_episodes=num_paths,
                            max_steps=num_dt + 2,
                            fa_spec=fa_spec)
        elif method == "TDL":
            rl_fa_obj = TDLambda(mdp_rep_for_rl=mdp_rep_obj,
                                 exploring_start=exploring_start,
                                 algorithm=algorithm,
                                 softmax=softmax,
                                 epsilon=epsilon,
                                 epsilon_half_life=epsilon_half_life,
                                 lambd=lambd,
                                 num_episodes=num_paths,
                                 batch_size=batch_size,
                                 max_steps=num_dt + 2,
                                 fa_spec=fa_spec,
                                 offline=offline)
        else:
            rl_fa_obj = TDLambdaExact(mdp_rep_for_rl=mdp_rep_obj,
                                      exploring_start=exploring_start,
                                      algorithm=algorithm,
                                      softmax=softmax,
                                      epsilon=epsilon,
                                      epsilon_half_life=epsilon_half_life,
                                      lambd=lambd,
                                      num_episodes=num_paths,
                                      batch_size=batch_size,
                                      max_steps=num_dt + 2,
                                      state_feature_funcs=[],
                                      sa_feature_funcs=feature_funcs,
                                      learning_rate=learning_rate,
                                      learning_rate_decay=learning_rate_decay)

        qvf = rl_fa_obj.get_qv_func_fa(None)
        # init_s = (0, np.array([self.spot_price]))
        # val_exec = qvf(init_s)(True)
        # val_cont = qvf(init_s)(False)
        # true_false_spot_max = max(val_exec, val_cont)

        all_paths = self.get_all_paths(num_paths, num_dt + 1)
        prices = np.zeros(num_paths)

        for path_num, path in enumerate(all_paths):
            steps = 0
            price_seq = np.array([])
            while steps <= num_dt:
                price_seq = np.append(price_seq, path[steps])
                state = (steps, price_seq)
                exercise_price = np.exp(-self.ir(dt * steps)) *\
                    self.payoff(dt * steps, price_seq)
                continue_price = qvf(state)(False)
                steps += 1
                if exercise_price > continue_price:
                    prices[path_num] = exercise_price
                    steps = num_dt + 1
                    # print(state)
                    # print(exercise_price)
                    # print(continue_price)
                    # print(qvf(state)(True))

        return np.average(prices)
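The feature_funcs argument consumed by get_rl_fa_price above is supplied by the caller and does not appear in this excerpt. A minimal hypothetical example of state-action features for this pricer, assuming StateType is (time step, price path array) as built by init_s above and ActionType is a bool (exercise vs. continue):

def time_step_feature(x) -> float:
    # x is ((time_step, price_path), action); expose the elapsed time step
    return float(x[0][0])

def latest_price_feature(x) -> float:
    # most recent simulated price on the path
    return float(x[0][1][-1])

def exercise_indicator(x) -> float:
    # 1. for the exercise action, 0. for continue
    return 1. if x[1] else 0.

feature_funcs = [time_step_feature, latest_price_feature, exercise_indicator]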
Example #27
File: merton.py Project: chsd001/CME241
 def get_actor_mean_spec() -> FuncApproxSpec:
     return FuncApproxSpec(state_feature_funcs=[],
                           sa_feature_funcs=[],
                           dnn_spec=None)