コード例 #1
0
ファイル: dyna.py プロジェクト: ocraft/rl-sandbox
    def __init__(self,
                 act_spec: Spec,
                 obs_spec: Spec,
                 alpha=0.5,
                 epsilon=0.1,
                 gamma=1.0,
                 n=5,
                 kappa=0):
        """Store the hyper-parameters and build the Q-table and the model.

        Args:
            act_spec: Action spec; ``act_spec.size()`` is the number of
                Q-table columns and the exclusive upper bound of the random
                integers produced for ``self.sample``.
            obs_spec: Observation spec; ``obs_spec.size()`` is the number of
                Q-table rows.
            alpha: Stored as ``self.alpha`` (presumably the step size).
            epsilon: Stored as ``self.epsilon`` (presumably the exploration
                rate).
            gamma: Stored as ``self.gamma`` (presumably the discount factor).
            n: Stored as ``self.n`` (presumably the number of planning
                updates per step -- confirm against the methods using it).
            kappa: Stored as ``self.kappa``. A truthy value selects a
                ``TimeModel`` built with it; a falsy value (the default
                ``0``) selects a plain ``Model``.
        """
        AgentProgram.__init__(self, act_spec, obs_spec)

        def draw_actions():
            # One batch of 1000 integers drawn from [0, act_spec.size()).
            return np.random.randint(0, act_spec.size(), 1000)

        self.alpha, self.epsilon, self.gamma = alpha, epsilon, gamma
        self.kappa, self.n = kappa, n

        # Zero-initialised table with one row per observation index and one
        # column per action index.
        self.q = np.zeros((obs_spec.size(), act_spec.size()))

        # cache_gen is defined elsewhere; presumably it hands out values from
        # the batch and asks for a new one when exhausted -- TODO confirm.
        self.sample = cache_gen(draw_actions)

        self.model = (TimeModel(act_spec, obs_spec, kappa) if self.kappa
                      else Model(act_spec, obs_spec))

        # Both start out unset.
        self.s = self.a = None
コード例 #2
0
    def __init__(self,
                 act_spec: Spec,
                 obs_spec: Spec,
                 alpha=0.5,
                 epsilon=0.1,
                 gamma=1.0):
        """Store the hyper-parameters and allocate a zeroed Q-table.

        Args:
            act_spec: Action spec; ``act_spec.size()`` is the number of
                Q-table columns and the exclusive upper bound of the random
                integers produced for ``self.sample``.
            obs_spec: Observation spec; ``obs_spec.size()`` is the number of
                Q-table rows.
            alpha: Stored as ``self.alpha`` (presumably the step size).
            epsilon: Stored as ``self.epsilon`` (presumably the exploration
                rate).
            gamma: Stored as ``self.gamma`` (presumably the discount factor).
        """
        AgentProgram.__init__(self, act_spec, obs_spec)

        def draw_actions():
            # One batch of 1000 integers drawn from [0, act_spec.size()).
            return np.random.randint(0, act_spec.size(), 1000)

        self.alpha, self.epsilon, self.gamma = alpha, epsilon, gamma

        # One row per observation index, one column per action index.
        self.q = np.zeros((obs_spec.size(), act_spec.size()))

        # cache_gen is defined elsewhere; presumably it hands out values from
        # the batch and asks for a new one when exhausted -- TODO confirm.
        self.sample = cache_gen(draw_actions)

        # Both start out unset.
        self.s = self.a = None
コード例 #3
0
ファイル: dyna.py プロジェクト: ocraft/rl-sandbox
 def __init__(self,
              act_spec: Spec,
              obs_spec: Spec,
              alpha=0.5,
              epsilon=0.1,
              gamma=1.0,
              n=5,
              kappa=0):
     """Initialise via ``DynaQ`` and add a step counter and a tau table.

     All arguments are forwarded positionally and unchanged to
     ``DynaQ.__init__``. Afterwards ``self.model`` is replaced with a plain
     ``Model`` regardless of ``kappa``.

     Args:
         act_spec: Action spec; ``act_spec.size()`` is the number of
             columns of ``self.tau``.
         obs_spec: Observation spec; ``obs_spec.size()`` is the number of
             rows of ``self.tau``.
         alpha: Forwarded to ``DynaQ.__init__``.
         epsilon: Forwarded to ``DynaQ.__init__``.
         gamma: Forwarded to ``DynaQ.__init__``.
         n: Forwarded to ``DynaQ.__init__``.
         kappa: Forwarded to ``DynaQ.__init__``.
     """
     DynaQ.__init__(
         self, act_spec, obs_spec, alpha, epsilon, gamma, n, kappa)

     # Counter starting at zero (presumably the global time step -- confirm
     # against the methods that update it).
     self.t = 0

     # Overrides whatever model the base initialiser installed.
     self.model = Model(act_spec, obs_spec)

     # int32 table of ones, one cell per (observation, action) index pair.
     tau_shape = (obs_spec.size(), act_spec.size())
     self.tau = np.ones(tau_shape, dtype=np.int32)
コード例 #4
0
ファイル: linear.py プロジェクト: ocraft/rl-sandbox
    def __init__(self,
                 act_spec: Spec,
                 obs_spec: Spec,
                 alpha_w=0.5,
                 alpha_theta=0.5,
                 gamma=1.0,
                 lambda_w=0.9,
                 lambda_theta=0.9):
        """Allocate critic and actor parameter vectors and tiling helpers.

        Every vector has length ``self.MAX_SIZE`` and starts at zero. Both
        step sizes are the corresponding ``alpha_*`` divided by
        ``self.N_TILINGS``.

        Args:
            act_spec: Action spec; ``act_spec.size()`` determines the list of
                action indices kept in ``self._a``.
            obs_spec: Iterable of items exposing ``hi`` and ``lo``; each one
                yields a scale of ``N_TILINGS / (hi - lo)``.
            alpha_w: Stored as ``self.alpha_w``; numerator of
                ``self.step_size_w``.
            alpha_theta: Stored as ``self.alpha_theta``; numerator of
                ``self.step_size_theta``.
            gamma: Stored as ``self.gamma`` (presumably the discount factor).
            lambda_w: Stored as ``self.lambda_w`` (presumably the trace decay
                for ``z_w`` -- confirm).
            lambda_theta: Stored as ``self.lambda_theta`` (presumably the
                trace decay for ``z_theta`` -- confirm).
        """
        AgentProgram.__init__(self, act_spec, obs_spec)

        max_size = self.MAX_SIZE
        n_tilings = self.N_TILINGS

        # Scalar settings.
        self.gamma = gamma
        self.alpha_w, self.alpha_theta = alpha_w, alpha_theta
        self.lambda_w, self.lambda_theta = lambda_w, lambda_theta
        self.step_size_w = alpha_w / n_tilings
        self.step_size_theta = alpha_theta / n_tilings

        # Critic: weights and their companion vector.
        self.w = np.zeros(max_size)
        self.z_w = np.zeros(max_size)

        # Actor: parameters and their companion vector.
        self.theta = np.zeros(max_size)
        self.z_theta = np.zeros(max_size)

        self.hashtable = IHT(max_size)
        # One scale per observation dimension.
        self.scales = [n_tilings / (dim.hi - dim.lo) for dim in obs_spec]
        # Plain Python list [0, 1, ..., act_spec.size() - 1].
        self._a = np.arange(act_spec.size()).tolist()

        # Both start out unset; I starts at 1.
        self.s0 = self.a0 = None
        self.I = 1
コード例 #5
0
 def __init__(self,
              act_spec: Spec,
              obs_spec: Spec,
              n=2,
              alpha=0.5,
              epsilon=0.1,
              gamma=1.0):
     """Set up the Q-table, the derived policy and the episode buffers.

     Args:
         act_spec: Action spec; ``act_spec.size()`` is the number of table
             columns and the length of ``self.actions``.
         obs_spec: Observation spec; ``obs_spec.size()`` is the number of
             table rows.
         n: Stored as ``self.n`` (presumably the n-step horizon --
             confirm against the update code).
         alpha: Stored as ``self.alpha`` (presumably the step size).
         epsilon: Stored as ``self.epsilon``; also passed to
             ``e_greedy_policy``.
         gamma: Stored as ``self.gamma`` (presumably the discount factor).
     """
     AgentProgram.__init__(self, act_spec, obs_spec)
     self.alpha, self.epsilon, self.gamma, self.n = alpha, epsilon, gamma, n

     # Zeroed float64 table, one cell per (observation, action) index pair.
     self.q = np.zeros((obs_spec.size(), act_spec.size()), dtype=np.float64)
     self.actions = np.arange(act_spec.size())

     # A second zeroed table of the same shape is handed to
     # e_greedy_policy together with q and epsilon; its result becomes pi.
     pi_table = np.zeros((obs_spec.size(), act_spec.size()),
                         dtype=np.float64)
     self.pi = e_greedy_policy(pi_table, self.q, self.epsilon)

     # Bookkeeping: two counters at zero, _T at infinity, three empty lists.
     self._t, self._tau = 0, 0
     self._T = float('inf')
     self._S, self._A, self._R = [], [], []
コード例 #6
0
ファイル: linear.py プロジェクト: ocraft/rl-sandbox
    def __init__(self,
                 act_spec: Spec,
                 obs_spec: Spec,
                 alpha=0.5,
                 epsilon=0.1,
                 gamma=1.0):
        """Allocate the weight vector, tiling helpers and action sampler.

        Args:
            act_spec: Action spec; ``act_spec.size()`` is the exclusive upper
                bound of the random integers produced for ``self.sample``.
            obs_spec: Iterable of items exposing ``hi`` and ``lo``; each one
                yields a scale of ``N_TILINGS / (hi - lo)``.
            alpha: Stored as ``self.alpha``; ``self.step_size`` is
                ``alpha / N_TILINGS``.
            epsilon: Stored as ``self.epsilon`` (presumably the exploration
                rate).
            gamma: Stored as ``self.gamma`` (presumably the discount factor).
        """
        AgentProgram.__init__(self, act_spec, obs_spec)

        max_size = self.MAX_SIZE
        n_tilings = self.N_TILINGS

        def draw_actions():
            # One batch of 1000 integers drawn from [0, act_spec.size()).
            return np.random.randint(0, act_spec.size(), 1000)

        self.alpha, self.epsilon, self.gamma = alpha, epsilon, gamma
        self.step_size = alpha / n_tilings

        self.hashtable = IHT(max_size)
        # Zeroed weight vector of length MAX_SIZE.
        self.w = np.zeros(max_size)
        # One scale per observation dimension.
        self.scales = [n_tilings / (dim.hi - dim.lo) for dim in obs_spec]

        # cache_gen is defined elsewhere; presumably it hands out values from
        # the batch and asks for a new one when exhausted -- TODO confirm.
        self.sample = cache_gen(draw_actions)

        # Both start out unset.
        self.s = self.a = None
コード例 #7
0
ファイル: test.py プロジェクト: ocraft/rl-sandbox
 def __init__(self,
              act_spec: Spec,
              obs_spec: Spec,
              pi):
     """Keep the supplied policy and precompute the action indices.

     Args:
         act_spec: Action spec; ``self.actions`` becomes
             ``np.arange(act_spec.size())``.
         obs_spec: Observation spec, passed on to ``AgentProgram``.
         pi: Stored unchanged as ``self.pi``.
     """
     AgentProgram.__init__(self, act_spec, obs_spec)
     # Array [0, 1, ..., act_spec.size() - 1].
     self.actions = np.arange(act_spec.size())
     self.pi = pi