def __init__(self, act_spec: Spec, obs_spec: Spec, alpha=0.5, epsilon=0.1, gamma=1.0, n=5, kappa=0):
    """Initialize tabular Q values and a world model for Dyna-style planning.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; ``size()`` gives the number of discrete states.
        alpha: step size for value updates.
        epsilon: exploration rate.
        gamma: discount factor.
        n: number of model-based updates per real step (presumably planning steps — Dyna convention).
        kappa: exploration-bonus weight; any nonzero value selects ``TimeModel``.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    self.alpha = alpha
    self.epsilon = epsilon
    self.gamma = gamma
    self.kappa = kappa
    self.n = n
    # One action-value entry per (state, action) pair.
    self.q = np.zeros((obs_spec.size(), act_spec.size()))
    # Random action indices pre-drawn in batches of 1000 and served via cache_gen.
    self.sample = cache_gen(lambda: np.random.randint(0, act_spec.size(), 1000))
    # Nonzero kappa swaps in the model that tracks elapsed time per transition.
    self.model = TimeModel(act_spec, obs_spec, kappa) if self.kappa else Model(act_spec, obs_spec)
    # Previous state/action; populated after the first real step.
    self.s = None
    self.a = None
def __init__(self, act_spec: Spec, obs_spec: Spec, alpha=0.5, epsilon=0.1, gamma=1.0):
    """Initialize a tabular agent: hyperparameters, Q table, and action sampler.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; ``size()`` gives the number of discrete states.
        alpha: step size for value updates.
        epsilon: exploration rate.
        gamma: discount factor.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    self.alpha, self.epsilon, self.gamma = alpha, epsilon, gamma
    # One action-value entry per (state, action) pair.
    self.q = np.zeros((obs_spec.size(), act_spec.size()))
    # Random action indices pre-drawn in batches of 1000 and served via cache_gen.
    self.sample = cache_gen(lambda: np.random.randint(0, act_spec.size(), 1000))
    # Previous state/action; populated after the first real step.
    self.s = None
    self.a = None
def __init__(self, act_spec: Spec, obs_spec: Spec, alpha=0.5, epsilon=0.1, gamma=1.0, n=5, kappa=0):
    """Extend DynaQ with a step counter and per-pair visit-age table.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; ``size()`` gives the number of discrete states.
        alpha: step size for value updates.
        epsilon: exploration rate.
        gamma: discount factor.
        n: number of model-based updates per real step (forwarded to DynaQ).
        kappa: exploration-bonus weight (forwarded to DynaQ).
    """
    DynaQ.__init__(self, act_spec, obs_spec, alpha, epsilon, gamma, n, kappa)
    # NOTE(review): this unconditionally replaces the model chosen by the parent
    # constructor (which selects TimeModel when kappa != 0) with a plain Model —
    # presumably the bonus is applied via ``tau`` below instead; confirm intended.
    self.model = Model(act_spec, obs_spec)
    # Global time step counter.
    self.t = 0
    # Time since each (state, action) pair was last taken; starts at 1 everywhere.
    self.tau = np.ones((obs_spec.size(), act_spec.size()), dtype=np.int32)
def __init__(self, act_spec: Spec, obs_spec: Spec, alpha_w=0.5, alpha_theta=0.5, gamma=1.0, lambda_w=0.9, lambda_theta=0.9):
    """Initialize an actor-critic agent over tile-coded features.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; iterating it yields per-dimension bounds
            (``hi``/``lo``) used to scale inputs for tile coding.
        alpha_w: critic step size (divided by the number of tilings).
        alpha_theta: actor step size (divided by the number of tilings).
        gamma: discount factor.
        lambda_w: critic eligibility-trace decay.
        lambda_theta: actor eligibility-trace decay.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    size = self.MAX_SIZE
    # --- critic: value weights and eligibility trace ---
    self.w = np.zeros(size)
    self.alpha_w, self.lambda_w = alpha_w, lambda_w
    self.z_w = np.zeros(size)
    self.step_size_w = alpha_w / self.N_TILINGS
    # --- actor: policy weights and eligibility trace ---
    self.theta = np.zeros(size)
    self.alpha_theta, self.lambda_theta = alpha_theta, lambda_theta
    self.z_theta = np.zeros(size)
    self.step_size_theta = alpha_theta / self.N_TILINGS
    self.gamma = gamma
    # Index hash table backing the tile coder.
    self.hashtable = IHT(size)
    # Per-dimension scale factors mapping observation ranges onto the tilings.
    self.scales = [self.N_TILINGS / (dim.hi - dim.lo) for dim in obs_spec]
    # Plain-int list of all action indices.
    self._a = list(range(act_spec.size()))
    # Episode-start state/action and the discounting accumulator I.
    self.s0 = None
    self.a0 = None
    self.I = 1
def __init__(self, act_spec: Spec, obs_spec: Spec, n=2, alpha=0.5, epsilon=0.1, gamma=1.0):
    """Initialize an n-step tabular agent with an epsilon-greedy policy.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; ``size()`` gives the number of discrete states.
        n: length of the n-step update window.
        alpha: step size for value updates.
        epsilon: exploration rate.
        gamma: discount factor.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    self.alpha, self.epsilon, self.gamma, self.n = alpha, epsilon, gamma, n
    shape = (obs_spec.size(), act_spec.size())
    # One action-value entry per (state, action) pair.
    self.q = np.zeros(shape, dtype=np.float64)
    self.actions = np.arange(act_spec.size())
    # Epsilon-greedy policy; a fresh zero array is passed alongside q
    # (kept distinct from self.q, matching the original construction).
    self.pi = e_greedy_policy(np.zeros(shape, dtype=np.float64), self.q, self.epsilon)
    # Episode bookkeeping for the n-step window: time, terminal time,
    # stored states/actions/rewards, and the update index tau.
    self._t = 0
    self._T = float('inf')
    self._S = []
    self._A = []
    self._R = []
    self._tau = 0
def __init__(self, act_spec: Spec, obs_spec: Spec, alpha=0.5, epsilon=0.1, gamma=1.0):
    """Initialize a linear function-approximation agent over tile-coded features.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec; iterating it yields per-dimension bounds
            (``hi``/``lo``) used to scale inputs for tile coding.
        alpha: step size (divided by the number of tilings).
        epsilon: exploration rate.
        gamma: discount factor.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    self.alpha = alpha
    self.epsilon = epsilon
    self.gamma = gamma
    # Index hash table backing the tile coder, plus the weight vector it indexes.
    self.hashtable = IHT(self.MAX_SIZE)
    self.w = np.zeros(self.MAX_SIZE)
    # Per-dimension scale factors mapping observation ranges onto the tilings.
    self.scales = [self.N_TILINGS / (dim.hi - dim.lo) for dim in obs_spec]
    self.step_size = alpha / self.N_TILINGS
    # Random action indices pre-drawn in batches of 1000 and served via cache_gen.
    self.sample = cache_gen(lambda: np.random.randint(0, act_spec.size(), 1000))
    # Previous state/action; populated after the first real step.
    self.s = None
    self.a = None
def __init__(self, act_spec: Spec, obs_spec: Spec, pi):
    """Initialize an agent that acts according to a fixed, caller-supplied policy.

    Args:
        act_spec: action-space spec; ``size()`` gives the number of discrete actions.
        obs_spec: observation-space spec.
        pi: the policy object/array used to select actions.
    """
    AgentProgram.__init__(self, act_spec, obs_spec)
    # All discrete action indices, for sampling against pi.
    self.actions = np.arange(act_spec.size())
    self.pi = pi