def __init__(self, gamma=0.95, threshold=0):
    ValueBasedLearner.__init__(self)
    self.num_features = 5
    self.num_actions = 1
    self.sigma = 1.0
    self.kern_c = 10
    self.kern_sigma = 0.5
    self.thresh = threshold
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
    self.lastreward = None
    self.state_dict = None
    self.cum_reward = np.array([])
    self.u_tilde = np.array([])
    self.C_tilde = np.array([[]])
    self.d = 0.0
    self.v_inv = 0.0
    self.c_tild = np.array([])
    self.dataset = None
    # self.g = np.array([])
    self.K_inv = np.array([[]])
def __init__(self, alpha=0.5, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
def __init__(self, alpha=0.5, gamma=0.99, neg_reward=False):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.neg_reward = neg_reward
    self.laststate = None
    self.lastactions = None
def __init__(self, alpha=1.0, w=1.0, gamma=0.99, iters=10):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha  # step scale
    self.w = w  # learning rate
    self.gamma = gamma  # temporal discount
    self.iters = iters  # number of times to propagate value changes
    self.step = 0
def __init__(self, maxEpochs=20, indexOfAgent=None):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
    self.maxEpochs = maxEpochs
    # self.ownerAgentProperties["requireOtherAgentsState"] = False
    self.ownerAgentProperties["requireJointAction"] = False
    self.ownerAgentProperties["requireJointReward"] = False
    self.isFirstLerning = True
def __init__(self, num_actions, num_features, **kwargs):
    ValueBasedLearner.__init__(self)
    setAllArgs(self, kwargs)
    self.explorer = None
    self.num_actions = num_actions
    self.num_features = num_features
    if self.randomInit:
        self._theta = randn(self.num_actions, self.num_features) / 10.
    else:
        self._theta = zeros((self.num_actions, self.num_features))
    self._additionalInit()
    self._behaviorPolicy = self._boltzmannPolicy
    self.reset()
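# Hedged illustration (not part of the snippets above): the constructor wires
# self._behaviorPolicy = self._boltzmannPolicy over a (num_actions, num_features)
# weight matrix _theta, so the behavior policy is presumably a softmax over the
# linear action values theta . features. A minimal sketch, assuming a
# `temperature` parameter that is not shown in the original:
import numpy as np

def _boltzmannPolicy(self, features, temperature=1.0):
    values = np.dot(self._theta, features)          # one linear value per action
    prefs = (values - values.max()) / temperature   # shift by max for numerical stability
    probs = np.exp(prefs)
    probs /= probs.sum()                            # normalize to a distribution
    return np.random.choice(len(probs), p=probs)    # sample an action index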
def __init__(self, maxEpochs=20, indexOfAgent=None):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
    self.maxEpochs = maxEpochs
    # self.ownerAgentProperties["requireOtherAgentsState"] = False
    self.ownerAgentProperties["requireJointAction"] = False
    self.ownerAgentProperties["requireJointReward"] = False
    self.isFirstLerning = True
def __init__(self, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
    self.num_features = 2
    self.num_actions = 1
    self.kern_c = 10
    self.covariance_mat = np.array([[]])
    self.inv = np.array([])
    self.state_dict = None
    self.cum_reward = np.array([])
    self.H = []
    self.kern_sigma = 0.2
    self.dataset = None
    self.sigma = 1
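# Hedged illustration (an assumption, not shown in the snippets above): the
# kern_c / kern_sigma attributes set by the two kernel-based constructors are
# typically the scale and bandwidth of a squared-exponential (Gaussian) kernel:
import numpy as np

def squared_exponential_kernel(x, y, kern_c=10, kern_sigma=0.2):
    # k(x, y) = c * exp(-||x - y||^2 / (2 * sigma^2))
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return kern_c * np.exp(-np.dot(diff, diff) / (2.0 * kern_sigma ** 2))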
def __init__(self, maxEpochs=20):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
    self.maxEpochs = maxEpochs
def __init__(self, indexOfAgent=None, **kwargs):
    ValueBasedLearner.__init__(self)
    self.indexOfAgent = indexOfAgent
def __init__(self, alpha=0.5):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = config.getf('gammaDiscountReward')
    self.netManager = CaffeMultiLayerPerceptronManagement(config.get('networkDir'))
def __init__(self, alpha=0.5, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.netManager = cnm.CaffeConvNetManagement(config.get('networkDir'))
def __init__(self, indexOfAgent=None, **kwargs):
    ValueBasedLearner.__init__(self)
    self.indexOfAgent = indexOfAgent
def __init__(self):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
def __init__(self, alpha=0.5):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = config.getf('gammaDiscountReward')
    self.netManager = CaffeMultiLayerPerceptronManagement(config.get('networkDir'))