Example #1
    def __init__(self, gamma=0.95, threshold=0):
        ValueBasedLearner.__init__(self)
        self.num_features = 5
        self.num_actions = 1

        self.sigma = 1.0
        self.kern_c = 10
        self.kern_sigma = 0.5

        self.thresh = threshold
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
        self.lastreward = None

        self.state_dict = None
        self.cum_reward = np.array([])
        self.u_tilde = np.array([])
        self.C_tilde = np.array([[]])
        self.d = 0.0
        self.v_inv = 0.0
        self.c_tild = np.array([])
        self.dataset = None
        #self.g=np.array([])
        self.K_inv = np.array([[]])
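Several of these snippets (this one and Example #10) reference np without showing the import; they assume NumPy under its conventional alias:

    import numpy as np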
Example #2
    def __init__(self, alpha=0.5, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
Example #3
    def __init__(self, alpha=0.5, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
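The defaults in Examples #2 and #3 (alpha=0.5, gamma=0.99) match PyBrain's tabular Q learner, so a minimal wiring sketch might look like the following; the table sizes here are hypothetical:

    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.learners import Q
    from pybrain.rl.agents import LearningAgent

    # Hypothetical: 16 discrete states, 4 actions.
    table = ActionValueTable(16, 4)
    table.initialize(0.0)  # start all Q-values at zero
    learner = Q(alpha=0.5, gamma=0.99)
    agent = LearningAgent(table, learner)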
Example #4
    def __init__(self, alpha=0.5, gamma=0.99, neg_reward=False):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma
        self.neg_reward = neg_reward

        self.laststate = None
        self.lastactions = None
Example #5
    def __init__(self, alpha=1.0, w=1.0, gamma=0.99, iters=10):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha  # step scale
        self.w = w  # learning rate
        self.gamma = gamma  # temporal discount
        self.iters = iters  # number of times to propagate value changes

        self.step = 0
Example #6
    def __init__(self, maxEpochs=20, indexOfAgent=None):
        ValueBasedLearner.__init__(self)
        self.gamma = 0.9
        self.maxEpochs = maxEpochs
        self.ownerAgentProperties["requireOtherAgentsState"] = False
        self.ownerAgentProperties["requireJointAction"] = False
        self.ownerAgentProperties["requireJointReward"] = False
        self.isFirstLerning = True
Example #7
    def __init__(self, num_actions, num_features, **kwargs):
        ValueBasedLearner.__init__(self)
        setAllArgs(self, kwargs)
        self.explorer = None
        self.num_actions = num_actions
        self.num_features = num_features
        if self.randomInit:
            self._theta = randn(self.num_actions, self.num_features) / 10.
        else:
            self._theta = zeros((self.num_actions, self.num_features))
        self._additionalInit()
        self._behaviorPolicy = self._boltzmannPolicy
        self.reset()
Example #8
    def __init__(self, num_actions, num_features, **kwargs):
        ValueBasedLearner.__init__(self)
        setAllArgs(self, kwargs)
        self.explorer = None
        self.num_actions = num_actions
        self.num_features = num_features
        if self.randomInit:
            self._theta = randn(self.num_actions, self.num_features) / 10.
        else:
            self._theta = zeros((self.num_actions, self.num_features))
        self._additionalInit()
        self._behaviorPolicy = self._boltzmannPolicy
        self.reset()
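Examples #7 and #8 appear to be the constructor of PyBrain's linear function-approximation learner base class. A minimal usage sketch, assuming Q_LinFA and LinearFA_Agent from the corresponding PyBrain modules and hypothetical problem sizes:

    from pybrain.rl.learners.valuebased.linearfa import Q_LinFA
    from pybrain.rl.agents.linearfa import LinearFA_Agent

    # Hypothetical: 4 actions over an 8-dimensional feature vector.
    learner = Q_LinFA(num_actions=4, num_features=8)
    agent = LinearFA_Agent(learner)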
Example #9
    def __init__(self, maxEpochs=20, indexOfAgent=None):
        ValueBasedLearner.__init__(self)
        self.gamma = 0.9
        self.maxEpochs = maxEpochs
        self.ownerAgentProperties["requireOtherAgentsState"] = False
        self.ownerAgentProperties["requireJointAction"] = False
        self.ownerAgentProperties["requireJointReward"] = False
        self.isFirstLerning = True
Example #10
    def __init__(self, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.gamma = gamma

        self.laststate = None
        self.lastaction = None

        self.num_features = 2
        self.num_actions = 1
        self.kern_c = 10

        self.covariance_mat = np.array([[]])
        self.inv = np.array([])
        self.state_dict = None
        self.cum_reward = np.array([])
        self.H = []
        self.kern_sigma = 0.2
        self.dataset = None
        self.sigma = 1
Example #11
    def __init__(self, maxEpochs=20):
        ValueBasedLearner.__init__(self)
        self.gamma = 0.9
        self.maxEpochs = maxEpochs
Example #12
    def __init__(self, indexOfAgent=None, **kwargs):
        ValueBasedLearner.__init__(self)
        self.indexOfAgent = indexOfAgent
Example #13
    def __init__(self, alpha=0.5):
        ValueBasedLearner.__init__(self)
        self.alpha = alpha
        self.gamma = config.getf('gammaDiscountReward')
        self.netManager = CaffeMultiLayerPerceptronManagement(config.get('networkDir'))
Example #14
File: nfq.py Project: DanSGraham/code
    def __init__(self, maxEpochs=20):
        ValueBasedLearner.__init__(self)
        self.gamma = 0.9
        self.maxEpochs = maxEpochs
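Example #14 is an NFQ constructor (nfq.py). A typical PyBrain wiring pairs it with a neural action-value network; a minimal sketch, assuming the maxEpochs keyword shown above and a hypothetical 3-dimensional state space with 2 actions:

    from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
    from pybrain.rl.agents import LearningAgent

    net = ActionValueNetwork(3, 2)  # 3 state dims, 2 discrete actions
    learner = NFQ(maxEpochs=20)
    agent = LearningAgent(net, learner)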
Example #15
    def __init__(self, alpha=0.5, gamma=0.99):
        ValueBasedLearner.__init__(self)
        self.alpha = alpha
        self.gamma = gamma
        self.netManager = cnm.CaffeConvNetManagement(config.get('networkDir'))
Example #16
    def __init__(self, indexOfAgent=None, **kwargs):
        ValueBasedLearner.__init__(self)
        self.indexOfAgent = indexOfAgent
Example #17
    def __init__(self):
        ValueBasedLearner.__init__(self)
        self.gamma = 0.9
Example #18
    def __init__(self, alpha=0.5):
        ValueBasedLearner.__init__(self)
        self.alpha = alpha
        self.gamma = config.getf('gammaDiscountReward')
        self.netManager = CaffeMultiLayerPerceptronManagement(
            config.get('networkDir'))