Example #1
    def __init__(self, gamma=0.95, threshold=0):
        ValueBasedLearner.__init__(self)
        self.num_features = 5
        self.num_actions = 1

        self.sigma = 1.0
        self.kern_c = 10
        self.kern_sigma = 0.5

        self.thresh = threshold
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
        self.lastreward = None

        self.state_dict = None
        self.cum_reward = np.array([])
        self.u_tilde = np.array([])
        self.C_tilde = np.array([[]])
        self.d = 0.0
        self.v_inv = 0.0
        self.c_tild = np.array([])
        self.dataset = None
        #self.g=np.array([])
        self.K_inv = np.array([[]])
Example #2
    def __init__(self, alpha=0.5, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
Example #3
    def __init__(self, alpha=0.5, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma

        self.laststate = None
        self.lastaction = None
Example #4
    def __init__(self, alpha=0.5, gamma=0.99, neg_reward=False):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha
        self.gamma = gamma
        self.neg_reward = neg_reward

        self.laststate = None
        self.lastactions = None
Example #5
 def __init__(self, maxEpochs=20, indexOfAgent=None,):
     ValueBasedLearner.__init__(self)
     self.gamma = 0.9
     self.maxEpochs = maxEpochs
     #
     self.ownerAgentProperties["requireOtherAgentsState"]=False
     self.ownerAgentProperties["requireJointAction"]=False
     self.ownerAgentProperties["requireJointReward"]=False
     self.isFirstLerning=True
Example #6
    def __init__(self, alpha=1.0, w=1.0, gamma=0.99, iters=10):
        ValueBasedLearner.__init__(self)

        self.alpha = alpha  # step scale
        self.w = w  # learning rate
        self.gamma = gamma  # temporal discount
        self.iters = iters  # number of times to propagate value changes

        self.step = 0
Example #7
 def __init__(self, num_actions, num_features, **kwargs):
     ValueBasedLearner.__init__(self)
     setAllArgs(self, kwargs)
     self.explorer = None        
     self.num_actions = num_actions
     self.num_features = num_features
     if self.randomInit:
         self._theta = randn(self.num_actions, self.num_features) / 10.
     else:
         self._theta = zeros((self.num_actions, self.num_features))
     self._additionalInit()
     self._behaviorPolicy = self._boltzmannPolicy
     self.reset()
Example #8
 def __init__(self, num_actions, num_features, **kwargs):
     ValueBasedLearner.__init__(self)
     setAllArgs(self, kwargs)
     self.explorer = None
     self.num_actions = num_actions
     self.num_features = num_features
     if self.randomInit:
         self._theta = randn(self.num_actions, self.num_features) / 10.
     else:
         self._theta = zeros((self.num_actions, self.num_features))
     self._additionalInit()
     self._behaviorPolicy = self._boltzmannPolicy
     self.reset()
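Examples #7 and #8 store a linear Q-function as a (num_actions, num_features) weight matrix _theta and pick actions with self._boltzmannPolicy. The following standalone sketch shows the generic Boltzmann (softmax) action-selection rule over such a linear Q-function; it is not PyBrain's exact implementation, and the function name and temperature parameter are illustrative.

import numpy as np

def boltzmann_policy(theta, features, temperature=1.0, rng=None):
    """Sample an action with probability proportional to exp(Q(s, a) / temperature).

    theta:    (num_actions, num_features) weight matrix, like _theta in Examples #7/#8.
    features: (num_features,) feature vector for the current state.
    """
    rng = rng if rng is not None else np.random.default_rng()
    q_values = theta @ features                         # one linear Q-value per action
    prefs = (q_values - q_values.max()) / temperature   # shift by the max for numerical stability
    probs = np.exp(prefs) / np.exp(prefs).sum()
    return rng.choice(len(q_values), p=probs)

# Toy usage: 3 actions, 4 features; with zero weights every action is equally likely.
theta = np.zeros((3, 4))
print(boltzmann_policy(theta, np.ones(4)))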
Example #9
 def __init__(
     self,
     maxEpochs=20,
     indexOfAgent=None,
 ):
     ValueBasedLearner.__init__(self)
     self.gamma = 0.9
     self.maxEpochs = maxEpochs
     #
     self.ownerAgentProperties["requireOtherAgentsState"] = False
     self.ownerAgentProperties["requireJointAction"] = False
     self.ownerAgentProperties["requireJointReward"] = False
     self.isFirstLerning = True
Example #10
    def __init__(self, gamma=0.99):
        ValueBasedLearner.__init__(self)

        self.gamma = gamma

        self.laststate = None
        self.lastaction = None

        self.num_features = 2
        self.num_actions = 1
        self.kern_c = 10

        self.covariance_mat = np.array([[]])
        self.inv = np.array([])
        self.state_dict = None
        self.cum_reward = np.array([])
        self.H = []
        self.kern_sigma = 0.2
        self.dataset = None
        self.sigma = 1
Example #11
 def reset(self):
     ValueBasedLearner.reset(self)
     self._callcount = 0
     self.newEpisode()
Example #12
 def __init__(self, indexOfAgent=None, **kwargs):
     ValueBasedLearner.__init__(self)
     self.indexOfAgent = indexOfAgent
Example #13
File: q.py Project: ikarpov/pybrain
 def reset(self):
     ValueBasedLearner.reset(self)
     self.laststate = None
     self.lastaction = None
Example #14
 def __init__(self, maxEpochs=20):
     ValueBasedLearner.__init__(self)
     self.gamma = 0.9
     self.maxEpochs = maxEpochs
Example #15
 def __init__(self, alpha=0.5):
   ValueBasedLearner.__init__(self)
   self.alpha = alpha
   self.gamma = config.getf('gammaDiscountReward')
   self.netManager = CaffeMultiLayerPerceptronManagement(config.get('networkDir'))
Example #16
 def reset(self):        
     ValueBasedLearner.reset(self)        
     self._callcount = 0
     self.newEpisode()
Example #17
 def newEpisode(self):
     ValueBasedLearner.newEpisode(self)
     self._callcount += 1
     self.learningRate *= ((self.learningRateDecay + self._callcount) /
                           (self.learningRateDecay + self._callcount + 1.))
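Examples #17 and #23 shrink learningRate each episode by the factor (learningRateDecay + callcount) / (learningRateDecay + callcount + 1). The product telescopes, so after n episodes the initial rate is scaled by (decay + 1) / (decay + n + 1). A standalone illustration of the schedule (the starting values below are illustrative, not PyBrain defaults):

learning_rate = 0.5        # illustrative initial learningRate
decay = 10.0               # illustrative learningRateDecay
for callcount in range(1, 6):
    learning_rate *= (decay + callcount) / (decay + callcount + 1.0)
    print(callcount, round(learning_rate, 5))
# After n episodes the rate equals 0.5 * (decay + 1) / (decay + n + 1),
# e.g. 0.5 * 11 / 16 = 0.34375 after 5 episodes.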
Example #18
 def __init__(self):
     ValueBasedLearner.__init__(self)
     self.gamma = 0.9
Example #19
 def __init__(self, alpha=0.5):
     ValueBasedLearner.__init__(self)
     self.alpha = alpha
     self.gamma = config.getf('gammaDiscountReward')
     self.netManager = CaffeMultiLayerPerceptronManagement(
         config.get('networkDir'))
Example #20
 def __init__(self, indexOfAgent=None, **kwargs):
     ValueBasedLearner.__init__(self)
     self.indexOfAgent=indexOfAgent
Example #21
 def __init__(self, alpha=0.5, gamma=0.99):
     ValueBasedLearner.__init__(self)
     self.alpha = alpha
     self.gamma = gamma
     self.netManager = cnm.CaffeConvNetManagement(config.get('networkDir'))
Example #22
 def __init__(self, maxEpochs=20):
     ValueBasedLearner.__init__(self)
     self.gamma = 0.9
     self.maxEpochs = maxEpochs
Example #23
 def newEpisode(self):  
     ValueBasedLearner.newEpisode(self)      
     self._callcount += 1
     self.learningRate *= ((self.learningRateDecay + self._callcount) 
                           / (self.learningRateDecay + self._callcount + 1.))
Example #24
    epis=10000/batch #number of rollouts
    
    numbExp=1 #number of experiments

    env = None
    for runs in range(numbExp):
        # create environment
        #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
        if env != None: env.closeSocket()
        env = Tetra2Environment() 
        # create task
        task = WalkForwardTask(env)

        # create automatic programmer module for the robot (it will be received already created from the robot)
        channels_setup = [(1 if i!=2 else 0,1, (0,255), 0) for i in range(5)]
        
        programmer = Programmer(steps = 1000, channels_setup = channels_setup, types_subset = [1]) #, 2, 3])

        learner = ValueBasedLearner()
        agent = TetrapodAgent(programmer, learner)

        # create the experiment
        experiment = EpisodicExperiment(task, agent)

        for _ in range(epis):
            experiment.doEpisodes(batch)
            agent.learn(total_reward = task.getTotalReward())
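Example #24 follows the usual episodic pattern: collect batch episodes with experiment.doEpisodes(batch), then call agent.learn(...) with the task's accumulated reward. Below is a dependency-free sketch of that collect-then-learn control flow; every class in it is a stand-in, not the PyBrain API, and the real script uses Tetra2Environment, WalkForwardTask, Programmer, TetrapodAgent and EpisodicExperiment, whose imports and the value of batch are not shown in the excerpt.

# Stand-ins that mimic only the control flow of Example #24.
class StubTask:
    def __init__(self):
        self.total_reward = 0.0
    def run_episode(self):
        self.total_reward += 1.0           # pretend every episode earns a reward of 1
    def getTotalReward(self):
        return self.total_reward

class StubAgent:
    def learn(self, total_reward):
        print("learning from total reward:", total_reward)

batch = 10                                 # placeholder; the real batch size is defined outside the excerpt
task, agent = StubTask(), StubAgent()
for _ in range(3):                         # the real script iterates range(epis) times
    for _ in range(batch):                 # stands in for experiment.doEpisodes(batch)
        task.run_episode()
    agent.learn(total_reward=task.getTotalReward())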