Example #1
File: pbo.py Project: teopir/ifqi
    def fit(self, sast, r):
        """
        Perform a run of PBO using input data sast and r.
        Note that if the dataset does not change between iterations, you can
        provide None inputs after the first iteration.

        Args:
            sast (numpy.array, None): the input in the dataset
            r (numpy.array, None): the output in the dataset

        Returns:
            the history of the parameters used to update the q regressor
        """
        self.iteration_best_rho_value = np.inf

        next_states_idx = self.state_dim + self.action_dim
        self._sa = sast[:, :next_states_idx]
        self._snext = sast[:, next_states_idx:-1]
        self._absorbing = sast[:, -1]
        self._r = r

        optimizer = ExactNES(self._fitness, self._get_rho(),
                             minimize=True, batchSize=self._batch_size,
                             learningRate=self._learning_rate,
                             maxLearningSteps=self._learning_steps - 1,
                             importanceMixing=False,
                             maxEvaluations=None)
        optimizer.listener = self.my_listener
        optimizer.learn()
        self._q_weights_list.append(self._get_q_weights())

        return self._q_weights_list
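For reference, each row of sast passed to this fit is the concatenation [state, action, next_state, absorbing flag], with r holding the rewards. A minimal calling sketch; pbo, states, actions, next_states, absorbing and rewards are hypothetical placeholders for an instance of this class and already-prepared numpy arrays:

import numpy as np

sast = np.hstack([states, actions, next_states, absorbing.reshape(-1, 1)])
weight_history = pbo.fit(sast, rewards)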
Example #2
def experiment3():
    # minimize fitnessFunction with NES, starting from the network's
    # current parameters
    l = ExactNES(fitnessFunction, myNetwork.params)
    l.minimize = True
    l.verbose = True
    l.maxLearningSteps = 1000
    params, fitness = l.learn()
    # write the optimized parameters back into the network and log it
    myNetwork._setParameters(params)
    logNet()
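As Examples #1 and #2 together suggest, pybrain's black-box optimizers take their options either as constructor keyword arguments or as attributes set before learn() is called. A minimal sketch of the two equivalent styles; fitnessFunction and x0 stand in for any objective and initial parameter vector:

l = ExactNES(fitnessFunction, x0, minimize=True, maxLearningSteps=1000)
# ...or, equivalently, configure the same options after construction:
l = ExactNES(fitnessFunction, x0)
l.minimize = True
l.maxLearningSteps = 1000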
Example #3
    def perform_gradient_descent(self, chromosome):
        from pybrain.optimization import ExactNES, OriginalNES
        # objF: objective function defined elsewhere in the original file
        inp = numpy.array([v for v in chromosome])

        if self.num_eval != 0:
            #bf = BoundsSafeFunction(self.func, self.bounds)
            l = ExactNES(objF, inp[:], rangemins=self.mins, rangemaxs=self.maxs,
                         learningRate=0.01,
                         initCovariances=numpy.eye(len(self.bounds)) * 0.1)
            l.minimize = True
            l.maxEvaluations = self.num_eval
            #l.rangemins = self.mins
            #l.rangemaxs = self.maxs
            (new_K, success) = l.learn()
            # copy the optimized parameters back into the chromosome
            for i in range(len(chromosome)):
                chromosome[i] = new_K[i]
            score = objF(numpy.array(new_K))
        else:
            # no evaluation budget: score the chromosome unchanged
            score = objF(inp)
        return score
Example #4
    def fit(self, sast=None, r=None):
        if sast is not None:
            next_states_idx = self.state_dim + self.action_dim
            self._sa = sast[:, :next_states_idx]
            self._snext = sast[:, next_states_idx:-1]
            self._absorbing = sast[:, -1]
        if r is not None:
            self._r = r

        old_theta = self._estimator._regressor.theta

        self._optimizer = ExactNES(self._fitness,
                                   self._get_rho(),
                                   minimize=True,
                                   batchSize=100)

        rho, score = self._optimizer.learn()
        self._estimator._regressor.theta = self._f(rho)

        self._iteration += 1

        return (self._estimator._regressor.theta,
                np.sum(self._estimator._regressor.theta - old_theta)**2)
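The squared parameter change returned above lends itself to a simple stopping rule. A minimal driver sketch; pbo stands in for an instance of the surrounding class, and sast and r are assumed to be prepared as in Example #1:

theta, change = pbo.fit(sast, r)
while change > 1e-6:
    # the dataset is unchanged, so None inputs reuse the cached arrays
    theta, change = pbo.fit()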
Example #6
from pybrain.optimization import ExactNES
from pybrain.rl.experiments import EpisodicExperiment
# remaining names used below come from pybrain's standard example setup
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.tools.example_tools import ExTools

batch = 2  #number of samples per learning step
prnts = 100  #number of learning steps after results are printed
epis = 4000 // batch // prnts  #number of rollouts
numbExp = 10  #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        print "Epsilon   : ", agent.learner.sigma
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
Example #7
from pybrain.optimization import ExactNES

def learn(obj_fun, init_values):
    l = ExactNES(obj_fun, init_values, minimize=True, verbose=True)
    res = l.learn()
    return res[0]
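A quick way to sanity-check this pattern is a trivially minimizable objective. A minimal sketch, bounding the run with maxLearningSteps; the squared-norm objective has its minimum at the zero vector:

import numpy as np
from pybrain.optimization import ExactNES

l = ExactNES(lambda x: float(np.dot(x, x)), np.ones(5),
             minimize=True, maxLearningSteps=200)
best_params, best_fitness = l.learn()
print(best_params)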
Example #8
# the snippet relies on numpy, keras and pybrain; Algorithm is the
# project's own base class, defined elsewhere
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from pybrain.optimization import ExactNES


class PBO(Algorithm):
    def __init__(self,
                 estimator,
                 state_dim,
                 action_dim,
                 discrete_actions,
                 gamma,
                 horizon,
                 features=None,
                 verbose=False):
        self._regressor_rho = Sequential()
        self._regressor_rho.add(Dense(30, input_shape=(2, ),
                                      activation='relu'))
        self._regressor_rho.add(Dense(2, activation='linear'))
        self._regressor_rho.compile(optimizer='rmsprop', loss='mse')

        super(PBO, self).__init__(estimator, state_dim, action_dim,
                                  discrete_actions, gamma, horizon, features,
                                  verbose)

    def fit(self, sast=None, r=None):
        if sast is not None:
            next_states_idx = self.state_dim + self.action_dim
            self._sa = sast[:, :next_states_idx]
            self._snext = sast[:, next_states_idx:-1]
            self._absorbing = sast[:, -1]
        if r is not None:
            self._r = r

        old_theta = self._estimator._regressor.theta

        self._optimizer = ExactNES(self._fitness,
                                   self._get_rho(),
                                   minimize=True,
                                   batchSize=100)

        rho, score = self._optimizer.learn()
        self._estimator._regressor.theta = self._f(rho)

        self._iteration += 1

        return (self._estimator._regressor.theta,
                np.sum(self._estimator._regressor.theta - old_theta)**2)

    def _fitness(self, rho):
        # mean squared empirical Bellman residual induced by rho
        Q = self._estimator.predict(self._sa, f_rho=self._f(rho))
        maxQ, _ = self.maxQA(self._snext, self._absorbing)

        return np.mean((Q - self._r - self.gamma * maxQ)**2)

    def _f(self, rho):
        self._set_rho(rho)
        output = self._regressor_rho.predict(
            np.array([self._estimator._regressor.theta]),
            batch_size=1).ravel()

        return output

    def _get_rho(self):
        rho = self._regressor_rho.get_weights()
        r = list()
        for i in rho:
            r += i.ravel().tolist()

        return np.array(r)

    def _set_rho(self, rho):
        weights = list()
        rho = rho.tolist()
        for l in self._regressor_rho.layers:
            w = l.get_weights()[0]
            b = l.get_weights()[1]
            weights.append(np.array(rho[:w.size]).reshape(w.shape))
            del rho[:w.size]
            weights.append(np.array(rho[:b.size]))
            del rho[:b.size]

        self._regressor_rho.set_weights(weights)
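The _get_rho/_set_rho pair above just flattens every Keras weight array into one vector and writes the pieces back in their original shapes. A standalone sketch of the same round trip, assuming keras and numpy are installed; the tiny model mirrors the regressor built in __init__:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(30, input_shape=(2,), activation='relu'))
model.add(Dense(2, activation='linear'))

# flatten all weight arrays into a single rho vector
rho = np.concatenate([w.ravel() for w in model.get_weights()])

# restore: carve rho back into arrays of the original shapes, in order
restored, offset = [], 0
for w in model.get_weights():
    restored.append(rho[offset:offset + w.size].reshape(w.shape))
    offset += w.size
model.set_weights(restored)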