                # print 'added feature'
                self.addNewWeight()
                addedFeature = True
                self.features_num += 1
                self.logger.log('Added feature. \t %d total feats' % self.features_num)
            else:
                break
        return addedFeature

    def featureType(self):
        return float


if __name__ == '__main__':
    STDOUT_FILE = 'out.txt'
    JOB_ID = 1
    OUT_PATH = 'Results/Temp'
    # logger = Logger('%s/%d-%s' % (OUT_PATH, JOB_ID, STDOUT_FILE))
    logger = Logger()
    discovery_threshold = 1
    domain = GridWorld()
    rep = BEBF(domain, logger, debug=1, batchThreshold=10 ** -5)
    rep.theta = arange(rep.features_num * domain.actions_num) * 10
    print 'initial features'
    print rep.features_num, '---', rep.features
    s = domain.s0()
    a = domain.possibleActions(s)
    a = a[0]
    r, ns, terminal = domain.step(s, a)
    print 'step 2 r,ns', r, ns
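    # Hypothetical continuation of the smoke test above (assumes the base
    # Representation interface exposes phi(s), returning the feature vector
    # for state s): inspect the BEBF features of the successor state.
    print 'phi(ns)', rep.phi(ns)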
        r = v / v.sum()
        assert not np.any(np.isnan(r))
        return r


if __name__ == "__main__":
    # Finite differences check of dlogpi
    from Domains import GridWorld
    from Representations import Tabular
    from scipy.optimize import check_grad, approx_fprime

    MAZE = "./Domains/GridWorldMaps/4x5.txt"
    NOISE = 0.3
    logger = Logger()
    domain = GridWorld(MAZE, noise=NOISE, logger=logger)
    representation = Tabular(logger=logger, domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation, logger=logger)

    def f(theta, s, a):
        policy.representation.theta = theta
        return np.log(policy.prob(s, a))

    def df(theta, s, a):
        policy.representation.theta = theta
        return policy.dlogpi(s, a)

    def df_approx(theta, s, a):
        return approx_fprime(theta, f, 1e-10, s, a)

    thetas = np.random.rand(10, len(representation.theta))
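    # Sketch of the actual comparison, assuming the 4x5 maze loaded above
    # (rows sampled in [0, 4), columns in [0, 5)). check_grad returns the
    # 2-norm of the difference between the analytic gradient df and its
    # finite-difference approximation; it should be near zero for each theta.
    for i in range(10):
        s = np.array([np.random.randint(4), np.random.randint(5)])
        a = np.random.choice(domain.possibleActions(s))
        print 'check_grad error:', check_grad(f, df, thetas[i], s, a)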