# NOTE(review): this physical line is a whitespace-mangled collapse of a multi-line
# chunk. The leading '#' comes from an original "# print 'added feature'" comment and
# now swallows the entire line, so everything below is syntactically one comment.
# Judging from the visible tokens, the original chunk contained:
#   (a) the tail of a BEBF feature-discovery method (addNewWeight() call,
#       addedFeature/features_num bookkeeping, a logger.log message, then
#       `return addedFeature`) whose `def` line lies outside this view,
#   (b) a one-line method `def featureType(self): return float`, and
#   (c) an `if __name__ == '__main__':` smoke test that builds a GridWorld domain,
#       a BEBF representation, seeds rep.theta, and steps the domain once —
#       presumably a manual debugging script; verify against version control.
# Do NOT re-indent this by hand: the true indentation of the method tail cannot be
# recovered from this view. Restore the original multi-line text from source control.
# print 'added feature' self.addNewWeight() addedFeature = True self.features_num += 1 self.logger.log('Added feature. \t %d total feats' % self.features_num) else: break return addedFeature def featureType(self): return float if __name__ == '__main__': STDOUT_FILE = 'out.txt' JOB_ID = 1 OUT_PATH = 'Results/Temp' # logger = Logger('%s/%d-%s'%(OUT_PATH,JOB_ID,STDOUT_FILE)) logger = Logger() discovery_threshold = 1 domain = GridWorld() rep = BEBF(domain,logger,debug=1,batchThreshold = 10 ** -5) rep.theta = arange(rep.features_num*domain.actions_num)*10 print 'initial features' print rep.features_num,'---',rep.features s = domain.s0() a = domain.possibleActions(s) a = a[0] r,ns,terminal = domain.step(s, a) print 'step 2 r,ns',r,ns
# Gradient check for GibbsPolicy: compares the analytic policy log-gradient
# (dlogpi) against a finite-difference approximation of log prob(s, a) over
# random weight vectors, states, and actions on a small GridWorld maze.
#
# NOTE(review): this chunk was collapsed onto a single physical line (a syntax
# error as written) and used Python 2 `print` statements; it has been re-expanded
# and the prints migrated to Python 3 `print()` calls (same output).
MAZE = "./Domains/GridWorldMaps/4x5.txt"
NOISE = 0.3

logger = Logger()
domain = GridWorld(MAZE, noise=NOISE, logger=logger)
representation = Tabular(logger=logger, domain=domain, discretization=20)
policy = GibbsPolicy(representation=representation, logger=logger)


def f(theta, s, a):
    """Return log prob(a | s) under policy weights `theta`."""
    policy.representation.theta = theta
    return np.log(policy.prob(s, a))


def df(theta, s, a):
    """Return the analytic gradient of log prob(a | s) w.r.t. `theta`."""
    policy.representation.theta = theta
    return policy.dlogpi(s, a)


def df_approx(theta, s, a):
    """Return a finite-difference approximation of `f`'s gradient (debug aid)."""
    return approx_fprime(theta, f, 1e-10, s, a)


# Ten random weight vectors, checked at ten random (state, action) pairs.
thetas = np.random.rand(10, len(representation.theta))
for i in range(10):
    # Random cell of the 4x5 maze; action drawn from the admissible set.
    s = np.array([np.random.randint(4), np.random.randint(5)])
    a = randSet(domain.possibleActions(s))
    for theta in thetas:
        print("s", s)
        print("a", a)
        # print("f", f(theta, s, a))
        # print("df", df(theta, s, a))
        # print("df_approx", df_approx(theta, s, a))
        # check_grad returns the 2-norm of (analytic - finite-difference);
        # values near zero mean dlogpi agrees with the numerical gradient.
        print("Error", check_grad(f, df, theta, s, a))