import pickle

import numpy as np

import grid_world
import model
import util


def probability_experiment():
    # train a model, then compare the unnormalized probability it assigns
    # to observed transitions against what it assigns to random ones
    m = model.train_model_cg()
    mdp = grid_world.MDP()
    score_d, score_r = test_learned_model(m, mdp)
    print 'unnormalized data probability: ', score_d
    print 'unnormalized random probability: ', score_r
def main(model_size=(18, 2, 18), dist='uniform', n_iters=1):
    file_str = 'model.%i.%i.%s.pickle.gz' % (model_size[0], model_size[1], dist)

    try:
        # reuse a previously trained model if one was serialized to disk
        with util.openz(file_str) as f:
            print 'found previous file, using: ', f
            m = pickle.load(f)
    except IOError:
        print 'no serialized model found, training a new one'
        m = model.train_model_cg(model_size, dist=dist)
        with util.openz(file_str, 'wb') as f:
            pickle.dump(m, f)

    mdp = grid_world.MDP()
    map_policy = MapPolicy(m, mdp.env)
    mdp.policy = map_policy
    #optimal_policy = grid_world.OptimalPolicy(mdp.env)

    for i in xrange(n_iters):
        state = None
        while not mdp.env.is_goal_state():
            state = mdp.env.state
            print 'current state: ', state

            #actions = mdp.env.get_actions(state)
            #act = map_policy.choose_action()

            # convert the (row, col) state to a binary indicator vector:
            # one bit for the row, one bit for the column
            phi_s = np.zeros(model_size[0])
            phi_s[state[0]] = 1
            phi_s[mdp.env.n_rows + state[1]] = 1

            # query the model for the MAP configuration given the current
            # state features, then decode it into a transition
            map_config = get_map(m, phi_s)
            pos, act, pos_p, r = parse_config(map_config, m, mdp.env)
            print 'map mu vector: ', map_config
            print 'map configuration: ', pos, act, pos_p, r

            mdp.env.take_action(act)

        print 'final state: ', mdp.env.state
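

# Minimal usage sketch (an addition, not from the original file): run the
# MAP-policy rollout with the default model size and prior. Assumes the
# helpers referenced above (test_learned_model, MapPolicy, get_map,
# parse_config) are defined elsewhere in this module.
if __name__ == '__main__':
    main()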