Example #1
import grid_world
import model


def probability_experiment():
    # Train a model by conjugate gradient and evaluate it on the grid world.
    m = model.train_model_cg()
    mdp = grid_world.MDP()

    # Score configurations drawn from data against randomly drawn ones;
    # test_learned_model is defined elsewhere in this module.
    score_d, score_r = test_learned_model(m, mdp)

    print('unnormalized data probability: ', score_d)
    print('unnormalized random probability: ', score_r)
Example #2
import grid_world
import model


def probability_experiment():
    # Train a model by conjugate gradient and evaluate it on the grid world.
    m = model.train_model_cg()
    mdp = grid_world.MDP()

    # Score configurations drawn from data against randomly drawn ones;
    # test_learned_model is defined elsewhere in this module.
    score_d, score_r = test_learned_model(m, mdp)

    print("unnormalized data probability: ", score_d)
    print("unnormalized random probability: ", score_r)
Example #3
import pickle

import numpy as np

import grid_world
import model
import util


def main(model_size=(18, 2, 18), dist='uniform', n_iters=1):

    file_str = 'model.%i.%i.%s.pickle.gz' % (model_size[0], model_size[1],
                                             dist)

    # Reuse a previously serialized model if one exists; otherwise train a
    # new one and cache it to disk.
    try:
        with util.openz(file_str) as f:
            print('found previous file, using: ', f)
            m = pickle.load(f)
    except IOError:
        print('no serialized model found, training a new one')
        m = model.train_model_cg(model_size, dist=dist)
        with util.openz(file_str, 'wb') as f:
            pickle.dump(m, f)

    mdp = grid_world.MDP()

    # MapPolicy, get_map, and parse_config are defined elsewhere in this module.
    map_policy = MapPolicy(m, mdp.env)
    mdp.policy = map_policy
    #optimal_policy = grid_world.OptimalPolicy(mdp.env)

    for i in range(n_iters):
        state = None
        while not mdp.env.is_goal_state():
            state = mdp.env.state
            print('current state: ', state)

            #actions = mdp.env.get_actions(state)

            #act = map_policy.choose_action()

            # convert the (row, col) state to a binary indicator vector:
            # one bit for the row, one bit for the column
            phi_s = np.zeros(model_size[0])
            phi_s[state[0]] = 1
            phi_s[mdp.env.n_rows + state[1]] = 1

            # take the action from the MAP configuration given the current state
            map_config = get_map(m, phi_s)
            pos, act, pos_p, r = parse_config(map_config, m, mdp.env)
            print('map mu vector: ', map_config)

            print('map configuration: ', pos, act, pos_p, r)

            mdp.env.take_action(act)

        print('final state: ', mdp.env.state)
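A note on the featurization above: phi_s one-hot encodes the grid position, one bit for the row and one bit for the column, so its length must equal n_rows + n_cols (here model_size[0] = 18). The snippet below is a self-contained sketch of that encoding; the 9x9 grid size is an assumption chosen for illustration, not taken from grid_world.

import numpy as np

def encode_state(state, n_rows, n_cols):
    # One bit for the row index, one bit for the column index.
    phi = np.zeros(n_rows + n_cols)
    phi[state[0]] = 1           # row indicator
    phi[n_rows + state[1]] = 1  # column indicator
    return phi

# Assuming a 9x9 grid, the vector has length 18, matching model_size[0] above.
print(encode_state((2, 3), n_rows=9, n_cols=9))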
Example #4
import pickle

import numpy as np

import grid_world
import model
import util


def main(model_size=(18, 2, 18), dist='uniform', n_iters=1):

    file_str = 'model.%i.%i.%s.pickle.gz' % (model_size[0], model_size[1], dist)

    # Reuse a previously serialized model if one exists; otherwise train a
    # new one and cache it to disk.
    try:
        with util.openz(file_str) as f:
            print('found previous file, using: ', f)
            m = pickle.load(f)
    except IOError:
        print('no serialized model found, training a new one')
        m = model.train_model_cg(model_size, dist=dist)
        with util.openz(file_str, 'wb') as f:
            pickle.dump(m, f)

    mdp = grid_world.MDP()

    # MapPolicy, get_map, and parse_config are defined elsewhere in this module.
    map_policy = MapPolicy(m, mdp.env)
    mdp.policy = map_policy
    #optimal_policy = grid_world.OptimalPolicy(mdp.env)

    for i in range(n_iters):
        state = None
        while not mdp.env.is_goal_state():
            state = mdp.env.state
            print('current state: ', state)

            #actions = mdp.env.get_actions(state)

            #act = map_policy.choose_action()

            # convert the (row, col) state to a binary indicator vector:
            # one bit for the row, one bit for the column
            phi_s = np.zeros(model_size[0])
            phi_s[state[0]] = 1
            phi_s[mdp.env.n_rows + state[1]] = 1

            # take the action from the MAP configuration given the current state
            map_config = get_map(m, phi_s)
            pos, act, pos_p, r = parse_config(map_config, m, mdp.env)
            print('map mu vector: ', map_config)

            print('map configuration: ', pos, act, pos_p, r)

            mdp.env.take_action(act)

        print('final state: ', mdp.env.state)
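The try/except block at the top of main in Examples #3 and #4 implements a load-or-train cache keyed on the model configuration. util.openz is project-specific (it evidently opens the gzipped pickle); the sketch below reproduces the same pattern using only the standard library, with gzip.open as a stand-in.

import gzip
import os
import pickle

def load_or_train(path, train_fn):
    # Return the cached object if the pickle exists; otherwise build and cache it.
    if os.path.exists(path):
        with gzip.open(path, 'rb') as f:
            return pickle.load(f)
    result = train_fn()
    with gzip.open(path, 'wb') as f:
        pickle.dump(result, f)
    return result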