Ejemplo n.º 1
0
def bellman_test_features(args):
    """Fit a Bellman max-margin model on demos and check value monotonicity.

    Reads every step from the HDF5 demo file, groups the steps into
    trajectories, fits a ``BellmanMaxMarginModel`` on a random subset of
    them, and then counts how often the learned value decreases between
    consecutive steps of the held-out trajectories.  Finishes by dropping
    into an interactive IPython shell with all locals available.

    Attributes read from ``args``:
        model: must equal ``'bellman'``.
        demofile: path of the HDF5 file holding the demonstrations.
        num_constraints: number of shuffled trajectories used for fitting.
        num_evals: number of held-out trajectories to evaluate; a negative
            value means "all remaining trajectories".

    Raises:
        Exception: if ``args.model`` is not ``'bellman'``.
    """
    if args.model != 'bellman':
        raise Exception('wrong model for this')
    (feature_fn, margin_fn, num_features, actions) = select_feature_fn(args)
    demofile = h5py.File(args.demofile, 'r')
    try:
        # get a random set of trajectories
        trajectories = []
        traj = []
        # Groups are addressed by the string form of a running index, so the
        # index loop is intentional (key iteration order would differ).
        for uid in range(len(demofile)):
            key = str(uid)
            group = demofile[key]
            state = [key, group['cloud_xyz'][:]]  # these are already downsampled
            action = group['action'][()] if not group['knot'][()] else 'done'
            # A step whose 'pred' is its own key starts a new trajectory, so
            # flush whatever has been collected so far.
            if group['pred'][()] == key:
                if traj:
                    trajectories.append(traj)
                    traj = []
            traj.append([state, action])
        if traj:
            trajectories.append(traj)
        random.shuffle(trajectories)
        constraint_trajs = trajectories[:args.num_constraints]

        # put them into a Bellman model
        # Pass the full action set returned by select_feature_fn, not the
        # per-step `action` left over from the loading loop above.
        # NOTE(review): 500, 1000, 10 and .9 are presumably the model's
        # penalty constants and discount factor -- confirm against
        # BellmanMaxMarginModel.
        mm_model = BellmanMaxMarginModel(actions, 500, 1000, 10, .9,
                                         num_features, feature_fn, margin_fn)
        for i, t in enumerate(constraint_trajs):
            mm_model.add_trajectory(t, str(i), True)
        # Kept as locals so they can be inspected in the interactive shell.
        weights, w0 = mm_model.optimize_model()

        # evaluate value fn performance
        if args.num_evals < 0:
            num_evals = len(trajectories)
        else:
            num_evals = args.num_constraints + args.num_evals
        trajectories = trajectories[args.num_constraints:num_evals]
        # Size the table from the data so trajectories of any length fit;
        # shorter rows simply stay zero-padded on the right.
        max_steps = max([len(t) for t in trajectories] or [0])
        values = np.zeros((len(trajectories), max_steps))
        num_decreases = 0
        for (i, t) in enumerate(trajectories):
            for j, (s, a) in enumerate(t):
                values[i, j] = np.dot(mm_model.weights, mm_model.feature_fn(s, a))
            # Compare each step with its successor within this trajectory only.
            for k in range(len(t) - 1):
                if values[i, k] > values[i, k + 1]:
                    num_decreases += 1
                    print("DECREASE %s %s" % (values[i, k], values[i, k + 1]))
            sys.stdout.write('num decreases:\t{} computed values for trajectory {}\r'.format(num_decreases, i))
            sys.stdout.flush()
        sys.stdout.write('\n')
        print("num decreases:\t %s" % num_decreases)
        ipy.embed()
    finally:
        # Release the HDF5 handle once the interactive session is over.
        demofile.close()