Example #1
File: models.py  Project: rskwan/opti-sim
def init_transitions(self, horizon):
    # Build the "neutral" transition matrix from the true transition
    # function, then compute the values that the attitude-specific
    # transition functions are derived from.
    neutral_tmat = generate_transitions(self.states, self.actions,
                                        self.true_tran_func)
    realvals, _ = generate_policy(horizon, self.max_time,
                                  neutral_tmat, self.rmat)
    for i, (aname, gamma) in enumerate(self.attitudes):
        # one transition function per (name, gamma) attitude pair
        transition_func = self.make_transition_func(aname, gamma, realvals)
        self.transitions[aname] = generate_transitions(self.states, self.actions,
                                                       transition_func, self.max_time)
        # cache the matrix to disk, keyed by attitude index, name, and horizon
        fname = self.dirname + "transitions_{0}_{1}_h{2}.p".format(i, aname, horizon)
        self.transitions[aname].dump(fname)
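
For context, a minimal driver for this method might look like the sketch below. Only the method itself appears above, so the `Model` class name, its constructor arguments, and the attitude names here are assumptions for illustration, not the project's actual API.

# Hypothetical usage sketch; the Model constructor and attitude names
# below are assumptions, not taken from the project.
model = Model(states, actions, true_tran_func, rmat,
              attitudes=[("neutral", 1.0), ("riskseeking", 0.9)],
              max_time=50, dirname="out/")
for horizon in (5, 10, 20):
    # fills model.transitions[aname] for each attitude and writes each
    # matrix to out/transitions_{i}_{aname}_h{horizon}.p
    model.init_transitions(horizon)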
Example #2
File: run.py  Project: rskwan/opti-sim
from __future__ import print_function

import csv
import cPickle

import numpy as np

def run_sims(dirname, horizons, attitudes, model_type, model, num_episodes):
    max_time = max(horizons)
    # only the "valopt" model uses time-dependent policies
    time_based = (model_type == "valopt")
    # output setup
    csvout = open(dirname + "results_{0}_data.csv".format(num_episodes), 'w+')
    writer = csv.writer(csvout)
    writer.writerow(["Horizon", "AvgEarnings", "StdErr", "AttitudeIndex", "AttitudeName"])
    readableout = open(dirname + "results_{0}_readable.txt".format(num_episodes), 'w+')
    verboseout = open(dirname + "results_{0}_verbose.txt".format(num_episodes), 'w+')
    print_readverb = make_multiprint([readableout, verboseout])
    # variables only used for constvar
    # TODO: allow for >2 actions?
    num_actions = 2
    new_attitudes = []
    policies = []
    if model_type == "constvar":
        for action_idx in range(num_actions):
            policies.append(const_policy(action_idx, max_time, len(model.states)))
        results = np.zeros((len(attitudes) * num_actions, len(horizons), 3))
    else:
        results = np.zeros((len(attitudes), len(horizons), 3))
    for horizon_idx, horizon in enumerate(horizons):
        print_readverb("Horizon: {}".format(horizon))
        if model_type == "constvar":
            """In the `constvar` model, we vary the true transition matrix,
            rather than the policy, based on beliefs. We take a constant policy
            (one for each action)."""
            for attitude_idx, attitude in enumerate(attitudes):
                # compute results
                tmat = model.transitions[attitude]
                rmat = model.rewards[attitude]
                for action_idx in range(num_actions):
                    new_att_idx = (attitude_idx * num_actions) + action_idx
                    new_attitude = attitude + str(action_idx)
                    # the expanded name list is built on the first horizon pass only
                    if len(new_attitudes) < len(attitudes) * num_actions:
                        new_attitudes.append(new_attitude)
                    totals, mean, stderr = episode_stats(policies[action_idx], num_episodes,
                                                         max_time, tmat, rmat, time_based)
                    results[new_att_idx][horizon_idx] = [horizon, mean, stderr]
                    # output to files
                    writer.writerow([horizon, mean, stderr, new_att_idx, new_attitude])
                    print_readverb("{0}: mean = {1}, stderr = {2}".format(new_attitude, mean, stderr))
                    print("Totals: {0}".format(totals), file=verboseout)
                    print("Policy for {0}: {1}".format(new_attitude, policies[action_idx]), file=verboseout)
        else:
            if model_type == "valopt":
                model.init_transitions(horizon)
            # note: "neutral" must be an attitude
            neutral_tmat = model.transitions['neutral']
            for attitude_idx, attitude in enumerate(attitudes):
                rmat = model.rewards[attitude]
                if model_type == "const":
                    # TODO: allow const to have something other than 0
                    policy = const_policy(0, max_time, rmat.shape[1])
                else:
                    tmat = model.transitions[attitude]
                    _, policy = generate_policy(horizon, max_time, tmat, rmat, time_based)
                totals, mean, stderr = episode_stats(policy, num_episodes, max_time,
                                                     neutral_tmat, rmat, time_based)
                results[attitude_idx][horizon_idx] = [horizon, mean, stderr]
                # output to files
                writer.writerow([horizon, mean, stderr, attitude_idx, attitude])
                print_readverb("{0}: mean = {1}, stderr = {2}".format(attitude, mean, stderr))
                print("Totals: {0}".format(totals), file=verboseout)
                print("Policy for {0}: {1}".format(attitude, policy), file=verboseout)
                polfname = "policy_{0}_{1}_{2}_{3}.p".format(num_episodes, horizon,
                                                             attitude_idx, attitude)
                with open(dirname + polfname, 'wb') as polfile:
                    cPickle.dump((attitude, policy), polfile)
    # close the output files so buffered results are flushed to disk
    csvout.close()
    readableout.close()
    verboseout.close()
    if len(new_attitudes) > 0:
        # constvar expanded each attitude into one entry per action
        attitudes = new_attitudes
    return results, attitudes
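
The excerpt leans on several helpers defined elsewhere in rskwan/opti-sim. As illustration only, here are minimal sketches of `const_policy` and `make_multiprint` consistent with how they are called above; the policy's array shape and the stream handling are assumptions, not the project's actual implementations.

# Hedged sketches of two helpers the excerpt relies on; the real
# implementations live elsewhere in the project and may differ.
from __future__ import print_function
import numpy as np

def const_policy(action_idx, max_time, num_states):
    # a policy that takes the same action in every state at every time step,
    # sketched here as a (max_time, num_states) array of action indices
    return np.full((max_time, num_states), action_idx, dtype=int)

def make_multiprint(streams):
    # returns a print-like function that writes one line to every stream
    def multiprint(msg):
        for stream in streams:
            print(msg, file=stream)
    return multiprint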