Example #1
    @classmethod
    def get_all_graphs_infos(cls):
        # Walk FOLDER_GRAPH; the trailing `break` stops os.walk after the
        # top-level directory, so subdirectories are ignored.
        for (_, _, filenames) in os.walk(FOLDER_GRAPH):
            for filename in filenames:
                if filename.endswith('.gz'):
                    graph_name = filename[:-len('.gz')]  # Strip the extension.
                    domain_name = filename.split('-', 1)[0]
                    options_list = [
                        '--domain', domain_name, '--graph', graph_name
                    ]
                    exp_params = ExpParams.get_exp_params(options_list)
                    g = StateGraph.load(exp_params)
                    g.print_stats()
            break
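
The loop above assumes graph files are named '<domain>-<graph_name>.gz'. A minimal standalone sketch of that parsing (the filename is hypothetical, not from the source):

filename = 'backgammon-full.gz'          # hypothetical example file
graph_name = filename[:-len('.gz')]      # -> 'backgammon-full'
domain_name = filename.split('-', 1)[0]  # -> 'backgammon'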
Example #2
    def probe_network(self):
        exp_params = ExpParams.get_exp_params_from_command_line_args()
        graph = exp_params.state_class.GAME_GRAPH

        print "Network predictions:"
        self.network_predictions = {}  # Network predictions.
        true_values = {
        }  # True values obtained from the graph using value iteration.
        for state_roll_action_str in sorted(self.network_inputs.iterkeys()):
            # state_value = self.network_outputs[state_str]
            state_roll_action_value = self.network.activate(
                self.network_inputs[state_roll_action_str])
            self.network_predictions[state_roll_action_str] = \
                state_roll_action_value
            # Drop the 4-character roll-and-action suffix to recover the
            # board-state key before looking up its node in the graph.
            node_id = graph.get_node_id(state_roll_action_str[:-4])
            true_value = graph.get_attr(node_id, VAL_ATTR)
            true_values[state_roll_action_str] = true_value
            # print "%s -> %s (%.2f)" % (state_str, state_value, abs_value)
        # Iterate over (state, action) pairs in ascending order of visit
        # count, with ties broken by the key itself.
        for (si, ai), _ in sorted(self.visit_count.iteritems(),
                                  key=lambda (k, v): (v, k)):
            state_roll_action_str = '%s-%s' % (si, ai)
            true_value = true_values[state_roll_action_str]
            # Reward for a white win is [1, 0] and for a black win [0, 1],
            # so state_value[0] - state_value[1] nominally ranges from -1
            # to +1 (it can exceed those bounds when the network outputs
            # fall outside [0, 1]). A sketch of scaling that difference
            # into [0, 1] follows this example.
            print "(%s, %s): opt. val. for white: %+.2f prediction: %s visited: %d" % (
                si, ai, true_value,
                map(PrettyFloat,
                    self.network_predictions[state_roll_action_str]),
                self.visit_count.get((si, ai), 0))
        print(
            'Note: optimal values for white are based on the board '
            'positions only and ignore the current roll.')
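
The comment block above refers to a formula for scaling the prediction difference into [0, 1], but the formula itself is not shown in the excerpt. A minimal sketch of the affine rescale it describes (the function name scale_to_unit is an assumption):

def scale_to_unit(state_value):
    # Map state_value[0] - state_value[1] from [-1, +1] onto [0, 1].
    diff = state_value[0] - state_value[1]
    return (diff + 1.0) / 2.0

print scale_to_unit([1.0, 0.0])  # 1.0: certain white win
print scale_to_unit([0.0, 1.0])  # 0.0: certain black win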
Example #3
    def print_traj_counts(self):
        # Sort trajectories by descending count (most frequent first).
        sorted_traj_count = sorted(self.traj_count.iteritems(),
                                   key=lambda (k, v): v,
                                   reverse=True)
        for traj, cnt in sorted_traj_count:
            print "%s: %d" % (traj, cnt)
        # Reset after each query.
        self.traj_count = {}

    def print_learner_state(self):
        self.print_visit_count()
        self.print_e()
        self.probe_network()
        self.print_traj_counts()


if __name__ == '__main__':
    make_data_folders()
    exp_params = ExpParams.get_exp_params_from_command_line_args()

    filename = exp_params.get_trial_filename(FILE_PREFIX_NTD)
    f = open(filename, 'w')

    agent_ntd = AgentNTD(exp_params.state_class)
    if TRAIN_BUDDY == TRAIN_BUDDY_SELF:
        agent_train_buddy = agent_ntd
    elif TRAIN_BUDDY == TRAIN_BUDDY_COPY:
        agent_train_buddy = AgentNTD(exp_params.state_class)
    elif TRAIN_BUDDY == TRAIN_BUDDY_RANDOM:
        agent_train_buddy = AgentRandom(exp_params.state_class)
    else:
        # Fail fast so agent_train_buddy is never left undefined.
        raise ValueError('Unknown TRAIN_BUDDY: %s' % TRAIN_BUDDY)
    agent_eval = Experiment.create_eval_opponent_agent(exp_params)
    print 'Training buddy is: %s' % agent_train_buddy
    print 'Evaluation opponent is: %s' % agent_eval
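
The TRAIN_BUDDY chain above selects the training opponent (self-play, an independent copy, or a random player). The same dispatch can be written as a lookup table; this is a style sketch, not code from the source:

# Hypothetical dict-based equivalent of the if/elif chain above.
buddy_factories = {
    TRAIN_BUDDY_SELF: lambda: agent_ntd,                              # self-play
    TRAIN_BUDDY_COPY: lambda: AgentNTD(exp_params.state_class),       # fresh copy
    TRAIN_BUDDY_RANDOM: lambda: AgentRandom(exp_params.state_class),  # random play
}
agent_train_buddy = buddy_factories[TRAIN_BUDDY]()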
Example #4
    @classmethod
    def get_knowledge_filename(cls):
        # Derive the SARSA Q-table path from the command-line parameters.
        exp_params = ExpParams.get_exp_params_from_command_line_args()
        return exp_params.get_custom_filename_no_trial(FOLDER_QTABLE,
                                                       FILE_PREFIX_SARSA)
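
A minimal usage sketch, assuming the method lives on a class such as AgentSarsa (the class name is an assumption, not from the source):

filename = AgentSarsa.get_knowledge_filename()  # AgentSarsa is hypothetical
print 'SARSA knowledge file: %s' % filename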