Example #1
    def __init__(self, game, sess, nb_actions, global_step):
        BaseAgent.__init__(self, game, sess, nb_actions, global_step)
        self.name = "SF_linear_agent"
        self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm)
        self.nb_action = nb_actions
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.episode_max_values = []
        self.episode_min_values = []
        self.episode_mean_returns = []
        self.episode_max_returns = []
        self.episode_min_returns = []
        self.exploration = LinearSchedule(FLAGS.explore_steps,
                                          FLAGS.final_random_action_prob,
                                          FLAGS.initial_random_action_prob)
        self.summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.summaries_dir, FLAGS.algorithm))
        self.summary = tf.Summary()
        self.nb_states = game.nb_states
        self.q_net = SFLinearNetwork(nb_actions, self.nb_states, 'orig')
        self.target_net = SFLinearNetwork(nb_actions, self.nb_states, 'target')

        # Ops that copy the online ('orig') network weights into the target network
        self.targetOps = self.update_target_graph('orig', 'target')

        self.probability_of_random_action = self.exploration.value(0)
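
The LinearSchedule used here is not part of the snippet. Judging from how it is constructed and from the value(0) call, it anneals the random-action probability linearly over explore_steps. A minimal sketch under that assumption (not the original implementation):

# Sketch of a linear annealing schedule with the interface assumed above:
# epsilon moves from initial_p to final_p over schedule_timesteps steps and
# then stays at final_p.
class LinearSchedule:
    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, t):
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)

For example, with initial_p=1.0 and final_p=0.1, value(0) returns 1.0 and value(schedule_timesteps) returns 0.1, which is what probability_of_random_action is initialized from above.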
Example #2
    def __init__(self, game, sess, nb_actions, global_step):
        BaseAgent.__init__(self, game, sess, nb_actions, global_step)
        self.name = "SF_agent"
        self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm)

        self.nb_states = self.env.nb_states
        # One SF row per (state, state) pair when building an incidence matrix,
        # otherwise one row per state.
        if FLAGS.matrix_type == "incidence":
            self.sf_buffer = np.zeros(
                [self.nb_states * self.nb_states, self.nb_states])
        else:
            self.sf_buffer = np.zeros([self.nb_states, self.nb_states])
        self.seen_states = set()
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.episode_max_values = []
        self.episode_min_values = []
        self.episode_mean_returns = []
        self.episode_max_returns = []
        self.episode_min_returns = []
        self.exploration = LinearSchedule(FLAGS.explore_steps,
                                          FLAGS.final_random_action_prob,
                                          FLAGS.initial_random_action_prob)
        self.summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.summaries_dir, FLAGS.algorithm))
        self.summary = tf.Summary()

        self.sf_table = np.zeros([self.nb_states, self.nb_states])

        # self.q_net = SFNetwork(self.nb_actions, self.nb_states, 'orig')
        # self.target_net = SFNetwork(self.nb_actions, self.nb_states, 'target')
        #
        # self.targetOps = self.update_target_graph('orig', 'target')
        #
        self.probability_of_random_action = self.exploration.value(0)
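
The sf_table allocated above is a tabular successor representation, but the update rule is outside this snippet. A common way to fill such a table is a TD update toward the one-hot of the current state plus the discounted row of the next state; a minimal sketch of that rule (gamma and lr are illustrative hyperparameters, not FLAGS from the original):

import numpy as np

# Sketch of a tabular successor-representation TD update for one observed
# transition s -> s_next (standard rule, not necessarily what SF_agent does).
def update_sf_table(sf_table, s, s_next, gamma=0.99, lr=0.1):
    nb_states = sf_table.shape[0]
    one_hot = np.zeros(nb_states)
    one_hot[s] = 1.0
    target = one_hot + gamma * sf_table[s_next]
    sf_table[s] += lr * (target - sf_table[s])
    return sf_table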
Example #3
    def __init__(self, game, sess, nb_actions, global_step):
        BaseAgent.__init__(self, game, sess, nb_actions, global_step)
        self.name = "CategoricalDQN_agent"
        self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm)
        # Fixed support of the return distribution: nb_atoms atoms evenly spaced
        # in [v_min, v_max], delta_z apart.
        self.support = np.linspace(FLAGS.v_min, FLAGS.v_max, FLAGS.nb_atoms)
        self.delta_z = (FLAGS.v_max - FLAGS.v_min) / (FLAGS.nb_atoms - 1)

        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.episode_max_values = []
        self.episode_min_values = []
        self.episode_mean_returns = []
        self.episode_max_returns = []
        self.episode_min_returns = []
        self.exploration = LinearSchedule(FLAGS.explore_steps, FLAGS.final_random_action_prob,
                                          FLAGS.initial_random_action_prob)
        self.summary_writer = tf.summary.FileWriter(os.path.join(FLAGS.summaries_dir, FLAGS.algorithm))
        self.summary = tf.Summary()

        self.q_net = CategoricalDQNetwork(nb_actions, 'orig')
        self.target_net = CategoricalDQNetwork(nb_actions, 'target')

        self.targetOps = self.update_target_graph('orig', 'target')

        self.probability_of_random_action = self.exploration.value(0)
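
The support and delta_z computed above are exactly what the categorical (C51) projection needs: target values r + gamma * z are clipped to [v_min, v_max] and their probability mass is split between the two nearest atoms. A minimal NumPy sketch of that projection for a single non-terminal transition (the function name and arguments are illustrative, not taken from the agent):

import numpy as np

# Sketch of the C51 projection step: redistribute the next-state distribution
# next_probs, shifted by the Bellman update, back onto the fixed support.
def project_distribution(next_probs, reward, gamma, v_min, v_max, nb_atoms):
    support = np.linspace(v_min, v_max, nb_atoms)
    delta_z = (v_max - v_min) / (nb_atoms - 1)
    projected = np.zeros(nb_atoms)
    for j in range(nb_atoms):
        tz = np.clip(reward + gamma * support[j], v_min, v_max)
        b = (tz - v_min) / delta_z
        lower, upper = int(np.floor(b)), int(np.ceil(b))
        if lower == upper:  # tz falls exactly on an atom
            projected[lower] += next_probs[j]
        else:
            projected[lower] += next_probs[j] * (upper - b)
            projected[upper] += next_probs[j] * (b - lower)
    return projected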
Example #4
    def run(self):

        # Use the agent to find mines in our mine-sweeper environment
        if self.agent_name == self.BasicAgent:
            self.mine_sweeper_agent = BaseAgent(env=self.env)
        elif self.agent_name == self.CSPAgent:
            self.mine_sweeper_agent = CSPAgent(
                env=self.env, end_game_on_mine_hit=self.end_game_on_mine_hit)
        else:
            self.mine_sweeper_agent = ProbCSPAgent(
                env=self.env,
                end_game_on_mine_hit=self.end_game_on_mine_hit,
                use_probability_agent=self.use_probability_agent,
                prob=self.bonus_uncertain_p)

        self.mine_sweeper_agent.play()
        metrics = self.mine_sweeper_agent.get_gameplay_metrics()
        # print("Game won = ", str(metrics["game_won"]))
        print("Number of mines hit = ", str(metrics["number_of_mines_hit"]))
        print("Number of mines flagged correctly = ",
              str(metrics["number_of_mines_flagged_correctly"]))
        print("Number of cells flagged incorrectly = ",
              str(metrics["number_of_cells_flagged_incorrectly"]))

        self.env.render_env(100)
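
get_gameplay_metrics() is only printed here, but the same dictionary keys make it easy to aggregate results over repeated games. A small sketch using exactly the keys printed above (the aggregation helper is illustrative and not part of the original runner):

# Sketch: sum the gameplay metrics over several completed runs.
def aggregate_metrics(runs):
    totals = {"number_of_mines_hit": 0,
              "number_of_mines_flagged_correctly": 0,
              "number_of_cells_flagged_incorrectly": 0}
    for metrics in runs:
        for key in totals:
            totals[key] += metrics[key]
    return totals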
Example #5
    def __init__(self, action_space, cmdl, is_training=True):
        BaseAgent.__init__(self, action_space, is_training)
        self.name = "DQN_agent"
        self.cmdl = cmdl
        eps = self.cmdl.epsilon
        e_steps = self.cmdl.epsilon_steps

        self.policy = policy = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                         self.action_no, cmdl.hidden_size)
        self.target = target = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                         self.action_no, cmdl.hidden_size)
        if self.cmdl.cuda:
            self.policy.cuda()
            self.target.cuda()
        self.policy_evaluation = DQNEvaluation(policy)
        self.policy_improvement = DQNImprovement(policy, target, cmdl)
        self.exploration = get_epsilon_schedule("linear", eps, 0.05, e_steps)
        self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)

        self.dtype = TorchTypes(cmdl.cuda)
        self.max_q = -1000  # sentinel initial value for the largest Q-value seen so far
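
The constructor wires together an epsilon schedule, a policy network, a target network, and replay memory; the action-selection step itself lives elsewhere. A minimal PyTorch-style sketch of epsilon-greedy selection under the assumption that the schedule yields epsilon values and the policy maps a state tensor to Q-values (the function name is illustrative):

import random
import torch

# Sketch of epsilon-greedy action selection: with probability epsilon take a
# random action, otherwise the argmax of the policy network's Q-values.
def select_action(policy, state, epsilon, action_no):
    if random.random() < epsilon:
        return random.randrange(action_no)
    with torch.no_grad():
        q_values = policy(state)  # expected shape: [1, action_no]
    return int(q_values.argmax(dim=1).item())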
Example #6
def main():

    # Set up the GPU device (pick the one with the most free memory), if available
    if torch.cuda.is_available():
        torch.cuda.set_device(int(Utilis.gpu_id_with_max_memory()))

    # Load the run configuration (the log itself is created later, in train mode)
    cfg = Utilis.config()

    game = None

    try:
        working_mode = cfg['GLOBAL'].get('working_mode')

        working_agent = BaseAgent.create(cfg)

        if working_agent is None:
            print("Working Agent not found.")
            return

        game = Game(cfg)

        if working_mode == 'train':
            print('******************The Dino is being trained by ' +
                  cfg['GLOBAL'].get('working_agent') +
                  '*************************')

            logger = Logger.get_instance()
            logger.create_log(cfg)
            working_agent.train(game)

        elif working_mode == 'replay':
            print(
                '******************The Dino is being replayed*************************'
            )
            working_agent.replay(game)
        else:
            print("Working mode not found. Check the spelling of working_mode in config.ini.")

    finally:
        if game is not None:
            game.end()
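
main() expects the configuration to expose a GLOBAL section with working_mode ('train' or 'replay') and working_agent. A minimal sketch of what Utilis.config() might return, built with the standard-library configparser (the file name and agent value below are illustrative):

import configparser

# Sketch of loading the configuration main() reads. Example config.ini:
#   [GLOBAL]
#   working_mode = train
#   working_agent = DQNAgent
def load_config(path="config.ini"):
    cfg = configparser.ConfigParser()
    cfg.read(path)
    return cfg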