Example #1
    def __init__(self):
        self.frame_skip = 4
        self.exp_scale = 0.1
        self.time_scale = 1
        
        
#        self.env = gym.make("Pong-v0")
#        self.env.unwrapped.frameskip = self.frame_skip
        self.env = PenaltyEnvironment() 
        self.steps_to_train = int(self.time_scale * 5e7)
        self.agent = DQNAgent(num_actions=self.env.action_space.n, 
                              experience_replay_capacity = 1e6 * self.exp_scale, 
                              frame_skip=self.frame_skip, 
                              starting_experience = 5e4 * self.exp_scale, 
                              discount = 0.99, 
                              batch_size = 32, 
                              update_frequency = 4 * self.time_scale, 
                              target_update_frequency = 1e4 * self.time_scale, 
                              starting_epsilon = 1, 
                              final_epsilon = 0.1,
                              final_epsilon_step = 1e6 * self.time_scale)
        
        self.steps_played = 0
        self.env.reset()
        self.sequence = StateSequence([84, 84], 4)
        self.processor = StateProcessor()
Example #2
 def initialize(self):
     # Initialize game parameters.
     self.env = T9Desk()
     state_size = self.env.observation_space
     action_size = self.env.action_space_size
     self.agent = DQNAgent(state_size, action_size)
     self.agent.load("T9-dqn.h5")
Example #3
def remember():
    # Parse one transition from the request body and store it in the agent's replay memory.
    data = request.get_json()
    state = np.array(data['state'])
    action = data['action']
    reward = data['reward']
    next_state = np.array(data['next_state'])
    done = data['done']
    DQNAgent.remember(state, action, reward, next_state, done)
    return 'ok'
Example #4
class DQNPlatform:
    def __init__(self):
        self.frame_skip = 4
        self.exp_scale = 0.1
        self.time_scale = 1
        
        
#        self.env = gym.make("Pong-v0")
#        self.env.unwrapped.frameskip = self.frame_skip
        self.env = PenaltyEnvironment() 
        self.steps_to_train = int(self.time_scale * 5e7)
        self.agent = DQNAgent(num_actions=self.env.action_space.n, 
                              experience_replay_capacity = 1e6 * self.exp_scale, 
                              frame_skip=self.frame_skip, 
                              starting_experience = 5e4 * self.exp_scale, 
                              discount = 0.99, 
                              batch_size = 32, 
                              update_frequency = 4 * self.time_scale, 
                              target_update_frequency = 1e4 * self.time_scale, 
                              starting_epsilon = 1, 
                              final_epsilon = 0.1,
                              final_epsilon_step = 1e6 * self.time_scale)
        
        self.steps_played = 0
        self.env.reset()
        self.sequence = StateSequence([84, 84], 4)
        self.processor = StateProcessor()



    def train(self, sess):
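        # One training step per iteration: act on the current 4-frame state, step the env, store the result, and learn.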
        for n in range(self.steps_to_train):
            state = self.sequence.get_sequence()
            action = self.agent.select_action(sess, state)
            obs, reward, done, info = self.env.step(action)
                
            self.sequence.append_obs(self.processor.process(sess, obs))
            state_prime = self.sequence.get_sequence()
            self.agent.learn(sess, state, action, reward, state_prime, done)

            self.env.render()
            if done:
                self.env.reset()
            self.steps_played += 1
            if n % 10000 == 0:
                print(self.steps_played / self.steps_to_train * 100, "%")
        
    def test(self, sess):
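        # Greedy evaluation: epsilon is set to 0 so the learned policy runs without exploration.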
        self.agent.epsilon = 0
        done = False
        self.env.reset()
        while not done:
            state = self.sequence.get_sequence()
            action = self.agent.select_action(sess, state)
            obs, reward, done, info = self.env.step(action)
            self.sequence.append_obs(self.processor.process(sess, obs))
            self.env.render()
            sleep(0.1)
Example #5
 def __init__(self, env, qnet):
     self.TrainPolicy = GreedyEpsPolicy(0.8)      
     self.TestPolicy = GreedyEpsPolicy(0.0)
     DQNAgent.__init__(self, env, qnet, gamma=0.9,
             train_policy=self.TrainPolicy, test_policy=self.TestPolicy,
             steps_between_train = 1000, episodes_between_train = 1, 
             train_sample_size = 50, train_rounds = 40,
             trains_between_updates = 1
     )
Example #6
class T9Handler(tornado.websocket.WebSocketHandler):

    def initialize(self):
        # Initialize game parameters.
        self.env = T9Desk()
        state_size = self.env.observation_space
        action_size = self.env.action_space_size
        self.agent = DQNAgent(state_size, action_size)
        self.agent.load("T9-dqn.h5")

    def open(self):
        print("WebSocket opened")
        self.initialize()
        # self.write_message(u"Dimas sends greetings!")
        print(self.env.state_json)
        self.write_message(self.env.state_json)

    def on_message(self, message):
        data = json.loads(message)
        print('MESSAGE:', message)
        print('DATA:', data)
        # print('DATA:', data[0], data[1])
        # self.env.tuzdyk = {'p1': 1, 'p2': 2}
        # self.write_message(u"Your message was: " + message)
        if data[0] == 'action_':
            action = data[1]
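            # Shift the action index by 10 depending on whose turn it is.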
            who_moves = self.env.who_move
            if (action > 10) & who_moves:
                action -= 10
            elif (action < 10) & who_moves:
                action += 10
            if action < 10:
                self.env.step(action)
                self.write_message(self.env.state_json)
        elif data[0] == 'action':
            action = data[1]
            pr, op = self.env.who_moves_str
            state, reward, done, _ = self.env.step(action)
            self.write_message(self.env.state_json)
            self.env.render()

            # <insert agent>
            action_space = self.env.action_space[pr]
            # action = action_space[np.random.randint(0, action_space.size)]
            action = self.agent.act(state) + 1

            # <\insert agent>
            time.sleep(2)
            self.env.step(action)
            self.write_message(self.env.state_json)
            self.env.render()   



    def on_close(self):
        print("WebSocket closed")
Example #7
def main(_):
    env = gym.make(FLAGS.env_name)

    num_actions = env.action_space.n
    num_features = env.observation_space.shape[0]
    print("num_actions", num_actions, "num_features", num_features)

    agent = DQNAgent(
        num_actions,
        num_features,
        learning_rate=FLAGS.learning_rate,
        reward_decay=FLAGS.reward_decay,
        e_greedy=FLAGS.e_greedy,
        replace_target_iter=FLAGS.replace_target_iter,
        memory_size=FLAGS.memory_size,
        batch_size=FLAGS.batch_size,
        e_greedy_increment=None,
    )
    eps = 0
    score_list = []
    while True:
        eps += 1
        obs = env.reset()
        obs = obs.reshape(-1, num_features)
        score = 0
        for step in range(FLAGS.max_step):
            #print("step",step)
            #action = randint(0,num_actions-1)
            # action = agent.take_random_action()
            action = agent.choose_action(obs)
            obs_, rew, done, info = env.step(action)
            score += rew
            obs_ = obs_.reshape(-1, num_features)
            agent.store_transition(obs, action, rew, obs_)
            agent.train_model()
            if FLAGS.display:
                env.render()
            #print("x :",obs_[10],"y :",obs_[16])
            if step < 80:
                continue
            # if step>160:
            #     break
            #delta_obs =obs-obs_
            obs = obs_
            #print('delta_obs',delta_obs,'rew',rew,'done',done,'info',info)
            #print('delta_obs\n',delta_obs)
            #time.sleep(0.1)
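            # End the episode as soon as a life is lost (lives drop below 3) or the game is over.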
            if done or info['ale.lives'] < 3:
                print(eps, "th episode with reward ", score)
                score_list.append(score)
                np.save(FLAGS.path_name + 'dqn_mspacma.npy', score_list)
                break
        if np.mean(score_list[-10:]) > 1000:
            agent.save_model(FLAGS.path_name + 'dqn_mspacman.h5')
            break
Example #8
    def __init__(self, host, game, player, max_steps=4, verbose=False):
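        # Wrap a DQN agent behind an MQTT client: connect to the broker and route Zumo control messages to on_message.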
        self._client = mqtt.Client()
        self._game = game
        self._player = player
        self._verbose = verbose
        self._agent = DQNAgent(game, True)
        self._repeated_actions = 0
        self._steps = 0
        self._max_actions = max_steps
        self._version = 'v0.1.2'

        self._client.connect(host)
        self._client.subscribe('Ctrl/Zumo/#', 0)
        self._client.on_message = self.on_message
Example #9
 def train(self,
           show=True,
           print_scores=True,
           n_generations=100,
           update_epsilon=True,
           model_path='',
           random_state=False,
           max_value=30):
     self.n_generations = n_generations
     for i in range(1, n_generations + 1):
         game = Game(show=show, max_value=max_value, batch_size=16)
         game.add_player(0, random_state=random_state)
         if self.mode == "naive":
             agent = NaiveQAgent(n_moves, n_states, self.model,
                                 self.epsilon, self.alpha, self.gamma)
         elif self.mode == "dqn":
             agent = DQNAgent(n_moves, n_states, self.model, self.epsilon,
                              self.alpha, self.gamma)
         else:
              print("Invalid mode")
             return
         agent.max_value = max_value
         game.players[0].set_agent(agent)
         game.players[0].age = 20
         score = game.run()
         if update_epsilon:
             self.updateEpsilon()
         if print_scores:
             print("Score at the %s-th iteration :  %s" % (i, score))
         else:
             if not i % 50:
                 print("%s-th iterations" % i)
         if model_path and not i % 50:
             self.save_model(model_path)
         self.scores.append(score)
Example #10
def main():
    network = DuelingCNN(C.FILENAME, [5], (11, 11, 3), C.NUM_ACTIONS,
                         C.Q_LEARNING_RATE)
    target = network.copy()
    replay = PrioritizedReplayBuffer(C.REPLAY_CAPACITY, alpha=C.ALPHA)

    agt = DQNAgent(network, replay)

    agent_list = [agt, DullAgent(), DullAgent(), DullAgent()]
    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    total_time = 0
    for i_episode in range(1000000):
        state = env.reset()
        done = False
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            total_time += 1
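            # Periodically refresh the target network with a copy of the online network.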
            if total_time % C.UPDATE_RATE == 0:
                target = network.copy()
            if i_episode > 3: train(network, target, replay)
        print('Episode {} finished'.format(i_episode))
        if i_episode % 20 == 0:
            network.save()
    env.close()
Example #11
def train():
    agent = DQNAgent(state_size, action_size)  # initialise agent
    #choice = raw_input("Name weight: ")
    #filename = output_dir + "weights_" + choice + ".hdf5"
    #agent.load(filename)
    batch_size = 32
    n_episodes = 10001  # n games we want agent to play (default 1001)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    done = False
    for e in range(n_episodes):  # iterate over new episodes of the game
        # reset state at start of each new episode of the game
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        score = 0

        # time represents a frame of the game; the goal is to keep the pole
        # upright for as long as possible, up to the range limit (e.g., 500 or 5000 timesteps)
        for time in range(100):
            # env.render()
            # action is either 0 or 1 (move cart left or right); decide on one or the other here
            action = agent.act(state)
            # agent interacts with env, gets feedback; 4 state data points, e.g., pole angle, cart position
            next_state, reward, done, _ = env.step(action)
            # reward = reward if not done else -1000  # reward +1 for each additional frame with pole upright
            next_state = np.reshape(next_state, [1, state_size])
            # remember the previous timestep's state, action, reward, etc.
            agent.remember(state, action, reward, next_state, done)
            state = next_state  # set "current state" for upcoming iteration to the current next state
            score = score + reward
            if done:  # episode ends if agent drops pole or we reach timestep 5000
                # print the episode's score and agent's epsilon
                print("episode: {}/{}, score: {}, e: {:.2}, time: {}, x: {:.2}"
                      .format(e, n_episodes, score, agent.epsilon, time,
                              state[0, 0]))
                break  # exit loop
        if len(agent.memory) > batch_size:
            # train the agent by replaying the experiences of the episode
            agent.replay(batch_size)
        if e % 500 == 0:
            agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
            #print(env.get_traj())
            #env.plot_traj()

    env.plot_traj()
Example #12
def main():
    warnings.simplefilter(action='ignore', category=FutureWarning)

    set_global_seed(0)

    env = rlcard.make('limit-holdem', config={'record_action': True})
    human_agent = HumanAgent(env.action_num)

    dqn_agent = DQNAgent(env.action_num,
                         env.state_shape[0],
                         hidden_neurons=[1024, 512, 1024, 512])

    dqn_agent.load(sys.argv[1])

    env.set_agents([human_agent, dqn_agent])

    play(env)
Example #13
def main():
    env = Game()
    env.start()
    agent = DQNAgent(env)
    MAX_EPISODES = 500
    MAX_STEPS = 5000
    BATCH_SIZE = 32
    episode_rewards = mini_batch_train(env, agent, MAX_EPISODES, MAX_STEPS,
                                       BATCH_SIZE)
Example #14
def run(environment, model_name, key=None):
    tdir = tempfile.mkdtemp()
    env = gym.make(environment)
    env = gym.wrappers.Monitor(env, tdir, force=True)
    agent = DQNAgent(env, trained_model=model_name)
    EPISODES = 100
    for episode in range(EPISODES):
        state, reward, done = env.reset(), 0.0, False
        action = agent.action(state, reward, done, episode, training=False)
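        # Evaluation rollout: the agent is queried with training disabled at every step.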
        while not done:
            #env.render()
            next_state, reward, done, _ = env.step(action)
            state = next_state
            action = agent.action(state, reward, done, episode, training=False)
    env.close()
    if key:
        gym.upload(tdir, api_key=key)
    shutil.rmtree(tdir)
Example #15
def play():
    env = gym.make('MsPacman-v0')
    agent = DQNAgent(LEARNING_RATE, IMG_ROWS, IMG_COLS, IMG_CHANNELS, INITIALIZE_STDDEV)
    print("Now we load weight")
    agent.model.load_weights(WEIGHT_PATH + "model.h5")
    print("Weight load successfully")
    step = 0
    x_t = env.reset()
    while step < 80:
        env.render()
        env.step(0)
        step += 1
    
    loss = 0
    total_reward = 0
    epsilon = INITIAL_EPSILON
    
    env.render()
    x_t,_,_,_ = env.step(0)
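    # Preprocess the frame (grayscale, resize, rescale intensity) and stack four copies as the initial state.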
    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t, (IMG_ROWS, IMG_COLS), mode='constant')
    x_t = skimage.exposure.rescale_intensity(x_t, out_range=(0, 255))
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    s_t = s_t.reshape((1, s_t.shape[0], s_t.shape[1], s_t.shape[2]))


    for step in range(MAX_STEPS):
        env.render()
        # choose an action epsilon greedy
        a_t = np.zeros([ACTIONS])

        q = agent.model.predict(s_t)
        print("TIMESTEP", step,
              "/ ACTION_PREDICTION", q)
        action_index = np.argmax(q)
        a_t[action_index] = 1

        # run the selected action and observed next state and reward
        x_t1_colored, r_t, terminal, info = env.step(action_index)
        total_reward += r_t
        x_t1 = process_image(x_t1_colored)
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)
        s_t = s_t1

        # print info
        print("TIMESTEP", step,
              "/ ACTION", action_index,
              "/ REWARD", r_t,
              "/ Loss ", loss,
              "/ EPSILON", epsilon)

        if terminal:
            break
    print("Game ended, Total rewards: " + str(total_reward))
Example #16
def main(args):
    env = gym.make(args.env)
    writer = SummaryWriter(comment="CartPole-v0-DQN")
    totalReward = []
    actionDim = env.action_space.n
    stateDim = env.observation_space.shape[0]
    hiddenDim = args.hiddenDim
    buffer = UniformReplayBuffer(args.maxCapacity, env.observation_space.shape,
                                 np.float32, np.long)

    dqnAgent = DQNAgent(buffer, stateDim, actionDim, hiddenDim, args)
    stepCounter = 0
    epsilon = args.epsStart
    for e in range(args.numberOfEpisode):
        state = env.reset()
        episodeReward = 0
        done = False

        while not done:
            stepCounter += 1
            action = dqnAgent.GetAction(state, epsilon)
            nextState, reward, done, _ = env.step(action)
            buffer.push_transition(
                Transition(state, action, reward, nextState, done))

            episodeReward += reward
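            # Start DQN updates once the buffer holds a couple of batches, decaying epsilon after each update.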
            if stepCounter > 2 * args.batchSize:
                dqnAgent.Update(stepCounter)
                epsilon = max(epsilon * args.epsDecay, args.epsStop)

            state = nextState

        totalReward.append(episodeReward)
        meanReward = float(np.mean(totalReward[-100:]))
        writer.add_scalar("episodeReward", episodeReward, stepCounter)
        writer.add_scalar("meanReward", meanReward, stepCounter)
        writer.add_scalar("epsilon", epsilon, stepCounter)
        writer.add_scalar("episodes", e, stepCounter)
        print(
            "Eps:{} Steps:{} Mean Reward: {} Episode Reward: {}  Epsilon: {}".
            format(e, stepCounter, meanReward, episodeReward, epsilon))
Example #17
def main():
    game.set_mode('manual')
    game.setup(skip_setup)

    if debug:
        game.play()

    if mqtt:
        server = Server("127.0.0.1", game, player, verbose=True)
        server.play(games_total)
    else:
        game.set_mode('random')
        agent = DQNAgent(game, skip_training)
        agent.train(games_start, games_total)
        agent.validate(games_total, validation_games, validation_max_steps)
        agent.play(games_total, game_max_steps)
Example #18
def main():
    np.set_printoptions(suppress=True,
                        formatter={'float_kind': '{:0.2f}'.format})
    env_fns = [make_env('MountainCar-v0', i) for i in range(4)]
    env = SyncVectorEnv(env_fns)

    state_size = env.observation_space.shape[1]
    action_size = env.action_space[0].n

    NUM_EPISODES = 1000
    STEPS_PER_EPISODE = 200
    batch_size = 32
    eps_mean_reward = [0.0] * NUM_EPISODES

    agent = DQNAgent(state_size, action_size)
    start_time = datetime.now()
    for ep_count in range(NUM_EPISODES):
        episode_rew = 0
        state = env.reset()
        if (ep_count == 0):
            print("ep={} state.shape={}".format(ep_count, state.shape))
        #state = np.reshape(state, [-1, state_size])
        ep_start_time = datetime.now()
        for time in range(STEPS_PER_EPISODE):
            # env.render()
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            episode_rew += np.sum(reward)
            #next_state = np.reshape(next_state, [-1, state_size])
            if (time == 0):
                print("ep={} time={} action.len={} next_state.shape={} elaps_time={}".format( \
                    ep_count, time, len(action), next_state.shape, (datetime.now() - ep_start_time)) )
            #add to DQN buffer
            for idx in range(0, env.num_envs):
                agent.memorize(state[idx], action[idx], reward[idx],
                               next_state[idx], done[idx])
            state = next_state
            if time >= STEPS_PER_EPISODE - 1:
                eps_mean_reward[ep_count] = np.mean(episode_rew) / time
                print("ep: {}/{}, mean_avg_reward: {}, exec_time= {}".format( \
                    ep_count , NUM_EPISODES, eps_mean_reward[ep_count], (datetime.now() - ep_start_time)))
            #update DQN model if there are enough samples
            if len(agent.memory) > batch_size and time % 8 == 0:
                agent.replay(batch_size)
        #if ep_count % 2 == 0:
        #    agent.save(str(os.path.join(save_path,'ma-foraging-dqn.h5')))
    print("Finish train DQN Agent with {} episodes in {}".format(
        NUM_EPISODES, (datetime.now() - start_time)))
    env.close()
Example #19
class ReinforcedTablicPlayer(TablicPlayer):
    def __init__(self, gamma):
        self.agent = DQNAgent(gamma)

    def load_model(self, model_path):
        self.agent = torch.load(model_path)

    def save_model(self, model_path):
        torch.save(self.agent, model_path)

    @classmethod
    def take_to_state_action(cls, state_vector, played_card, take):
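        # Build the network input for a state-action pair by concatenating the encoded take with the state vector.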
        take_vector = Tablic.get_take_vector(played_card, take)
        result = np.concatenate((take_vector, state_vector))
        return torch.from_numpy(result).type(torch.cuda.FloatTensor)

    @classmethod
    def get_valid_state_actions(cls, game):
        hand = game.get_hand(game.current_player)
        observation = game.get_observation_vector(game.current_player)
        valid_takes = list(Tablic.get_valid_takes(game.table, hand))
        valid_state_actions = torch.zeros([len(valid_takes),
                                           80]).type(torch.cuda.FloatTensor)
        for ind, (played_card, take) in enumerate(valid_takes):
            valid_state_actions[ind] = cls.take_to_state_action(
                observation, played_card, take)
        return valid_takes, valid_state_actions

    def find_best_play_from_state_actions(self, takes, state_actions):
        with torch.no_grad():
            takes_value = self.agent.forward(state_actions)
        best_take_ind = torch.argmax(takes_value)
        return takes[best_take_ind]

    def get_random_play_from_state_actions(self, valid_takes,
                                           valid_state_actions):
        return random.choice(valid_takes)

    def find_best_play(self, game):
        return self.find_best_play_from_state_actions(
            *self.get_valid_state_actions(game))

    def get_random_play(self, game):
        return random.choice(
            list(
                Tablic.get_all_valid_takes(game.table,
                                           game.get_hand(
                                               game.current_player))))
Example #20
def main():
    env = gym.make("FightingiceDataNoFrameskip-v0",
                   java_env_path="/home/rurito/lesson/ken/FTG4.50")
    # HACK: make action_size be fetched automatically from the action set
    action_size = 56
    learning_rate = 0.1
    batch_size = 10
    episode = 3
    gamma = 0.1
    greedy_value = 0.3

    p2 = "MctsAi"
    env = Observer(env, p2)
    agent = DQNAgent(learning_rate, action_size, greedy_value)
    agent.model.load_model('param.hdf5')
    # agent = RoleBaseAgent()
    trainer = Trainer(env, agent)

    trainer.train(episode, batch_size, gamma)
Example #21
def train(environment, model_name=None, key=None):
    tdir = tempfile.mkdtemp()
    env = gym.make(environment)
    env = gym.wrappers.Monitor(env, tdir, force=True)
    agent = DQNAgent(env)
    EPISODES = 5000
    for episode in range(EPISODES):
        state, reward, done = env.reset(), 0.0, False
        action = agent.action(state, reward, done, episode)
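        # Roll out the episode, storing every transition so the agent can learn from replay.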
        while not done:
            #env.render()
            next_state, reward, done, _ = env.step(action)
            agent.store(state, action, reward, next_state, done)
            state = next_state
            action = agent.action(state, reward, done, episode)
        if model_name and (episode == EPISODES - 1 or episode % 10 == 0):
            agent.save_model(filename=model_name)
            pass
    env.close()
    if key:
        gym.upload(tdir, api_key=key)
    shutil.rmtree(tdir)
Example #22
def replay():
    if len(DQNAgent.memory) >= SAMPLE_SIZE:
        print("----------------------------------------")
        print("---> Starting experience replay...")
        start_time = time.time()
        losses = []
        accuracies = []
        # DQNAgent.replay()
        for i in range(0, 100):
            loss, acc = DQNAgent.replay()
            losses.append(sum(loss) / len(loss))
            accuracies.append(sum(acc) / len(acc))
            # print(i, '-', 'Loss:', (sum(loss)/len(loss)), 'Accuracy:', (sum(acc)/len(acc)))

        # losses, accuracies = DQNAgent.replay()
        print('--->', 'Loss:', (sum(losses) / len(losses)), 'Accuracy:',
              (sum(accuracies) / len(accuracies)))

        # print(DQNAgent.epsilon)
        elapsed_time = round(time.time() - start_time, 2)
        print("---> Experience replay took: ", elapsed_time, " seconds")
        # print("----------------------------------------")
    return 'ok'
Example #23
def main():
    env = Game()
    env.start()
    agent = DQNAgent(env)
    state = torch.from_numpy(np.zeros((4, 160, 240)))
    i = 0
    MAX_EPISODES = 20
    episodes = []
    scores = []

    # episodes = np.arange(500)
    # scores = np.random.randn(1, 500)

    while i <= MAX_EPISODES:
        action = agent.get_action(state)

        observation, reward, done = env.step(action)

        prev_state = state
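        # Slide the 4-frame window: drop the oldest frame and append the new observation.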
        state = torch.cat((state[1:], torch.from_numpy(np.array([observation]))), axis=0)
        agent.update_buffer(prev_state, action, reward, state, done)

        if done:
            i += 1
            episodes.append(i)
            scores.append(env.result)
            agent.update(batch_size=20)
            env.reset()

    print('Episodes')
    print(episodes)
    print('Scores:')
    print(scores)
    plt.scatter(episodes, scores, s=1)
    plt.xlabel('Episodes')
    plt.ylabel('Score')
    plt.title('Deep Q-Learning Agent')
    plt.savefig('score-500-episodes.png')
Example #24
def main():
    parser = argparse.ArgumentParser(
        'Train or Evaluate a DQN Agent for OpenAI '
        'Gym Atari Environments')
    parser.add_argument('--env', '-e', default=ENV_NAME)
    parser.add_argument('--evaluate', action='store_true', default=False)
    parser.add_argument('--load_weights', '-l', default=None)
    parser.add_argument('--render', '-r', action='store_true', default=False)

    args = parser.parse_args()
    env_name = args.env
    weights_to_load = args.load_weights
    evaluate = args.evaluate
    render = args.render

    env = gym.make(env_name)
    model = ConvModel(env,
                      learning_rate=2.5e-4,
                      momentum=0.95,
                      gamma=0.99,
                      tau=0.01,
                      soft_updates=True,
                      weights_to_load=weights_to_load,
                      grayscale=False,
                      window_size=8)
    agent = DQNAgent(env,
                     model,
                     linear_epsilon_decay=True,
                     epsilon_decay_steps=3.e6,
                     epsilon=1.0,
                     min_epsilon=0.06,
                     exp_buffer_size=1000000,
                     batch_size=256,
                     render=render,
                     update_freq=1,
                     random_starts=30,
                     max_steps=10000)

    if evaluate:
        agent.evaluate()
    else:
        agent.train()
Example #25
from GlobalVariables import GlobalVariables
from DQNAgent import DQNAgent
import numpy as np
import matplotlib.pyplot as plt
import pylab
import sys

Extract = Extract_Features

options = GlobalVariables  # To access global variables from GlobalVariables.py
parameter = GlobalVariables  # To access parameters from GlobalVariables.py
samples = Extract_Features  # To access the member functions of the Extract_Features class
grid_size = GlobalVariables  # To access the size of the grid from GlobalVariables.py

env = Environment(grid_size.nRow, grid_size.nCol)
agent = DQNAgent(env)
list = []

for i in range(1, parameter.how_many_times + 1):
    print(
        "************************************************************************************"
    )
    print("Iteration", i)
    Number_of_Iterations = []
    Number_of_Episodes = []
    reward_List = []
    filename = str(grid_size.nRow) + "X" + str(
        grid_size.nCol) + "_Experiment.txt"
    for episode in range(1, parameter.Number_of_episodes + 1):
        #file = open(filename, 'a')
        #done = False
Example #26
from Scenario import Scenario
from DQNAgent import DQNAgent
import numpy as np

EPISODES = 2000

if __name__ == "__main__":
    env = Scenario()
    state_size = env.state_size
    action_size = env.action_size
    agent = DQNAgent(state_size, action_size)
    done = False
    batch_size = 32
    for e in range(EPISODES):
        state = env.reset()
        print("Size: " + str(state.shape))
        state = np.reshape(state, [1, state_size])
        time = 0
        done = 0
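        # Play one episode, storing each transition; sync the target network when the episode ends.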
        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                agent.update_target_model()
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break
            time = time + 1
Example #27
    esp = 0.01
else:
    esp = 1

robot = simulated_1D_robot(goalie_pos_start=3,
                           GRID_NUM_HEIGHT=num_grid_y,
                           GRID_NUM_WIDTH=num_grid_x,
                           GRID_SIZE=10,
                           draw_scene=draw_scene,
                           gridworld=True)

agent = DQNAgent(  #state_size =num_grid_y*num_grid_x,
    state_size=2,
    action_size=3,
    gamma=0.95,
    epsilon=esp,
    epsilon_min=0.01,
    epsilon_decay=0.995,
    learning_rate=0.001,
    model_type='DeepModel')

EPOCHS = 5000
if not draw_scene:
    UPDATE_FREQ = 1000000
else:
    UPDATE_FREQ = 1000000

batch_size = 32

start = time.time()
done = False
Example #28
        # Get the screens:
        s = []
        while len(s) < 4:
            s.append(self.get_screen(self.driver))

        # Get our images normalized
        s = self.get_normalized_input(s)

        # Return the values
        return s, self.get_distance_ran(self.driver), self.get_game_over(
            self.driver)


if __name__ == '__main__':
    env = DinoEnvironment()
    agent = DQNAgent((150, 300), action_size=3, memory_size=5000)
    max_score = 0

    for i in range(100):
        state, score, done = env.reset()
        print("state shape: {}".format(state.shape))
        a = 0

        while env.get_game_over() is False:
            action = agent.act(state)
            next_state, score, done = env.act(env.action_space[action])

            reward = score if not done else -10
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            a += 1
Example #29
from Desk import T9Desk
from DQNAgent import DQNAgent
import numpy as np


env = T9Desk("random_1", "Deep QN_2")
state_size = env.observation_space
action_size = env.action_space_size
agent = DQNAgent(state_size, action_size)
agent.load("T9-dqn.h5")

for i in range(100):
    state = env.reset(False)
    done = False
    # print(i)
    while not done:
        pr, op = env.who_moves_str
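        # Player 2 plays a random legal action; player 1 queries the trained DQN (shifted by +1 to match the env's action ids).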
        if pr == 'p2':
            action_space = env.action_space[pr]
            action = action_space[np.random.randint(0, action_space.size)]
        else:
            action = agent.act(state) + 1
        next_state, reward, done, _ = env.step(action)
        env.render()
    score_sum = env.win_count['p1'] + env.win_count['p2'] + env.win_count['draw']
Example #30
    elif layer['type'] == 'flatten':
        layerStack = keras.layers.Flatten()(layerStack)

# Model's output
layerStack = keras.layers.Dense(
    env.action_space.n,
    activation='linear',
    kernel_initializer=tf.keras.initializers.VarianceScaling(
        scale=2.0))(layerStack)

# Initialize a new model
agent = DQNAgent(inputs,
                 layerStack,
                 memSize=config['agent']['replayMemorySize'],
                 stackedStateLength=config['agent']['stackedStateLength'],
                 stateScaleFactor=config['agent']['stateScaleFactor'],
                 epsilonPolicy=epsilonPolicy,
                 optimizer=optimizer,
                 loss=config['model']['lossFunction'],
                 batchSize=config['model']['batchSize'],
                 modelName=config['model']['name'])
# If required, load the old model for further learning
if config['paths']['initialModelName'] != False:
    modelToLoad = os.path.join(config['paths']['savesDir'],
                               env.unwrapped.spec.id, 'models',
                               config['paths']['initialModelName'])
    agent.loadModel(modelToLoad)

# Load replay memory if needed
if config['paths']['initialReplayMemoryName'] != False:
    replaysToLoad = os.path.join(config['paths']['savesDir'],
                                 env.unwrapped.spec.id, 'replays',