Code Example #1

from tqdm import tqdm

def train(game):
    agent = DQN(game)

    for i in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        previous_variables = None
        previous_img = None
        done = False
        local_history = []
        total_reward = 0
        while not done:
            state = game.get_state()

            img = state.screen_buffer
            variables = state.game_variables
            if previous_variables is None:
                previous_variables = variables
            if previous_img is None:
                previous_img = img

            # Choose an action from the current frame and apply it in the game
            action = agent.act(img)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            # Combine the engine reward with a shaping bonus and rescale it
            reward = (reward + calculate_additional_reward(previous_variables, variables)) / 100
            total_reward += reward
            local_history.append([previous_img, img, reward, action, done])
            previous_variables = variables
            previous_img = img

        # Learn only from episodes that finished with a non-negative score
        if total_reward >= 0:
            for previous_state, state, reward, action, done in local_history:
                agent.remember(previous_state, state, reward, action, done)
            agent.train()
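The loop above depends on a reward-shaping helper, calculate_additional_reward, that is not shown. A minimal sketch of what such a helper might look like, assuming the VizDoom game_variables vector exposes health at index 0 and ammo at index 1 (both indices are hypothetical and depend on the scenario configuration):

def calculate_additional_reward(previous_variables, variables):
    # Hypothetical layout: variables[0] = health, variables[1] = ammo
    health_delta = variables[0] - previous_variables[0]
    ammo_delta = variables[1] - previous_variables[1]
    # Encourage picking up health, mildly discourage wasting ammo
    return health_delta * 1.0 + ammo_delta * 0.1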
Code Example #2

import gym
import matplotlib.pyplot as plt

def main(args):
    env = gym.make("CartPole-v0")

    if args.seed >= 0:
        random_seed(args.seed)
        env.seed(args.seed)

    agent = DQN(env, args)
    model = get_model(out_dim=env.action_space.n, lr=args.lr)
    agent.set_model(model)

    rewards_history, steps_history = [], []
    train_steps = 0
    # Training
    for ep in range(args.max_ep):
        state = env.reset()
        ep_rewards = 0
        # env.spec.timestep_limit was removed from gym; use max_episode_steps
        for step in range(env.spec.max_episode_steps):
            # sample action
            action = agent.sample_action(state, policy="egreedy")
            # apply action
            next_state, reward, done, debug = env.step(action)
            train_steps += 1
            ep_rewards += reward
            # modified reward to speed up learning
            reward = 0.1 if not done else -1
            # train
            agent.train(state, action, reward, next_state, done)

            state = next_state
            if done:
                break

        steps_history.append(train_steps)
        if not rewards_history:
            rewards_history.append(ep_rewards)
        else:
            rewards_history.append(rewards_history[-1] * 0.9 +
                                   ep_rewards * 0.1)

        # Decay epsilon
        if agent.epsilon > args.final_epsilon:
            decay = (args.init_epsilon - args.final_epsilon) / args.max_ep
            agent.epsilon -= decay

        # Evaluate during training
        if ep % args.log_every == args.log_every - 1:
            total_reward = 0
            for i in range(args.test_ep):
                state = env.reset()
                for j in range(env.spec.max_episode_steps):
                    if args.render:
                        env.render()
                    action = agent.sample_action(state, policy="greedy")
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            current_mean_rewards = total_reward / args.test_ep
            print("Episode: %d Average Reward: %.2f" %
                  (ep + 1, current_mean_rewards))

    # plot training rewards
    plt.plot(steps_history, rewards_history)
    plt.xlabel("steps")
    plt.ylabel("running avg rewards")
    plt.show()
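Two helpers used above, random_seed and get_model, are defined elsewhere. A plausible sketch of both, assuming a Keras-style model (the original framework is not shown, and the layer sizes are illustrative):

import random
import numpy as np
import tensorflow as tf
from tensorflow import keras

def random_seed(seed):
    # Seed every source of randomness used during training
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

def get_model(out_dim, lr):
    # Small MLP mapping the 4-dimensional CartPole observation to Q-values
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(4,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(out_dim),
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse')
    return model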
Code Example #3
    x = input('To train a model: train,\n'
              'To test a trained model: test,\n'
              'To train on a different dataset: d: ')
    if x == 'd':
        dataset = input('Enter name of dataset as "example_dataset.csv": ')
        try:
            raw = preprocess(dataset)
        except Exception:
            # Bail out instead of calling preprocess() again on a bad file
            raise SystemExit('Invalid dataset')
        actions = 2  # number of discrete actions
        states = 7   # number of state features
        env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
        agent = DQN(actions, states, 100)
        all_rewards = agent.train(env, 1000)
    elif x == 'test':
        raw = preprocess()
        env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
        all_rewards = trained_test('dqn_model.h5', env)
    else:
        raw = preprocess()
        actions = 2  # number of discrete actions
        states = 7   # number of state features
        env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
        agent = DQN(actions, states, 100)
        all_rewards = agent.train(env, 1000)

    # Plot only if training/testing actually produced rewards
    if all_rewards:
        print(all_rewards)
        plot(all_rewards)
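MyStocksEnv takes the signature of a gym-anytrading StocksEnv (a DataFrame plus window_size and frame_bound), so preprocess presumably loads price data into a DataFrame. A minimal sketch under that assumption; the default file name and column names are hypothetical:

import pandas as pd

def preprocess(dataset='stocks.csv'):
    # Load OHLC price data; the environment reads columns such as 'Close'
    df = pd.read_csv(dataset)
    df = df.sort_values('Date').reset_index(drop=True)
    return df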
Code Example #4

import gym
import torch
# make_dino and the ChromeDino-v0 registration come from the
# gym_chrome_dino package (assumed here)
from gym_chrome_dino.utils.wrappers import make_dino

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialise the game
env = gym.make('ChromeDino-v0')
# env = gym.make('ChromeDinoNoBrowser-v0')
env = make_dino(env, timer=True, frame_stack=True)

# Get the number of actions and the dimension of input
n_actions = env.action_space.n

# ----------- Nature DQN ---------------
# DQN (and the `logger` instance) are defined in this project's own modules
dqn = DQN(n_actions, device)
dqn.train(env, logger)
# dqn.load("./trained/dqn.pkl")
# dqn.test(env)

# ----------- Prioritized DQN ---------------
# dqn_p = DQNPrioritized(n_actions, device)
# dqn_p.train(env, logger)
# dqn_p.load("./trained/dqn_p.pkl")
# dqn_p.test(env)


# ----------- Double DQN ----------------
# double_dqn = DoubleDQN(n_actions, device)
# double_dqn.train(env, logger)
# double_dqn.load("./trained/double-dqn.pkl")
# double_dqn.test(env)
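The DQN, DQNPrioritized, and DoubleDQN classes come from the project's own modules. For reference, a minimal sketch of the temporal-difference update a Nature-style DQN performs on a sampled batch in PyTorch (function and tensor names are illustrative, not the project's API):

import torch
import torch.nn.functional as F

def dqn_update(policy_net, target_net, optimizer, batch, gamma=0.99):
    # batch holds tensors: states, actions (int64), rewards,
    # next_states, and dones (0/1 floats)
    states, actions, rewards, next_states, dones = batch
    # Q(s, a) for the actions that were actually taken
    q_values = policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        # Nature DQN bootstraps from a frozen target network
        next_q = target_net(next_states).max(dim=1).values
        targets = rewards + gamma * next_q * (1.0 - dones)
    loss = F.smooth_l1_loss(q_values, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()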