def main():
    # Create the environment (start SUMO via TraCI)
    traci.start([sumoBinary, "-c", "data/cross.sumocfg",
                 "--tripinfo-output", "tripinfo.xml"])

    traci.trafficlight.setPhase("0", 0)

    act_dim = 2
    obs_dim = 1440  # (10, 24, 6)

    # Create the agent with the PARL framework

    model = Model(act_dim)
    algorithm = parl.algorithms.DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(algorithm, obs_dim, act_dim)

    # Load a previously saved model if one exists
    if os.path.exists('./DQNmodel.ckpt'):
        save_path = './DQNmodel.ckpt'
        agent.restore(save_path)
        print("Model loaded successfully")
    env = 0  # placeholder; the SUMO environment is driven directly through traci
    # Create the replay memory
    rpm = ReplayMemory(MEMORY_SIZE)
    # Warm up: pre-fill the replay memory before training starts
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(agent, env, rpm)

    episode = 0
    while episode < TRAIN_EPISODE:

        print("=============================")
        print("episode:",episode)
        total_reward, steps = run_episode(agent, env, rpm)
        episode += 1

        eval_reward = evaluate(env, agent, render=False)
        logger.info('episode:{}    test_reward:{}'.format(
            episode, eval_reward))

        save_path = './dqnmodel/model_{}_{}.ckpt'.format(episode, total_reward)
        agent.save(save_path)

    # Save the final model to ./DQNmodel.ckpt
    agent.save('./DQNmodel.ckpt')
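# NOTE: run_episode and evaluate are not shown in this snippet. The sketch below
# is a hypothetical run_episode, assuming helper functions get_state() and
# apply_action() that wrap traci, plus constants LEARN_FREQ and BATCH_SIZE; the
# real helpers in the source project may differ.
def run_episode(agent, env, rpm):
    total_reward, steps = 0, 0
    obs = get_state()                    # hypothetical: read detector/phase state via traci
    done = False
    while not done:
        action = agent.sample(obs)       # epsilon-greedy action from the PARL agent
        next_obs, reward, done = apply_action(action)  # hypothetical traci step wrapper
        rpm.append((obs, action, reward, next_obs, done))
        # train on a minibatch once the replay memory is warmed up
        if len(rpm) > MEMORY_WARMUP_SIZE and steps % LEARN_FREQ == 0:
            b_obs, b_act, b_rew, b_next, b_done = rpm.sample(BATCH_SIZE)
            agent.learn(b_obs, b_act, b_rew, b_next, b_done)
        obs = next_obs
        total_reward += reward
        steps += 1
    return total_reward, steps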
Example #2
    sns.lineplot(x='Episode', y='Success Rate', color='C0', data=data)
    plt.savefig('plots/Success_rate_%s' % str(game))
    plt.clf()


if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.0005
    n_games = 3000
    agent = Agent(gamma=0.99,
                  epsilon=1.1,
                  lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n,
                  mem_size=100000,
                  batch_size=64,
                  epsilon_dec=0.95,
                  epsilon_end=0.01,
                  fname='_New_R_target.h5')
    scores = []
    eps_history = []
    dones = []
    data = {'eps_history': [], 'scores': [], 'dones': []}
    # with open('data.json') as json_data:
    #     data = json.load(json_data)
    # print(data)
    # scores = data['scores']
    # eps_history = data['eps_history']
    # dones = data['dones']
    '''---------------------------------------evaluate main------------------------------------------'''
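    # NOTE: the evaluation loop itself is cut off in this snippet. A minimal
    # hypothetical sketch, reusing the Agent interface seen elsewhere in these
    # examples (choose_action); the commented load_model() restore call is an
    # assumption about how the saved .h5 weights would be reloaded.
    # agent.load_model()
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = agent.choose_action(observation)
            observation, reward, done, info = env.step(action)
            score += reward
        scores.append(score)
        print('evaluation episode', i, 'score %.2f' % score)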
Example #3
    plt.ylabel('Average Q-length every 8 seconds')
    plt.xticks(index, label)
    axes = plt.gca()
    axes.set_ylim([0, 20])
    plt.show()

    AVG_Q_len_perepisode.append(sum_q_lens / 702)
    sum_q_lens = 0


if __name__ == '__main__':
    act_dim = 2
    obs_dim = 1440  # (10, 24, 6)

    # Create the agent with the PARL framework

    model = Model(act_dim)
    algorithm = parl.algorithms.DQN(model,
                                    act_dim=act_dim,
                                    gamma=GAMMA,
                                    lr=LEARNING_RATE)
    agent = Agent(algorithm, obs_dim, act_dim)

    # Load a previously saved model if one exists
    if os.path.exists('./DQNmodel.ckpt'):
        save_path = './DQNmodel.ckpt'
        agent.restore(save_path)
        print("Model loaded successfully")

    test(agent)
Example #4
    print(x[:2])


if __name__ == '__main__':
    # for i in range(10):
    #     test()
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.0005
    n_games = 4000
    agent = Agent(gamma=0.99,
                  epsilon=0.1,
                  lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n,
                  mem_size=100000,
                  batch_size=64,
                  epsilon_end=0.01,
                  fname='_New_R.h5')
    scores = []
    eps_history = []
    dones = np.zeros(n_games)
    observation = env.reset()
    action = agent.choose_action(observation)
    observation_, reward, done, info = env.step(action)
    # games = 50
    # scores = np.random.choice([1, 0], 50)
    # epsilon = np.linspace(1, 0.1)
    # print(scores)
    # plot_success_rate(scores, games)
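# NOTE: plot_success_rate is referenced above but not defined in this snippet.
# A minimal hypothetical sketch, modeled on the seaborn call shown in Example #2;
# the rolling-window size (50) and the pandas/seaborn imports are assumptions.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_success_rate(scores, game, window=50):
    # Success rate = rolling mean of binary (success/failure) episode outcomes.
    rates = pd.Series(scores).rolling(window, min_periods=1).mean()
    data = pd.DataFrame({'Episode': range(len(scores)), 'Success Rate': rates})
    sns.lineplot(x='Episode', y='Success Rate', color='C0', data=data)
    plt.savefig('plots/Success_rate_%s' % str(game))
    plt.clf()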
Example #5
def main():
    n_games = 1000
    gamma = 0.01
    epsilon = 0.8
    lr = 0.001
    input_dims = 32
    batch_size = 64
    n_actions = len(encoded_actions)

    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)

    LandlordAI_wins = 0
    PeasantAI_wins = 0

    LandlordAI_winRates = []
    PeasantAI_winRates = []

    for i in range(n_games):
        if i % 50 == 0:
            print("game ", str(i))
        game = GameState()
        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])

            if turn == 0:
                action = LandlordAI.choose_action(observation,
                                                  possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = 1
                        LandlordAI_wins += 1
                    else:
                        reward = -1
                    done = True
                else:
                    reward = 0
                    done = False
                LandlordAI.store_transition(observation, action, reward,
                                            observation_, done)
                LandlordAI.learn()

            else:
                action = PeasantAI.choose_action(observation,
                                                 possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = -1
                    else:
                        reward = 1
                        PeasantAI_wins += 1
                    done = True
                else:
                    reward = 0
                    done = False
                PeasantAI.store_transition(observation, action, reward,
                                           observation_, done)
                PeasantAI.learn()

        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))

    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')

    print("Landlord Final Win Rate: ", str(LandlordAI_winRates[-1]))
    print("Peasant Final Win Rate: ", str(PeasantAI_winRates[-1]))
Example #6
    nrows=5000,
    names=['Local time', 'Open', 'High', 'Low', 'Close', 'Volume'])
df = df.drop('Volume', axis=1)

data_max = df['High'].max()
data_min = df['Low'].min()
df = np.array(df)
df = (df - data_min) / (data_max - data_min)

MIN_MEM_SIZE = 128
BATCH_SIZE = 32
WINDOW_SIZE = 90
EPISODE_LENGTH = len(df) - WINDOW_SIZE
ACTIONS = ['PASS', 'LONG', 'SHORT']

agent = Agent(WINDOW_SIZE)
win, loss = 0, 0

loop = tqdm(total=EPISODE_LENGTH, position=0, leave=False)
for ep in range(10):

    print('starting ep ', ep)
    ret = 0

    for t in range(EPISODE_LENGTH):
        loop.set_description('Training ... step {}'.format(t))
        loop.update(1)

        done = False
        if t == 0:
            state = getState(df, t, WINDOW_SIZE)  # (90, 4) size
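            # NOTE: getState is not defined in this snippet. A hypothetical sketch,
            # assuming it returns the WINDOW_SIZE most recent normalized rows and
            # pads at the start when t < WINDOW_SIZE:
            #
            #   def getState(data, t, window):
            #       start = max(t - window + 1, 0)
            #       block = data[start:t + 1]
            #       if len(block) < window:   # pad the first steps with the earliest row
            #           pad = np.repeat(block[:1], window - len(block), axis=0)
            #           block = np.concatenate([pad, block], axis=0)
            #       return block              # shape (window, n_features)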
Example #7
import gym
import tensorflow as tf
import gym_airsim.envs
import gym_airsim

if __name__ == '__main__':
    print(gym.envs.registry.all())
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.001
    n_games = 500
    agent = Agent(gamma=0.99,
                  epsilon=1.0,
                  lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n,
                  mem_size=100000,
                  batch_size=64,
                  epsilon_end=0.01,
                  fname=env_name + '.h5')
    scores = []
    eps_history = []

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            # env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)