def main():
    # Start the SUMO environment via TraCI
    traci.start([sumoBinary, "-c", "data/cross.sumocfg",
                 "--tripinfo-output", "tripinfo.xml"])
    traci.trafficlight.setPhase("0", 0)

    act_dim = 2
    obs_dim = 1440  # (10, 24, 6)

    # Build the agent with the PARL framework
    model = Model(act_dim)
    algorithm = parl.algorithms.DQN(
        model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(algorithm, obs_dim, act_dim)

    # Restore a previously saved model if one exists
    if os.path.exists('./DQNmodel.ckpt'):
        save_path = './DQNmodel.ckpt'
        agent.restore(save_path)
        print("Model restored successfully")

    env = 0  # placeholder; the simulation state is accessed through TraCI rather than a gym env

    # Create the replay memory
    rpm = ReplayMemory(MEMORY_SIZE)

    # Warm up: pre-fill the replay memory before training starts
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(agent, env, rpm)

    episode = 0
    while episode < TRAIN_EPISODE:
        print("=============================")
        print("episode:", episode)
        total_reward, steps = run_episode(agent, env, rpm)
        episode += 1

        eval_reward = evaluate(env, agent, render=False)
        logger.info('episode:{} test_reward:{}'.format(episode, eval_reward))
        save_path = './dqnmodel/model_{}_{}.ckpt'.format(episode, total_reward)
        agent.save(save_path)

    # Save the final model to ./DQNmodel.ckpt
    agent.save('./DQNmodel.ckpt')
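# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the original source): one possible shape
# for the run_episode() helper used above, following the usual PARL pattern of
# agent.sample() for exploration and agent.learn() on minibatches drawn from
# the replay memory. get_observation() and compute_reward() are hypothetical
# helpers that would read the traffic state out of TraCI, BATCH_SIZE is
# assumed to be a module-level constant like MEMORY_SIZE, and the exact
# ReplayMemory API (append/sample) varies between PARL versions. The real
# helper probably also restarts the simulation between episodes.
# ---------------------------------------------------------------------------
def run_episode(agent, env, rpm):
    total_reward, steps = 0, 0
    obs = get_observation()                        # hypothetical 1440-dim state builder
    while traci.simulation.getMinExpectedNumber() > 0:
        action = agent.sample(obs)                 # epsilon-greedy action
        traci.trafficlight.setPhase("0", action)   # apply the chosen phase to light "0"
        traci.simulationStep()                     # advance SUMO by one step
        next_obs = get_observation()               # hypothetical
        reward = compute_reward()                  # hypothetical, e.g. negative total waiting time
        done = traci.simulation.getMinExpectedNumber() == 0
        rpm.append((obs, action, reward, next_obs, done))
        if len(rpm) > MEMORY_WARMUP_SIZE:
            batch = rpm.sample(BATCH_SIZE)         # obs, action, reward, next_obs, done arrays
            agent.learn(*batch)
        total_reward += reward
        obs = next_obs
        steps += 1
    return total_reward, steps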
    sns.lineplot(x='Episode', y='Success Rate', color='C0', data=data)
    plt.savefig('plots/Success_rate_%s' % str(game))
    plt.clf()


if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.0005
    n_games = 3000
    agent = Agent(gamma=0.99, epsilon=1.1, lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n, mem_size=100000,
                  batch_size=64, epsilon_dec=0.95, epsilon_end=0.01,
                  fname='_New_R_target.h5')
    scores = []
    eps_history = []
    dones = []
    data = {'eps_history': [], 'scores': [], 'dones': []}
    # with open('data.json') as json_data:
    #     data = json.load(json_data)
    #     print(data)
    # scores = data['scores']
    # eps_history = data['eps_history']
    # dones = data['dones']

    '''---------------------------------------evaluate main------------------------------------------'''
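    # -----------------------------------------------------------------------
    # Hedged sketch (assumption, not from the original source): a minimal
    # "evaluate main" loop matching the separator above. It runs the agent
    # mostly greedily, records one success flag per episode, and hands the
    # result to the plot_success_rate helper referenced elsewhere in these
    # snippets (its seaborn tail is shown at the top of this one). The
    # 'success' key in `info` is a hypothetical field of the AirSim env.
    # -----------------------------------------------------------------------
    for i in range(n_games):
        done = False
        observation = env.reset()
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            observation = observation_
        dones.append(int(info.get('success', False)))   # hypothetical per-episode success flag

    plot_success_rate(dones, env_name)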
    plt.ylabel('Average Q-length every 8 seconds')
    plt.xticks(index, label)
    axes = plt.gca()
    axes.set_ylim([0, 20])
    plt.show()
    AVG_Q_len_perepisode.append(sum_q_lens / 702)
    sum_q_lens = 0


if __name__ == '__main__':
    act_dim = 2
    obs_dim = 1440  # (10, 24, 6)

    # Build the agent with the PARL framework
    model = Model(act_dim)
    algorithm = parl.algorithms.DQN(
        model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(algorithm, obs_dim, act_dim)

    # Restore the trained model if one exists
    if os.path.exists('./DQNmodel.ckpt'):
        save_path = './DQNmodel.ckpt'
        agent.restore(save_path)
        print("Model restored successfully")

    test(agent)
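# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the original source): a possible shape
# for the simulation part of test(agent). It acts greedily with agent.predict()
# (the PARL convention for evaluation) and accumulates the queue length that
# the plotting code shown above averages over 702 measurements.
# get_observation() and the use of the controlled lanes of light "0" are
# hypothetical; the real project may measure queues differently.
# ---------------------------------------------------------------------------
def test(agent):
    sum_q_lens = 0
    traci.start([sumoBinary, "-c", "data/cross.sumocfg"])
    traci.trafficlight.setPhase("0", 0)
    while traci.simulation.getMinExpectedNumber() > 0:
        obs = get_observation()                    # hypothetical 1440-dim state builder
        action = agent.predict(obs)                # greedy action, no exploration
        traci.trafficlight.setPhase("0", action)
        traci.simulationStep()
        sum_q_lens += sum(traci.lane.getLastStepHaltingNumber(lane)
                          for lane in traci.trafficlight.getControlledLanes("0"))
    traci.close()
    # ...the Q-length plotting shown at the top of this snippet would follow here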
    print(x[:2])


if __name__ == '__main__':
    # for i in range(10):
    #     test()
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.0005
    n_games = 4000
    agent = Agent(gamma=0.99, epsilon=0.1, lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n, mem_size=100000,
                  batch_size=64, epsilon_end=0.01, fname='_New_R.h5')
    scores = []
    eps_history = []
    dones = np.zeros(n_games)

    observation = env.reset()
    action = agent.choose_action(observation)
    observation_, reward, done, info = env.step(action)

    # games = 50
    # scores = np.random.choice([1, 0], 50)
    # epsilon = np.linspace(1, 0.1)
    # print(scores)
    # plot_success_rate(scores, games)
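# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the original source): one plausible
# plot_success_rate(scores, game) helper matching the commented-out call above
# and the seaborn tail shown in the earlier snippet. The rolling window of 50
# episodes is arbitrary; assumes pandas as pd, numpy as np, seaborn as sns and
# matplotlib.pyplot as plt are imported.
# ---------------------------------------------------------------------------
def plot_success_rate(scores, game, window=50):
    success = pd.Series(scores).rolling(window, min_periods=1).mean()
    data = pd.DataFrame({'Episode': np.arange(len(scores)),
                         'Success Rate': success})
    sns.lineplot(x='Episode', y='Success Rate', color='C0', data=data)
    plt.savefig('plots/Success_rate_%s' % str(game))
    plt.clf()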
def main():
    n_games = 1000
    gamma = 0.01
    epsilon = 0.8
    lr = 0.001
    input_dims = 32
    batch_size = 64
    n_actions = len(encoded_actions)

    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)

    LandlordAI_wins = 0
    PeasantAI_wins = 0
    LandlordAI_winRates = []
    PeasantAI_winRates = []

    for i in range(n_games):
        if i % 50 == 0:
            print("game ", str(i))
        game = GameState()

        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])

            if turn == 0:
                action = LandlordAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)

                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = 1
                        LandlordAI_wins += 1
                    else:
                        reward = -1
                    done = True
                else:
                    reward = 0
                    done = False

                LandlordAI.store_transition(observation, action, reward, observation_, done)
                LandlordAI.learn()
            else:
                action = PeasantAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)

                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = -1
                    else:
                        reward = 1
                        PeasantAI_wins += 1
                    done = True
                else:
                    reward = 0
                    done = False

                PeasantAI.store_transition(observation, action, reward, observation_, done)
                PeasantAI.learn()

        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))

    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')
    print("Landlord Final Win Rate: ", str(LandlordAI_winRates[-1]))
    print("Peasant Final Win Rate: ", str(PeasantAI_winRates[-1]))
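# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the original source): how an epsilon-
# greedy Agent.choose_action(observation, possible_moves_indices) could
# restrict the DQN to legal plays only. self.q_eval is assumed to be a Keras
# model that maps a state to one Q-value per encoded action, and numpy is
# assumed to be imported as np; the real Agent may compute this differently.
# ---------------------------------------------------------------------------
    def choose_action(self, observation, possible_moves_indices):   # method of the Agent class
        if np.random.random() < self.epsilon:
            # explore: uniform choice among the legal encoded actions
            return int(np.random.choice(possible_moves_indices))
        # exploit: mask illegal actions to -inf, then take the best legal one
        q_values = self.q_eval.predict(observation[np.newaxis, :])[0]
        masked = np.full(q_values.shape, -np.inf)
        masked[possible_moves_indices] = q_values[possible_moves_indices]
        return int(np.argmax(masked))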
                 nrows=5000,
                 names=['Local time', 'Open', 'High', 'Low', 'Close', 'Volume'])
df = df.drop(columns='Volume')
data_max = df['High'].max()
data_min = df['Low'].min()
df = np.array(df)
df = (df - data_min) / (data_max - data_min)

MIN_MEM_SIZE = 128
BATCH_SIZE = 32
WINDOW_SIZE = 90
EPISODE_LENGTH = len(df) - WINDOW_SIZE
ACTIONS = ['PASS', 'LONG', 'SHORT']

agent = Agent(WINDOW_SIZE)
win, loss = 0, 0
loop = tqdm(total=EPISODE_LENGTH, position=0, leave=False)

for ep in range(10):
    print('starting ep ', ep)
    ret = 0
    for t in range(EPISODE_LENGTH):
        loop.set_description('Training ... {}'.format(t))
        loop.update(1)
        done = False
        if t == 0:
            state = getState(df, t, WINDOW_SIZE)  # (90, 4) size
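# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the original source): a getState helper
# consistent with the "(90, 4) size" comment above -- it returns WINDOW_SIZE
# consecutive rows of the normalized price array starting at step t. The real
# project may pad, difference, or reshape the window differently.
# ---------------------------------------------------------------------------
def getState(data, t, window):
    # rows t .. t+window-1 of the normalized array -> shape (window, 4)
    return data[t:t + window]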
import gym
import tensorflow as tf

import gym_airsim.envs
import gym_airsim


if __name__ == '__main__':
    print(gym.envs.registry.all())
    tf.compat.v1.disable_eager_execution()
    env_name = 'AirSimEnv-v42'
    env = gym.make(env_name)
    lr = 0.001
    n_games = 500
    agent = Agent(gamma=0.99, epsilon=1.0, lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n, mem_size=100000,
                  batch_size=64, epsilon_end=0.01,
                  fname=env_name + '.h5')
    scores = []
    eps_history = []

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            # env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
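            # -----------------------------------------------------------------
            # Hedged sketch (assumption, not from the original source): the
            # usual continuation of this DQN loop -- store the transition,
            # learn, and track per-episode statistics. Method names
            # (store_transition, learn, save_model) and the epsilon attribute
            # follow the conventions used in the other snippets; the real
            # script may differ.
            # -----------------------------------------------------------------
            score += reward
            agent.store_transition(observation, action, reward, observation_, done)
            agent.learn()
            observation = observation_

        scores.append(score)
        eps_history.append(agent.epsilon)
        avg_score = sum(scores[-100:]) / len(scores[-100:])
        print('episode', i, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.2f' % agent.epsilon)

    agent.save_model()   # hypothetical: persist the network weights to `fname`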