def main(model_path, n_test_episodes):
    run_paths = glob.glob(os.path.join(model_path, '*'))
    for run_path in run_paths:
        if len(glob.glob(os.path.join(run_path, 'carracing_results*'))) > 0:
            print(run_path, 'already processed')
            continue

        # Load run config
        run_config = json.load(open(os.path.join(run_path, 'config.json'), 'r'))

        env = gym.make("CarRacing-v0").unwrapped
        num_actions = 5

        # Define networks and load agent
        if run_config['model'] == 'Resnet':
            Q_net = ResnetVariant(num_actions=num_actions,
                                  history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = ResnetVariant(num_actions=num_actions,
                                         history_length=run_config['history_length'] + 1).to(device)
        elif run_config['model'] == 'Lenet':
            Q_net = LeNetVariant(num_actions=num_actions,
                                 history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = LeNetVariant(num_actions=num_actions,
                                        history_length=run_config['history_length'] + 1).to(device)
        elif run_config['model'] == 'DeepQNetwork':
            Q_net = DeepQNetwork(num_actions=num_actions,
                                 history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = DeepQNetwork(num_actions=num_actions,
                                        history_length=run_config['history_length'] + 1).to(device)
        else:
            raise ValueError('{} not implemented.'.format(run_config['model']))

        agent = DQNAgent(Q=Q_net, Q_target=Q_target_net, num_actions=num_actions, **run_config)
        agent.load(os.path.join(run_path, 'agent.pt'))

        episode_rewards = []
        for i in range(n_test_episodes):
            stats = run_episode(env, agent, deterministic=True,
                                history_length=run_config['history_length'],
                                do_training=False, rendering=True,
                                normalize_images=run_config['normalize_images'],
                                skip_frames=run_config['skip_frames'],
                                max_timesteps=1000)
            episode_rewards.append(stats.episode_reward)

        # save results in a dictionary and write them into a .json file
        results = dict()
        results["episode_rewards"] = episode_rewards
        results["mean"] = np.array(episode_rewards).mean()
        results["std"] = np.array(episode_rewards).std()

        fname = "{}/carracing_results_dqn-{}.json".format(run_path, datetime.now().strftime("%Y%m%d-%H%M%S"))
        fh = open(fname, "w")
        json.dump(results, fh)
        fh.close()
        env.close()

    print('... finished')
def build(path):
    # create world
    world = World(path, thread_num=args.thread)

    # create agents
    agents = []
    for i in world.intersections:
        action_space = gym.spaces.Discrete(len(i.phases))
        agents.append(
            DQNAgent(
                action_space,
                LaneVehicleGenerator(world, i, ["lane_count"], in_only=True, average=None),
                LaneVehicleGenerator(world, i, ["lane_waiting_count"], in_only=True, average="all", negative=True),
                i.id))
        if args.load_model:
            agents[-1].load_model(args.save_dir)
        # if len(agents) == 5:
        #     break

    # create metric
    metric = TravelTimeMetric(world)

    # create env
    env = TSCEnv(world, agents, metric)

    return world, agents, env
def _single_dqn_test_demo():
    catch_game_object = MultiPlayerCatch(1, board_size=20, food_spawn_rate=0.05)
    visualizer = ImageStateVisualizator('MPCatch visualization', 10)
    recorder = ImageStateRecorder('MPCatch_rgb_trained_network_results')
    model = load_trained_model('final.h5')
    agent = DQNAgent(model, 10000)
    agent.train(catch_game_object, epochs=100000, batch_size=50, gamma=0.9, epsilon=0.1, visualizer=visualizer)
def init(args, test=False):
    tf_mute_warning()
    args.save_dir = save_dir + args.config_file[7:-5]
    if test:
        args.save_dir = save_dir + args.config_file[7:-10]
    # config_name = args.config_file.split('/')[1].split('.')[0]
    # args.agent_save_dir = args.save_dir + "/" + config_name
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = logging.getLogger('main')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(
        os.path.join(args.log_dir, datetime.now().strftime('%Y%m%d-%H%M%S') + ".log"))
    fh.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    logger.addHandler(fh)
    logger.addHandler(sh)

    # create world
    world = World(args.config_file, thread_num=args.thread, silent=True)

    # create agents
    agents = []
    for i in world.intersections:
        action_space = gym.spaces.Discrete(len(i.phases))
        agents.append(
            DQNAgent(
                action_space,
                LaneVehicleGenerator(world, i, ["lane_count"], in_only=True, average=None),
                LaneVehicleGenerator(world, i, ["lane_waiting_count"], in_only=True, average="all", negative=True),
                i.id))
        if args.load_model:
            agents[-1].load_model(args.save_dir)
    if args.share_weights:
        model = agents[0].model
        for agent in agents:
            agent.model = model

    # create metric
    metric = TravelTimeMetric(world)

    # create env
    env = TSCEnv(world, agents, metric)

    return env
def create(self, model):
    parameters = self._parameters

    # number of actions
    actions_count = 2

    # policies (here, "policy" means greedy vs. epsilon-greedy)
    policies = self._create_policies(model, parameters, actions_count)

    # memory for storing experiences
    memory = Memory(parameters["memory_size"])

    # discount factor gamma
    gamma = parameters["gamma"]

    # do not start training until replay_start_memory_size samples have accumulated
    replay_start_memory_size = parameters["replay_start_memory_size"]

    # number of samples drawn per experience replay
    replay_count = parameters["replay_count"]

    # leave a small interval between training steps
    training_interval_steps = parameters["training_interval_steps"]

    # create the optimizer
    optimizer_parameters = (parameters["optimizer"]["alpha"], parameters["optimizer"]["epsilon"])
    optimizer = optimizers.Adam(alpha=optimizer_parameters[0], eps=optimizer_parameters[1])
    optimizer.setup(model)

    # procedure for updating the target model
    model_updater = SoftModelUpdater(parameters["tau"])

    # create the agent
    agent = DQNAgent(gamma, model, optimizer, model_updater, memory,
                     replay_start_memory_size, replay_count,
                     training_interval_steps, policies)
    return agent
def make_random_agents():
    # build N_AGENTS independently initialized agents; `[DQNAgent(...)] * N_AGENTS`
    # would alias a single agent N_AGENTS times
    return [DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, STARTING_EPSILON, E_MIN, E_DECAY, GAMMA)
            for _ in range(N_AGENTS)]
def copy_agent(agent):
    weights = agent.model.get_weights()
    copied_model = clone_model(agent.model)
    copied = DQNAgent(*agent.get_init_info())
    copied.set_model(copied_model, weights)
    return copied
import numpy as np

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = 4
    num_actions = 2
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
import numpy as np

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    states_dim = 4
    action_dim = 2
    Q = MLP(states_dim, action_dim)
    Q_target = MLP(states_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, help="Model file to use", required=True)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=5, required=False)
    args = parser.parse_args()

    env = gym.make("CarRacing-v0").unwrapped
    history_length = 5

    # TODO: Define networks and load agent
    # ....
    Q_network = CNN(history_length=history_length, n_classes=5)
    Q_target = CNN(history_length=history_length, n_classes=5)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5)
    agent.load(args.model)

    episode_rewards = []
    for i in range(args.episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False,
                            rendering=True, history_length=history_length)
        episode_rewards.append(stats.episode_reward)
        print('Episode %d - [ Reward %.2f ]' % (i + 1, stats.episode_reward))

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()

    if not os.path.exists("./results"):
        os.mkdir("./results")
num_eval_episodes = 5  # evaluate on 5 episodes
eval_cycle = 10        # evaluate every 10 episodes

# You can find information about CartPole at
# https://github.com/openai/gym/wiki/CartPole-v0
# Hint: CartPole is considered solved when the average reward is greater than
# or equal to 195.0 over 100 consecutive trials.
env = gym.make("CartPole-v0").unwrapped

state_dim = 4
num_actions = 2

# TODO:
# 1. init Q network and target network (see dqn/networks.py)
# 2. init DQNAgent (see dqn/dqn_agent.py)
# 3. train DQN agent with train_online(...)
Q = MLP(state_dim, num_actions)
Q_target = MLP(state_dim, num_actions)
agent = DQNAgent(Q, Q_target, num_actions, gamma=0.9, batch_size=32, epsilon=0.1,
                 tau=0.01, lr=0.001, history_length=0)
training, validation = train_online(env, agent, 200, eval_cycle, num_eval_episodes)
        # store model
        if i % eval_cycle == 0 or i >= (num_episodes - 1):
            agent.save(os.path.join(model_dir, "dqn_agent-3.pt"))

    tensorboard.close_session()


if __name__ == "__main__":
    num_eval_episodes = 5  # evaluate on 5 episodes
    eval_cycle = 20        # evaluate every 20 episodes

    # You can find information about CartPole at
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than
    # or equal to 195.0 over 100 consecutive trials.
    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, num_actions, history_length=1000000)
    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=1000)
        # if i % eval_cycle == 0 or i >= (num_episodes - 1):

    tensorboard_train.close_session()
    tensorboard_eval.close_session()


if __name__ == "__main__":
    num_eval_episodes = 5  # evaluate on 5 episodes
    eval_cycle = 10        # evaluate every 10 episodes
    num_episodes = 10000

    # You can find information about CartPole at
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than
    # or equal to 195.0 over 100 consecutive trials.
    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # ...
    Q_target = MLP(state_dim, num_actions)
    Q = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # agent = DQNAgent(Q, Q_target, num_actions, double=True, history_length=1e6)
    agent = DQNAgent(Q, Q_target, num_actions, double=True, epsilon=0.99, eps_decay=True, history_length=1e6)
    # 3. train DQN agent with train_online(...)
    train_online(env, agent, num_episodes, num_eval_episodes, eval_cycle)
if __name__ == "__main__":
    # You can find information about CartPole at
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than
    # or equal to 195.0 over 100 consecutive trials.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--interrupt", action='store_true', help="Save model if interrupted",
                        default=False, required=False)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=500, required=False)
    parser.add_argument('-s', "--steps", type=int, help="num steps per episode", default=200, required=False)
    parser.add_argument("-r", "--render", action='store_true', help="render during training and evaluation",
                        default=False, required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    Q_network = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=num_actions, buffer_size=1e5, lr=1e-4)
    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=args.episodes, max_timesteps=args.steps,
                 eval_cycle=20, num_eval_episodes=5, rendering=args.render,
                 tensorboard_dir='./tensorboard', save_interrupt=args.interrupt)
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None, 0.95)
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
# Hint: CartPole is considered solved when the average reward is greater than
# or equal to 195.0 over 100 consecutive trials.
env = gym.make("CartPole-v0").unwrapped
# import pdb; pdb.set_trace()

state_dim = 4
num_actions = 2

# TODO:
# 1. init Q network and target network (see dqn/networks.py)
# 2. init DQNAgent (see dqn/dqn_agent.py)
# 3. train DQN agent with train_online(...)

# Duelling DQN or not
Duel = False
num_episodes = 2000

if Duel:
    Q = MLP_Duel(state_dim, num_actions)
    Q_target = MLP_Duel(state_dim, num_actions)
else:
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)

agent = DQNAgent(Q, Q_target, num_actions, double=True, history_length=1e6)
train_online(env, agent, num_episodes, epsilon_decay=False)
np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = 4
    num_actions = 2
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    PATH = '/home/salem/Documents/freiburg/Lab/CarRacing/reinforcement_learning/models_cartpole/best_eval_dqn_agent.pt'
    Q.load_state_dict(torch.load(PATH), strict=False)
    Q_target.load_state_dict(torch.load(PATH), strict=False)
    agent = DQNAgent(Q, Q_target, num_actions, double=True)

    n_test_episodes = 150

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, eps=0.1, deterministic=True, do_training=False, rendering=True)
        print(stats.episode_reward)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
def __init__(self, *args, **kwargs):
    DQNAgent.__init__(self, *args, **kwargs)
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, None, None, None)
    agent.epsilon = 0.01
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
from agent.dqn_agent import DQNAgent
from function import *
import sys

if len(sys.argv) != 4:
    print("Usage: python train.py [stock] [window] [episodes]")
    exit()

stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

agent = DQNAgent(window_size)
data = getStockDataVec(stock_name)
len_data = len(data)
l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1, len_data)
    total_profits = []
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)
        next_state = getState(data, t + 1, window_size + 1, len_data)
        reward = 0

        if action == 1:  # buy
            agent.inventory.append(data[t][1])
            total_profits.append(['buy', total_profit])
logger.addHandler(sh)

# create world
world = World(args.config_file, thread_num=args.thread)

# create agents
agents = []
for i in world.intersections:
    action_space = gym.spaces.Discrete(len(i.phases))
    agents.append(
        DQNAgent(
            action_space,
            LaneVehicleGenerator(world, i, ["lane_count"], in_only=True, average=None),
            LaneVehicleGenerator(world, i, ["lane_waiting_count"], in_only=True, average="all", negative=True),
            i.id))
    if args.load_model:
        agents[-1].load_model(args.save_dir)

# create metric
metric = TravelTimeMetric(world)

# create env
env = TSCEnv(world, agents, metric)
from train_cartpole import run_episode
from agent.networks import *
from agent.networks import MLP
from agent.dqn_agent import DQNAgent
import numpy as np
import torch
import gym
import os

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()

    if not os.path.exists("./results"):
        os.mkdir("./results")