Code example #1
def main(model_path, n_test_episodes):
    run_paths = glob.glob(os.path.join(model_path, '*'))
    for run_path in run_paths:
        if len(glob.glob(os.path.join(run_path, 'carracing_results*'))) > 0:
            print(run_path, 'already processed')
            continue
        # Load run config
        run_config = json.load(open(os.path.join(run_path, 'config.json'), 'r'))
        env = gym.make("CarRacing-v0").unwrapped

        num_actions = 5

        # Define networks and load agent
        if run_config['model'] == 'Resnet':
            Q_net = ResnetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = ResnetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        elif run_config['model'] == 'Lenet':
            Q_net = LeNetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = LeNetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        elif run_config['model'] == 'DeepQNetwork':
            Q_net = DeepQNetwork(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = DeepQNetwork(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        else:
            raise ValueError('{} not implemented.'.format(run_config['model']))

        agent = DQNAgent(Q=Q_net, Q_target=Q_target_net, num_actions=num_actions, **run_config)
        agent.load(os.path.join(run_path, 'agent.pt'))

        episode_rewards = []
        for i in range(n_test_episodes):
            stats = run_episode(env, agent, deterministic=True, history_length=run_config['history_length'],
                                do_training=False, rendering=True, normalize_images=run_config['normalize_images'],
                                skip_frames=run_config['skip_frames'], max_timesteps=1000)
            episode_rewards.append(stats.episode_reward)

        # save results in a dictionary and write them into a .json file
        results = dict()
        results["episode_rewards"] = episode_rewards
        results["mean"] = np.array(episode_rewards).mean()
        results["std"] = np.array(episode_rewards).std()
        fname = "{}/carracing_results_dqn-{}.json".format(run_path, datetime.now().strftime("%Y%m%d-%H%M%S"))
        fh = open(fname, "w")
        json.dump(results, fh)
        fh.close()

        env.close()
        print('... finished')
Code example #2
def build(path):
    # create world
    world = World(path, thread_num=args.thread)

    # create agents
    agents = []
    for i in world.intersections:
        action_space = gym.spaces.Discrete(len(i.phases))
        agents.append(
            DQNAgent(
                action_space,
                LaneVehicleGenerator(world,
                                     i, ["lane_count"],
                                     in_only=True,
                                     average=None),
                LaneVehicleGenerator(world,
                                     i, ["lane_waiting_count"],
                                     in_only=True,
                                     average="all",
                                     negative=True), i.id))
        if args.load_model:
            agents[-1].load_model(args.save_dir)
        # if len(agents) == 5:
        #     break

    # create metric
    metric = TravelTimeMetric(world)

    # create env
    env = TSCEnv(world, agents, metric)
    return world, agents, env
Code example #3
def _single_dqn_test_demo():
    catch_game_object = MultiPlayerCatch(1,
                                         board_size=20,
                                         food_spawn_rate=0.05)
    visualizer = ImageStateVisualizator('MPCatch visualization', 10)
    recorder = ImageStateRecorder('MPCatch_rgb_trained_network_results')

    model = load_trained_model('final.h5')

    agent = DQNAgent(model, 10000)
    agent.train(catch_game_object,
                epochs=100000,
                batch_size=50,
                gamma=0.9,
                epsilon=0.1,
                visualizer=visualizer)
Code example #4
File: run_dqn.py Project: mKafouros/PlanLight
def init(args, test=False):
    tf_mute_warning()
    args.save_dir = save_dir + args.config_file[7:-5]
    if test:
        args.save_dir = save_dir + args.config_file[7:-10]

    # config_name = args.config_file.split('/')[1].split('.')[0]
    # args.agent_save_dir = args.save_dir + "/" + config_name
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    logger = logging.getLogger('main')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(
        os.path.join(args.log_dir,
                     datetime.now().strftime('%Y%m%d-%H%M%S') + ".log"))
    fh.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    logger.addHandler(fh)
    logger.addHandler(sh)

    # create world
    world = World(args.config_file, thread_num=args.thread, silent=True)

    # create agents
    agents = []
    for i in world.intersections:
        action_space = gym.spaces.Discrete(len(i.phases))
        agents.append(
            DQNAgent(
                action_space,
                LaneVehicleGenerator(world,
                                     i, ["lane_count"],
                                     in_only=True,
                                     average=None),
                LaneVehicleGenerator(world,
                                     i, ["lane_waiting_count"],
                                     in_only=True,
                                     average="all",
                                     negative=True), i.id))
        if args.load_model:
            agents[-1].load_model(args.save_dir)
    if args.share_weights:
        model = agents[0].model
        for agent in agents:
            agent.model = model

    # create metric
    metric = TravelTimeMetric(world)

    # create env
    env = TSCEnv(world, agents, metric)

    return env
Code example #5
    def create(self, model):
        parameters = self._parameters

        # number of actions
        actions_count = 2

        # policies (here, "policy" means greedy vs. ε-greedy)
        policies = self._create_policies(model, parameters, actions_count)

        # replay memory for storing experience
        memory = Memory(parameters["memory_size"])

        # discount factor γ
        gamma = parameters["gamma"]

        # do not start training until replay_start_memory_size samples have been collected
        replay_start_memory_size = parameters["replay_start_memory_size"]

        # number of samples used per experience-replay step
        replay_count = parameters["replay_count"]

        # leave a short interval between training steps
        training_interval_steps = parameters["training_interval_steps"]

        # create the optimizer
        optimizer_parameters = (parameters["optimizer"]["alpha"],
                                parameters["optimizer"]["epsilon"])
        optimizer = optimizers.Adam(alpha=optimizer_parameters[0],
                                    eps=optimizer_parameters[1])
        optimizer.setup(model)

        # soft target-model update
        model_updater = SoftModelUpdater(parameters["tau"])

        # create the agent
        agent = DQNAgent(gamma, model, optimizer, model_updater, memory,
                         replay_start_memory_size, replay_count,
                         training_interval_steps, policies)

        return agent
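
The SoftModelUpdater(parameters["tau"]) line above is the only hint of how the target model gets refreshed. As a rough illustration, a soft (Polyak-averaged) target update blends the online parameters into the target parameters at each step; the class name and the dict-of-arrays interface below are assumptions for the sketch, not the original project's API.

import numpy as np

class SoftModelUpdaterSketch:
    """Hypothetical sketch: theta_target <- tau * theta_online + (1 - tau) * theta_target."""

    def __init__(self, tau):
        self.tau = tau

    def update(self, target_params, online_params):
        # both arguments are assumed to be dicts mapping parameter names to numpy arrays
        for name, online in online_params.items():
            target = target_params[name]
            target[...] = self.tau * online + (1.0 - self.tau) * target

# usage sketch: after this call, target["w"] equals 0.01 * online["w"]
updater = SoftModelUpdaterSketch(tau=0.01)
online = {"w": np.ones((2, 2))}
target = {"w": np.zeros((2, 2))}
updater.update(target, online)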
Code example #6
def make_random_agents():
    # use a comprehension: `[DQNAgent(...)] * N_AGENTS` would alias one shared agent N_AGENTS times
    return [DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, STARTING_EPSILON, E_MIN, E_DECAY, GAMMA)
            for _ in range(N_AGENTS)]
Code example #7
def copy_agent(agent):
    weights = agent.model.get_weights()
    copied_model = clone_model(agent.model)
    copied = DQNAgent(*agent.get_init_info())
    copied.set_model(copied_model, weights)
    return copied
Code example #8
import numpy as np

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = 4
    num_actions = 2
    
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt")
 
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
Code example #9
import numpy as np

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    states_dim = 4
    action_dim = 2

    Q = MLP(states_dim, action_dim)
    Q_target = MLP(states_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
Code example #10
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, help="Model file to use", required=True)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=5, required=False)
    args = parser.parse_args()

    env = gym.make("CarRacing-v0").unwrapped

    history_length =  5

    #TODO: Define networks and load agent
    # ....
    Q_network = CNN(history_length=history_length, n_classes=5)
    Q_target = CNN(history_length=history_length, n_classes=5)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5)
    agent.load(args.model)

    episode_rewards = []
    for i in range(args.episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, history_length=history_length)
        episode_rewards.append(stats.episode_reward)
        print('Episode %d - [ Reward %.2f ]' % (i+1, stats.episode_reward))

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
    if not os.path.exists("./results"):
Code example #11
    num_eval_episodes = 5  # evaluate on 5 episodes
    eval_cycle = 10  # evaluate every 10 episodes

    # You find information about cartpole in
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # 3. train DQN agent with train_online(...)

    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     gamma=0.9,
                     batch_size=32,
                     epsilon=0.1,
                     tau=0.01,
                     lr=0.001,
                     history_length=0)
    training, validation = train_online(env, agent, 200, eval_cycle,
                                        num_eval_episodes)
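
Several of these scripts pass `epsilon` to DQNAgent and `deterministic=True` to run_episode. As a rough sketch of what ε-greedy action selection with such a Q network typically looks like (the actual logic lives in dqn/dqn_agent.py and may differ; the helper below is hypothetical and assumes Q is a torch module returning a (1, num_actions) tensor):

import numpy as np
import torch

def epsilon_greedy_action(Q, state, num_actions, epsilon, deterministic=False):
    # with probability epsilon take a random action, otherwise the greedy one;
    # deterministic=True (evaluation) always picks argmax_a Q(state, a)
    if not deterministic and np.random.rand() < epsilon:
        return np.random.randint(num_actions)
    with torch.no_grad():
        q_values = Q(torch.tensor(state, dtype=torch.float32).unsqueeze(0))
    return int(torch.argmax(q_values, dim=1).item())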
Code example #12
        # store model.
        if i % eval_cycle == 0 or i >= (num_episodes - 1):
            agent.save(os.path.join(model_dir, "dqn_agent-3.pt"))
   
    tensorboard.close_session()


if __name__ == "__main__":

    num_eval_episodes = 5   # evaluate on 5 episodes
    eval_cycle = 20         # evaluate every 20 episodes

    # You find information about cartpole in 
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO: 
    # 1. init Q network and target network (see dqn/networks.py)
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, num_actions, history_length=1000000)
    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=1000)
 
Code example #13
        # if i % eval_cycle == 0 or i >= (num_episodes - 1):

    tensorboard_train.close_session()
    tensorboard_eval.close_session()


if __name__ == "__main__":

    num_eval_episodes = 5   # evaluate on 5 episodes
    eval_cycle = 10       # evaluate every 10 episodes
    num_episodes = 10000
    # You find information about cartpole in
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # ...
    Q_target = MLP(state_dim, num_actions)
    Q = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # agent = DQNAgent(Q, Q_target, num_actions, double=True, history_length=1e6)
    agent = DQNAgent(Q, Q_target, num_actions, double=True, epsilon=0.99, eps_decay=True, history_length=1e6)
    # 3. train DQN agent with train_online(...)
    train_online(env, agent, num_episodes, num_eval_episodes, eval_cycle)
Code example #14
if __name__ == "__main__":
    # You find information about cartpole in
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--interrupt", action='store_true', help="Save model if interrupted",
                        default=False, required=False)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=500, required=False)
    parser.add_argument('-s', "--steps", type=int, help="num steps per episode", default=200, required=False)
    parser.add_argument("-r", "--render", action='store_true', help="render during training and evaluation",
                        default=False, required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO: 
    # 1. init Q network and target network (see dqn/networks.py)
    Q_network = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=num_actions, buffer_size=1e5, lr=1e-4)
    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=args.episodes, max_timesteps=args.steps,
                 eval_cycle=20, num_eval_episodes=5, rendering=args.render,
                 tensorboard_dir='./tensorboard', save_interrupt=args.interrupt)
Code example #15
File: setup_gui_ai.py Project: nn-simon/PokerAI
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None, 0.95)
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
Code example #16
File: setup_gui_ai.py Project: nn-simon/PokerAI
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None,
                     0.95)
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
Code example #17
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.

    env = gym.make("CartPole-v0").unwrapped
    #import pdb; pdb.set_trace()
    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # 3. train DQN agent with train_online(...)

    # Duelling DQN or Not
    Duel = False

    num_episodes = 2000

    if Duel:
        Q = MLP_Duel(state_dim, num_actions)
        Q_target = MLP_Duel(state_dim, num_actions)
    else:
        Q = MLP(state_dim, num_actions)
        Q_target = MLP(state_dim, num_actions)

    # avoid shadowing the DQNAgent class with the instance
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     double=True,
                     history_length=1e6)
    train_online(env, agent, num_episodes, epsilon_decay=False)
Code example #18
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = 4
    num_actions = 2
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    PATH = '/home/salem/Documents/freiburg/Lab/CarRacing/reinforcement_learning/models_cartpole/best_eval_dqn_agent.pt'
    Q.load_state_dict(torch.load(PATH), strict=False)
    Q_target.load_state_dict(torch.load(PATH), strict=False)
    agent = DQNAgent(Q, Q_target, num_actions, double=True)
    n_test_episodes = 150

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            eps=0.1,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        print(stats.episode_reward)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
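
The hint repeated in the training scripts says CartPole-v0 counts as solved when the average reward over 100 consecutive episodes is at least 195.0. A small check over the collected `episode_rewards` could look like this; the helper is a sketch, and only the 100-episode window and the 195.0 threshold come from the hint.

import numpy as np

def is_solved(episode_rewards, window=100, threshold=195.0):
    # True if any `window` consecutive episodes average at least `threshold` reward
    rewards = np.asarray(episode_rewards, dtype=float)
    if len(rewards) < window:
        return False
    means = np.convolve(rewards, np.ones(window) / window, mode="valid")
    return bool((means >= threshold).any())

print("solved:", is_solved(episode_rewards))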
Code example #19
    def __init__(self, *args, **kwargs):
        DQNAgent.__init__(self, *args, **kwargs)
Code example #20
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, None, None, None)
    agent.epsilon = 0.01
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
Code example #21
from agent.dqn_agent import DQNAgent
from function import *
import sys

if len(sys.argv) != 4:
	print("Usage: python train.py [stock] [window] [episodes]")
	exit()

stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

agent = DQNAgent(window_size)
data = getStockDataVec(stock_name)
len_data = len(data)
l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
	print("Episode " + str(e) + "/" + str(episode_count))
	state = getState(data, 0, window_size + 1, len_data)
	total_profits = []
	total_profit = 0
	agent.inventory = []

	for t in range(l):
		action = agent.act(state)
		next_state = getState(data, t + 1, window_size + 1, len_data)
		reward = 0	

		if action == 1: # buy
			agent.inventory.append(data[t][1])
			total_profits.append(['buy', total_profit])
Code example #22
logger.addHandler(sh)

# create world
world = World(args.config_file, thread_num=args.thread)

# create agents
agents = []
for i in world.intersections:
    action_space = gym.spaces.Discrete(len(i.phases))
    agents.append(
        DQNAgent(
            action_space,
            LaneVehicleGenerator(world,
                                 i, ["lane_count"],
                                 in_only=True,
                                 average=None),
            LaneVehicleGenerator(world,
                                 i, ["lane_waiting_count"],
                                 in_only=True,
                                 average="all",
                                 negative=True), i.id))
    if args.load_model:
        agents[-1].load_model(args.save_dir)

# create metric
metric = TravelTimeMetric(world)

# create env
env = TSCEnv(world, agents, metric)

Code example #23
from train_cartpole import run_episode
from agent.networks import *
import numpy as np
from agent.networks import MLP
import torch
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True,epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
    if not os.path.exists("./results"):