Example #1
    def __init__(self, args, exp_model, logging_funcs):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_funcs["log"]
        self.env = logging_funcs["env"]

        self.replay = ExpReplay(args.exp_replay_size,
                                args.stale_limit,
                                exp_model,
                                args,
                                priority=self.args.prioritized)
        self.bonus_replay = ExpReplay(args.bonus_replay_size,
                                      args.stale_limit,
                                      exp_model,
                                      args,
                                      priority=self.args.prioritized)

        self.bonus_replay_stuff = 0

        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions)
        self.target_dqn = model(actions=args.actions)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)
        # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max

        self.max_bonus = 0

        self.player_bonus_positions = []
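
Every PyTorch example below counts parameters with an explicit loop over weight.size(). A minimal equivalent sketch using PyTorch's Tensor.numel(), which returns the element count of a tensor directly:

import torch.nn as nn

def count_parameters(model: nn.Module) -> int:
    # numel() is the product of each weight tensor's dimensions,
    # matching the explicit size() loop in the snippets.
    return sum(p.numel() for p in model.parameters())
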
Example #2
    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size, args)
        self.dnds = [
            DND(kernel=kernel,
                num_neighbors=args.nec_neighbours,
                max_memory=args.dnd_size,
                embedding_size=args.nec_embedding)
            for _ in range(self.args.actions)
        ]

        # DQN and Target DQN
        model = get_models(args.model)
        self.embedding = model(embedding=args.nec_embedding)

        embedding_params = 0
        for weight in self.embedding.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            embedding_params += weight_params
        print(
            "Embedding Network has {:,} parameters.".format(embedding_params))

        if args.gpu:
            print("Moving models to GPU.")
            self.embedding.cuda()

        # Optimizer
        self.optimizer = RMSprop(self.embedding.parameters(), lr=args.lr)
        # self.optimizer = Adam(self.embedding.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max

        self.experiences = []
        self.keys = []
        self.q_val_estimates = []

        self.table_updates = 0
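
Examples #2 and #6 build one DND per discrete action, as in Neural Episodic Control, so the greedy action is an argmax over per-action Q estimates. A sketch of that lookup pattern; the lookup() method here is hypothetical and not taken from the DND class above:

import numpy as np

def greedy_action(dnds, key):
    # Query each action's dictionary with the embedding key;
    # lookup() stands in for a kernel-weighted Q-value estimate.
    q_values = [dnd.lookup(key) for dnd in dnds]
    return int(np.argmax(q_values))
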
Example #3
    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]
        self.log_image = logging_func["image"]
        os.makedirs("{}/transition_model".format(args.log_path))

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size,
                                args.stale_limit,
                                exp_model,
                                args,
                                priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        print("\n\nDQN")
        self.dqn = model(actions=args.actions)
        print("Target DQN")
        self.target_dqn = model(actions=args.actions)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("Model DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max

        # Action sequences
        self.actions_to_take = []
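
Example #3 queues planned actions in self.actions_to_take. A hedged sketch of how such a queue is typically consumed; the act() method and the greedy fallback are illustrative, not from the source:

def act(self, state):
    # Execute any pre-planned action sequence before falling back
    # to the usual greedy policy.
    if self.actions_to_take:
        return self.actions_to_take.pop(0)
    return self.greedy_action(state)
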
Example #4
    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        if self.args.set_replay:
            self.replay = ExpReplaySet(10, 10, exp_model, args, priority=False)
        else:
            self.replay = ExpReplay(args.exp_replay_size,
                                    args.stale_limit,
                                    exp_model,
                                    args,
                                    priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions)
        self.target_dqn = model(actions=args.actions)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max
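
Each agent initialises self.target_sync_T = -self.args.t_max, which guarantees the first sync check succeeds immediately. A minimal sketch of the usual hard-sync check; the helper name and threshold test are illustrative assumptions:

def maybe_sync_target(self):
    # Hard-copy the online DQN weights into the frozen target DQN.
    if self.T - self.target_sync_T >= self.args.t_max:
        self.target_dqn.load_state_dict(self.dqn.state_dict())
        self.target_sync_T = self.T
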
Example #5
    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        if self.args.set_replay:
            self.replay = ExpReplaySet(10, 10, exp_model, args, priority=False)
        else:
            self.replay = ExpReplay(args.exp_replay_size,
                                    args.stale_limit,
                                    exp_model,
                                    args,
                                    priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions)
        self.target_dqn = model(actions=args.actions)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max
Example #6
    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size, args)
        self.dnds = [
            DND(kernel=kernel,
                num_neighbors=args.nec_neighbours,
                max_memory=args.dnd_size,
                embedding_size=args.nec_embedding)
            for _ in range(self.args.actions)
        ]

        # DQN and Target DQN
        model = get_models(args.model)
        self.embedding = model(embedding=args.nec_embedding)

        embedding_params = 0
        for weight in self.embedding.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            embedding_params += weight_params
        print("Embedding Network has {:,} parameters.".format(embedding_params))

        if args.gpu:
            print("Moving models to GPU.")
            self.embedding.cuda()

        # Optimizer
        self.optimizer = RMSprop(self.embedding.parameters(), lr=args.lr)
        # self.optimizer = Adam(self.embedding.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max

        self.experiences = []
        self.keys = []
        self.q_val_estimates = []

        self.table_updates = 0
Example #7
    print(" {}: {}".format(arg, getattr(args, arg)))
print("=" * 40)
print()

# Experience Replay
replay = None
if args.prioritized:
    replay = Prioritised_ExpReplay_Options(args.exp_replay_size)
else:
    replay = ExperienceReplay_Options(args.exp_replay_size)

# DQN
print("\n" + "=" * 40)
print(16 * " " + "Models:" + " " * 16)
print("=" * 40)
dqn = get_models(args.model)(name="DQN")
print()
target_dqn = get_models(args.model)(name="Target_DQN")
print("=" * 40)

# Optimizer
optimiser = tf.train.AdamOptimizer(args.lr)

# Tensorflow Operations
with tf.name_scope("Sync_Target_DQN"):
    dqn_vars = dqn.variables
    target_dqn_vars = target_dqn.variables
    sync_vars_list = []
    for (ref, val) in zip(target_dqn_vars, dqn_vars):
        sync_vars_list.append(tf.assign(ref, val))
    sync_vars = tf.group(*sync_vars_list)
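
sync_vars groups one tf.assign per target variable into a single op, so the target network can be synced with one call. A usage sketch, assuming TF1 graph mode:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A single run() call copies every DQN weight into Target_DQN.
    sess.run(sync_vars)
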
Example #8
    def __init__(self, args, exp_model, logging_funcs):
        self.args = args

        self.log = logging_funcs["log"]
        self.log_image = logging_funcs["image"]
        os.makedirs("{}/goal_states".format(args.log_path))
        self.env = logging_funcs["env"]

        # Exploration Model
        self.exp_model = exp_model

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size,
                                args.stale_limit,
                                exp_model,
                                args,
                                priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        self.model = model
        self.dqn = model(actions=args.actions)
        self.target_dqn = model(actions=args.actions)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)
        # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max

        # Hierarchical stuff
        self.goal_state_T = 0
        self.goal_state = None

        self.max_bonus = 0

        self.goal_optimizer = None

        self.goal_dqn = None
        self.goal_dqns = []
        self.goal_dqn_states = []

        self.executing_option = False
        self.option_num = 0
        self.option_steps = 0

        self.training_goal_T = 0
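
target_dqn.eval() only switches layers such as dropout and batch norm to inference mode; gradient tracking is normally also disabled when computing bootstrap targets. A minimal sketch, where next_states is an illustrative batch tensor:

import torch

with torch.no_grad():
    # Bootstrapped targets come from the frozen target network.
    next_q = target_dqn(next_states).max(dim=1)[0]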