def __init__(self, args, exp_model, logging_funcs):
    self.args = args

    # Exploration Model
    self.exp_model = exp_model

    self.log = logging_funcs["log"]
    self.env = logging_funcs["env"]

    self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)
    self.bonus_replay = ExpReplay(args.bonus_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)
    self.bonus_replay_stuff = 0

    # DQN and Target DQN
    model = get_models(args.model)
    self.dqn = model(actions=args.actions)
    self.target_dqn = model(actions=args.actions)

    dqn_params = 0
    for weight in self.dqn.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        dqn_params += weight_params
    print("DQN has {:,} parameters.".format(dqn_params))

    self.target_dqn.eval()

    if args.gpu:
        print("Moving models to GPU.")
        self.dqn.cuda()
        self.target_dqn.cuda()

    # Optimizer
    self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)
    # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max

    self.max_bonus = 0
    self.player_bonus_positions = []
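# The parameter-counting loop above is repeated in each agent's __init__; a
# small helper could factor it out. This is an illustrative sketch only - the
# helper name `count_parameters` is hypothetical and not part of the original
# code.
def count_parameters(module):
    # Multiply out each weight tensor's dimensions and sum the totals.
    total = 0
    for weight in module.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        total += weight_params
    return total

# Equivalent to the inline loop above:
# print("DQN has {:,} parameters.".format(count_parameters(self.dqn)))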
def __init__(self, args, exp_model, logging_func):
    self.args = args

    # Exploration Model
    self.exp_model = exp_model

    self.log = logging_func["log"]

    # Experience Replay
    self.replay = ExpReplay(args.exp_replay_size, args)
    self.dnds = [DND(kernel=kernel,
                     num_neighbors=args.nec_neighbours,
                     max_memory=args.dnd_size,
                     embedding_size=args.nec_embedding)
                 for _ in range(self.args.actions)]

    # DQN and Target DQN
    model = get_models(args.model)
    self.embedding = model(embedding=args.nec_embedding)

    embedding_params = 0
    for weight in self.embedding.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        embedding_params += weight_params
    print("Embedding Network has {:,} parameters.".format(embedding_params))

    if args.gpu:
        print("Moving models to GPU.")
        self.embedding.cuda()

    # Optimizer
    self.optimizer = RMSprop(self.embedding.parameters(), lr=args.lr)
    # self.optimizer = Adam(self.embedding.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max

    self.experiences = []
    self.keys = []
    self.q_val_estimates = []
    self.table_updates = 0
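# The `kernel` passed to each DND is defined elsewhere in the repo. Assuming it
# follows the inverse-distance kernel from the NEC paper (Pritzel et al., 2017),
# it would look roughly like the sketch below; the function name and the delta
# constant are assumptions, not the original implementation.
import torch

def inverse_distance_kernel(query_key, neighbour_keys, delta=1e-3):
    # k(h, h_i) = 1 / (||h - h_i||^2 + delta) for each stored neighbour key.
    squared_distances = torch.sum((neighbour_keys - query_key) ** 2, dim=1)
    return 1.0 / (squared_distances + delta)

# The Q-value estimate for an action is then the kernel-weighted average of the
# values stored alongside the retrieved neighbour keys.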
def __init__(self, args, exp_model, logging_func):
    self.args = args

    # Exploration Model
    self.exp_model = exp_model

    self.log = logging_func["log"]
    self.log_image = logging_func["image"]

    os.makedirs("{}/transition_model".format(args.log_path))

    # Experience Replay
    self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)

    # DQN and Target DQN
    model = get_models(args.model)
    print("\n\nDQN")
    self.dqn = model(actions=args.actions)
    print("Target DQN")
    self.target_dqn = model(actions=args.actions)

    dqn_params = 0
    for weight in self.dqn.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        dqn_params += weight_params
    print("Model DQN has {:,} parameters.".format(dqn_params))

    self.target_dqn.eval()

    if args.gpu:
        print("Moving models to GPU.")
        self.dqn.cuda()
        self.target_dqn.cuda()

    # Optimizer
    # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
    self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max

    # Action sequences
    self.actions_to_take = []
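# `target_sync_T` starts at -t_max so the first training step already exceeds
# the sync interval and copies the online weights into the target network. The
# method below sketches that pattern; the name `sync_target_if_due` and the
# `args.target` interval flag are assumptions, not code from this file.
def sync_target_if_due(self):
    if self.T - self.target_sync_T >= self.args.target:
        self.target_dqn.load_state_dict(self.dqn.state_dict())
        self.target_sync_T = self.T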
def __init__(self, args, exp_model, logging_func):
    self.args = args

    # Exploration Model
    self.exp_model = exp_model

    self.log = logging_func["log"]

    # Experience Replay
    if self.args.set_replay:
        self.replay = ExpReplaySet(10, 10, exp_model, args, priority=False)
    else:
        self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)

    # DQN and Target DQN
    model = get_models(args.model)
    self.dqn = model(actions=args.actions)
    self.target_dqn = model(actions=args.actions)

    dqn_params = 0
    for weight in self.dqn.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        dqn_params += weight_params
    print("DQN has {:,} parameters.".format(dqn_params))

    self.target_dqn.eval()

    if args.gpu:
        print("Moving models to GPU.")
        self.dqn.cuda()
        self.target_dqn.cuda()

    # Optimizer
    # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
    self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max
print(" {}: {}".format(arg, getattr(args, arg))) print("=" * 40) print() # Experience Replay replay = None if args.prioritized: replay = Prioritised_ExpReplay_Options(args.exp_replay_size) else: replay = ExperienceReplay_Options(args.exp_replay_size) # DQN print("\n" + "=" * 40) print(16 * " " + "Models:" + " " * 16) print("=" * 40) dqn = get_models(args.model)(name="DQN") print() target_dqn = get_models(args.model)(name="Target_DQN") print("=" * 40) # Optimizer optimiser = tf.train.AdamOptimizer(args.lr) # Tensorflow Operations with tf.name_scope("Sync_Target_DQN"): dqn_vars = dqn.variables target_dqn_vars = target_dqn.variables sync_vars_list = [] for (ref, val) in zip(target_dqn_vars, dqn_vars): sync_vars_list.append(tf.assign(ref, val)) sync_vars = tf.group(*sync_vars_list)
def __init__(self, args, exp_model, logging_funcs):
    self.args = args

    self.log = logging_funcs["log"]
    self.log_image = logging_funcs["image"]
    os.makedirs("{}/goal_states".format(args.log_path))
    self.env = logging_funcs["env"]

    # Exploration Model
    self.exp_model = exp_model

    # Experience Replay
    self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)

    # DQN and Target DQN
    model = get_models(args.model)
    self.model = model
    self.dqn = model(actions=args.actions)
    self.target_dqn = model(actions=args.actions)

    dqn_params = 0
    for weight in self.dqn.parameters():
        weight_params = 1
        for s in weight.size():
            weight_params *= s
        dqn_params += weight_params
    print("DQN has {:,} parameters.".format(dqn_params))

    self.target_dqn.eval()

    if args.gpu:
        print("Moving models to GPU.")
        self.dqn.cuda()
        self.target_dqn.cuda()

    # Optimizer
    self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)
    # self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max

    # Hierarchical stuff
    self.goal_state_T = 0
    self.goal_state = None
    self.max_bonus = 0
    self.goal_optimizer = None
    self.goal_dqn = None
    self.goal_dqns = []
    self.goal_dqn_states = []
    self.executing_option = False
    self.option_num = 0
    self.option_steps = 0
    self.training_goal_T = 0