def __init__(self, qec_table, ec_discount, num_actions, epsilon_start,
             epsilon_min, epsilon_decay, exp_pref, rng):
    self.qec_table = qec_table
    self.ec_discount = ec_discount
    self.num_actions = num_actions
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.exp_pref = exp_pref
    self.rng = rng
    self.trace_list = EC_functions.TraceRecorder()

    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        # Linear decay: the per-step amount that takes epsilon from
        # epsilon_start down to epsilon_min over epsilon_decay steps.
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + \
        "{}".format(self.ec_discount).replace(".", "p")
    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self._open_results_file()

    self.step_counter = None
    self.episode_reward = None
    self.total_reward = 0.
    self.total_episodes = 0
    self.start_time = None
    self.last_img = None
    self.last_action = None
    self.steps_sec_ema = 0.
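For intuition, the schedule above decays epsilon linearly; a minimal standalone sketch of its behaviour (a hypothetical helper, not part of the agent class):

# Minimal sketch of the linear epsilon schedule computed above
# (hypothetical standalone helper, not part of the original class).
def epsilon_at(step, epsilon_start=1.0, epsilon_min=0.005, epsilon_decay=10000):
    if epsilon_decay == 0:
        return epsilon_start
    rate = (epsilon_start - epsilon_min) / epsilon_decay
    return max(epsilon_min, epsilon_start - rate * step)

# epsilon_at(0) -> 1.0; epsilon_at(10000) -> 0.005; clamped at 0.005 afterwards.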
def launch(args, defaults, description):
    """Execute a complete training run."""
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    agent = None
    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng)
        else:
            # Open in binary mode: pickled tables are binary data.
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = pickle.load(handle)

        agent = EC_agent.EpisodicControl(qec_table,
                                         parameters.ec_discount,
                                         num_actions,
                                         parameters.epsilon_start,
                                         parameters.epsilon_min,
                                         parameters.epsilon_decay,
                                         parameters.experiment_prefix,
                                         rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent,
        defaults.RESIZED_WIDTH,
        defaults.RESIZED_HEIGHT,
        parameters.resize_method,
        parameters.epochs,
        parameters.steps_per_epoch,
        parameters.steps_per_test,
        parameters.frame_skip,
        parameters.death_ends_episode,
        parameters.max_start_nullops,
        rng)
    experiment.run()
def launch(args, defaults, description):
    """Execute a complete training run."""
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    # FOR VISUALIZATION
    USE_SDL = False
    if parameters.display_screen:
        if USE_SDL:
            import sys
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    agent = None
    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng)
        else:
            # Open in binary mode: pickled tables are binary data.
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
            # If this doesn't work, load with the chunked reader below,
            # which works around the ~2 GB single-read limit on some
            # platforms:
            # def try_to_load_as_pickled_object_or_None(filepath):
            #     """Defensive pickle.load that supports very large files."""
            #     max_bytes = 2 ** 31 - 1
            #     try:
            #         input_size = os.path.getsize(filepath)
            #         bytes_in = bytearray(0)
            #         with open(filepath, 'rb') as f_in:
            #             for _ in range(0, input_size, max_bytes):
            #                 bytes_in += f_in.read(max_bytes)
            #         obj = cPickle.loads(bytes(bytes_in))
            #     except (OSError, IOError, cPickle.UnpicklingError):
            #         return None
            #     return obj
            # qec_table = try_to_load_as_pickled_object_or_None(
            #     parameters.qec_table)  # takes a path, not an open handle

        agent = IBL_agent.EpisodicControl(qec_table,
                                          parameters.ec_discount,
                                          num_actions,
                                          parameters.epsilon_start,
                                          parameters.epsilon_min,
                                          parameters.epsilon_decay,
                                          parameters.experiment_prefix,
                                          rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent,
        defaults.RESIZED_WIDTH,
        defaults.RESIZED_HEIGHT,
        parameters.resize_method,
        parameters.epochs,
        parameters.steps_per_epoch,
        parameters.steps_per_test,
        parameters.frame_skip,
        parameters.death_ends_episode,
        parameters.max_start_nullops,
        rng)
    experiment.run()
def __init__(self, qec_table, ec_discount, num_actions, epsilon_start,
             epsilon_min, epsilon_decay, exp_pref, rng):
    self.qec_table = qec_table
    self.ec_discount = ec_discount
    self.num_actions = num_actions
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.exp_pref = exp_pref
    self.rng = rng
    self.trace_list = EC_functions.TraceRecorder()

    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    # CREATE IBL AGENT
    # Older version:
    # attrs = OrderedDict({'key' + str(i): i for i in range(0, 63)})  # Maybe 64!
    # self.DM = Agent('DM', attrs.keys())
    # Newest version: no attributes beyond the state itself are needed at
    # the beginning.
    self.DM = Agent('DM', ['state'])
    self.DM.defaultUtility = 1000
    # Alternatively, prepopulate with the random initial actions taken in
    # the while loop in ale_experiment.py instead of using defaultUtility.

    def eucl(v1, v2):
        v1 = np.array(v1)
        v2 = np.array(v2)
        return np.linalg.norm(v1 - v2)

    # Initialise partial matching.
    self.DM.similarity('state', eucl)
    self.DM.mismatchPenalty = 0.5

    # Create general situation and situation-decision sets; they are
    # updated later on.
    self.situation = Situation(state=())  # the state attribute is an empty tuple
    self.situationdecisions = [SituationDecision(str(action), self.situation)
                               for action in range(self.num_actions)]
    # It may be necessary to prepopulate these with a few instances.

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + \
        "{}".format(self.ec_discount).replace(".", "p")
    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self._open_results_file()

    self.step_counter = None
    self.episode_reward = None
    self.total_reward = 0.
    self.total_episodes = 0
    self.start_time = None
    self.last_img = None
    self.last_action = None
    self.steps_sec_ema = 0.
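The mismatch penalty makes retrieval prefer stored states close to the probe under the eucl metric. A rough standalone sketch of the ACT-R-style partial-matching idea this relies on (the helper below is hypothetical, not the IBL library's API):

import numpy as np

# Hypothetical sketch of partial matching with the eucl metric above;
# not the IBL library's API, just the ACT-R-style idea it relies on.
def eucl(v1, v2):
    return np.linalg.norm(np.array(v1) - np.array(v2))

def penalized_activation(base_activation, probe, stored, mismatch_penalty=0.5):
    # Activation drops in proportion to how far the stored state lies
    # from the probe, so near matches dominate retrieval.
    return base_activation - mismatch_penalty * eucl(probe, stored)

print(penalized_activation(1.0, [0.0, 0.0], [0.0, 0.0]))  # 1.0 (exact match)
print(penalized_activation(1.0, [0.0, 0.0], [3.0, 4.0]))  # -1.5 (distance 5)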
def __init__(self, session, args):
    self.n_input = args.input_size      # Number of features in each observation
    self.num_obs = 2                    # Number of observations in each state
    self.n_actions = args.num_actions   # Number of output q_values
    self.discount = args.discount       # Discount factor
    self.epsilon = args.epsilon         # Epsilon
    self.learning_rate = args.learning_rate
    self.beta = args.beta
    self.delta = 0.01
    self.number_nn = 50
    self.layer_sizes = [self.n_input] + args.layer_sizes
    self.session = session
    self.memory = ReplayMemory(args)
    self.old_way = False

    # Tensorflow variables:

    # Model for embeddings
    self.state = tf.placeholder("float", [None, self.n_input])
    self.action = tf.placeholder(tf.int64, [None])
    with tf.variable_scope('embedding'):
        self.state_embeddings, self.weights = self.network(
            self.state, self.layer_sizes)

    # DNDs, one per action
    self.DNDs = []
    for a in xrange(self.n_actions):
        new_DND = EC_functions.LRU_KNN(
            5000, self.state_embeddings.get_shape()[-1])
        self.DNDs.append(new_DND)

    # DND calculations (everything from here on needs these placeholders
    # filled)
    if self.old_way:
        self.dnd_embeddings = tf.placeholder(
            "float", [None, None, self.state_embeddings.get_shape()[-1]],
            name="dnd_embeddings")
        self.dnd_values = tf.placeholder("float", [None, None],
                                         name="dnd_values")
    else:
        # Call on DND directly
        embs_and_values = tf.py_func(self.get_nearest_neighbours,
                                     [self.state_embeddings, self.action],
                                     [tf.float64, tf.float64])
        self.dnd_embeddings = tf.to_float(embs_and_values[0])
        self.dnd_values = tf.to_float(embs_and_values[1])

    # Inverse-distance kernel over the retrieved neighbours; delta keeps
    # the denominator away from zero. (The original wrapped delta in a
    # one-element list, which adds a spurious broadcast dimension.)
    weightings = 1.0 / (tf.reduce_sum(tf.square(
        self.dnd_embeddings - tf.expand_dims(self.state_embeddings, 1)),
        axis=2) + self.delta)
    normalised_weightings = weightings / tf.reduce_sum(
        weightings, axis=1, keep_dims=True)  # keep_dims so the division broadcasts

    if self.beta == 0:
        self.pred_q = tf.reduce_sum(self.dnd_values * normalised_weightings,
                                    axis=1)
        # self.pred_q = tf.reduce_mean(self.dnd_values, axis=1)
    else:
        self.pred_q = tf.log(
            tf.reduce_sum(tf.exp(self.beta * self.dnd_values) *
                          normalised_weightings, axis=1))

    # Loss function
    self.target_q = tf.placeholder("float", [None])
    self.td_err = self.target_q - self.pred_q
    total_loss = tf.reduce_sum(tf.square(self.td_err))
    self.optim = tf.train.AdamOptimizer(
        self.learning_rate).minimize(total_loss)
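For intuition, a hedged NumPy sketch of the readout this graph computes: inverse-distance weights over the retrieved neighbours, then either their weighted mean (beta == 0) or the log of a weighted exponential (beta != 0), which emphasises high-value neighbours. Shapes and names are illustrative, not the class's API.

import numpy as np

# Standalone NumPy sketch of the DND readout above (illustrative only).
def dnd_readout(query, neighbour_embs, neighbour_values, delta=0.01, beta=0.0):
    # query: (d,); neighbour_embs: (k, d); neighbour_values: (k,)
    sq_dists = np.sum((neighbour_embs - query) ** 2, axis=1)  # (k,)
    w = 1.0 / (sq_dists + delta)   # inverse-distance kernel
    w /= w.sum()                   # normalise to a distribution
    if beta == 0:
        return np.dot(w, neighbour_values)  # weighted mean of stored values
    # Log of weighted exp: larger beta puts more weight on large values.
    # (As in the graph above, the result is not divided by beta.)
    return np.log(np.dot(w, np.exp(beta * neighbour_values)))

# A query equidistant from two neighbours averages their values (~2.0).
embs = np.array([[0.0, 1.0], [0.0, -1.0]])
print(dnd_readout(np.zeros(2), embs, np.array([1.0, 3.0])))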
def launch(args, defaults, description):
    """Execute a complete training run."""
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    agent = None
    if parameters.method == 'ec_dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng,
                                             use_ec=True,
                                             double=parameters.double_dqn)
        else:
            with open(parameters.nn_file, 'rb') as handle:
                network = cPickle.load(handle)
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng,
                parameters.rebuild_knn_frequency)
        else:
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
        agent = ale_agents.EC_DQN(network,
                                  qec_table,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  parameters.ec_discount,
                                  num_actions,
                                  parameters.ec_testing,
                                  rng)

    if parameters.method == 'dqn_episodic_memory1':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng,
                                             use_episodic_mem=True,
                                             double=parameters.double_dqn)
        else:
            with open(parameters.nn_file, 'rb') as handle:
                network = cPickle.load(handle)
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng,
                parameters.rebuild_knn_frequency)
        else:
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
        agent = ale_agents.NeuralNetworkEpisodicMemory1(
            network,
            qec_table,
            parameters.epsilon_start,
            parameters.epsilon_min,
            parameters.epsilon_decay,
            parameters.replay_memory_size,
            parameters.experiment_prefix,
            parameters.replay_start_size,
            parameters.update_frequency,
            parameters.ec_discount,
            num_actions,
            parameters.ec_testing,
            rng)

    if parameters.method == 'dqn_episodic_memory2':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng,
                                             use_episodic_mem=True,
                                             double=parameters.double_dqn)
        else:
            with open(parameters.nn_file, 'rb') as handle:
                network = cPickle.load(handle)
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng,
                parameters.rebuild_knn_frequency)
        else:
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
        # NOTE: unlike the other branches, the original code constructs no
        # agent here, so 'dqn_episodic_memory2' leaves agent = None; the
        # branch appears to be unfinished.

    if parameters.method == 'dqn_episodic_memory3':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng,
                                             use_episodic_mem=True,
                                             double=parameters.double_dqn)
        else:
            with open(parameters.nn_file, 'rb') as handle:
                network = cPickle.load(handle)
        if parameters.qec_table is None:
            # This method uses an LSH table rather than a QEC table.
            qec_table = EC_functions.LshHash(
                parameters.state_dimension,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                rng)
        else:
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
        agent = ale_agents.NeuralNetworkEpisodicMemory3(
            network,
            qec_table,
            parameters.epsilon_start,
            parameters.epsilon_min,
            parameters.epsilon_decay,
            parameters.replay_memory_size,
            parameters.experiment_prefix,
            parameters.replay_start_size,
            parameters.update_frequency,
            parameters.ec_discount,
            num_actions,
            parameters.ec_testing,
            rng)

    if parameters.method == 'dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng,
                                             double=parameters.double_dqn)
        else:
            with open(parameters.nn_file, 'rb') as handle:
                network = cPickle.load(handle)
        agent = ale_agents.NeuralAgent(network,
                                       parameters.epsilon_start,
                                       parameters.epsilon_min,
                                       parameters.epsilon_decay,
                                       parameters.replay_memory_size,
                                       parameters.experiment_prefix,
                                       parameters.replay_start_size,
                                       parameters.update_frequency,
                                       rng)

    if parameters.method == 'episodic_control':
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn,
                parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size,
                num_actions,
                rng,
                parameters.rebuild_knn_frequency)
        else:
            with open(parameters.qec_table, 'rb') as handle:
                qec_table = cPickle.load(handle)
        agent = ale_agents.EpisodicControl(qec_table,
                                           parameters.ec_discount,
                                           num_actions,
                                           parameters.epsilon_start,
                                           parameters.epsilon_min,
                                           parameters.epsilon_decay,
                                           parameters.experiment_prefix,
                                           parameters.ec_testing,
                                           rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)
    experiment.run()
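These launchers follow the deep_q_rl convention of being called from a thin run script; a minimal sketch of such an entry point (the Defaults name and call site are assumptions, not taken from this code):

import sys

# Hypothetical entry point in the deep_q_rl style; 'Defaults' is assumed
# to be the module's class of default parameters.
if __name__ == '__main__':
    launch(sys.argv[1:], Defaults, __doc__)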