def __init__(self, weights_path, train_conv_layers):
    self.__angle_values = [-1, -0.5, 0, 0.5, 1]
    self.__nb_actions = 5
    self.__gamma = 0.99

    # Define the model
    activation = 'relu'
    pic_input = Input(shape=(59, 255, 3))

    img_stack = Conv2D(16, (3, 3), name='convolution0', padding='same',
                       activation=activation, trainable=train_conv_layers)(pic_input)
    img_stack = MaxPooling2D(pool_size=(2, 2))(img_stack)
    img_stack = Conv2D(32, (3, 3), activation=activation, padding='same',
                       name='convolution1', trainable=train_conv_layers)(img_stack)
    img_stack = MaxPooling2D(pool_size=(2, 2))(img_stack)
    img_stack = Conv2D(32, (3, 3), activation=activation, padding='same',
                       name='convolution2', trainable=train_conv_layers)(img_stack)
    img_stack = MaxPooling2D(pool_size=(2, 2))(img_stack)
    img_stack = Flatten()(img_stack)
    img_stack = Dropout(0.2)(img_stack)
    img_stack = Dense(128, name='rl_dense',
                      kernel_initializer=random_normal(stddev=0.01))(img_stack)
    img_stack = Dropout(0.2)(img_stack)
    output = Dense(self.__nb_actions, name='rl_output',
                   kernel_initializer=random_normal(stddev=0.01))(img_stack)

    opt = Adam()
    self.__action_model = Model(inputs=[pic_input], outputs=output)
    self.__action_model.compile(optimizer=opt, loss='mean_squared_error')
    self.__action_model.summary()

    # If we are using pretrained weights for the conv layers, load them and
    # verify the first layer.
    if weights_path is not None and len(weights_path) > 0:
        print('Loading weights from {0}...'.format(weights_path))
        print('Current working dir is {0}'.format(os.getcwd()))
        self.__action_model.load_weights(weights_path, by_name=True)

        print('First layer: ')
        w = np.array(self.__action_model.get_weights()[0])
        print(w)
    else:
        print('Not loading weights')

    # Set up the target model.
    # This is a trick that will allow the model to converge more rapidly.
    self.__action_context = tf.get_default_graph()
    self.__target_model = clone_model(self.__action_model)
    self.__target_context = tf.get_default_graph()
    self.__model_lock = threading.Lock()

def nngame_train(game, epochs=100, layer_sizes=(32, 32), dropout=0.2,
                 verbosity=0, optimizer='sgd',
                 loss='mean_squared_error'):  # pylint: disable=too-many-arguments,too-many-locals
    """Train a neural network regression model

    This mostly exists as a proof of concept; individual testing should be
    done to make sure it is working sufficiently. This API will likely change
    to support more general architectures and training.
    """
    utils.check(layer_sizes, 'must have at least one layer')
    utils.check(0 <= dropout < 1, 'dropout must be a valid probability')
    # This is for delayed importing of tensorflow
    from keras import models, layers

    model = models.Sequential()
    lay_iter = iter(layer_sizes)
    model.add(layers.Dense(
        next(lay_iter), input_shape=[game.num_strats], activation='relu'))
    for units in lay_iter:
        model.add(layers.Dense(units, activation='relu'))
    if dropout:
        model.add(layers.Dropout(dropout))
    model.add(layers.Dense(1, activation='sigmoid'))

    regs = []
    offsets = np.empty(game.num_strats)
    scales = np.empty(game.num_strats)
    for i, profs, pays in _dev_profpay(game):
        # XXX Payoff normalization specific to sigmoid. If we accept alternate
        # models, we need a way to compute how to potentially normalize
        # payoffs.
        min_pay = pays.min()
        offsets[i] = min_pay
        max_pay = pays.max()
        scale = 1 if np.isclose(max_pay, min_pay) else max_pay - min_pay
        scales[i] = scale
        reg = models.clone_model(model)
        reg.compile(optimizer=optimizer, loss=loss)
        reg.fit(profs, (pays - min_pay) / scale,
                epochs=epochs, verbose=verbosity)
        regs.append(reg)

    return _DevRegressionGame(
        game, tuple(regs), offsets, scales, game.min_strat_payoffs(),
        game.max_strat_payoffs(), np.ones(game.num_strats, bool))

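# Since each regressor above is fit on payoffs rescaled into [0, 1] for the
# sigmoid output, raw payoffs are recovered by inverting that transform with
# the stored offsets and scales. A minimal sketch of the inverse mapping (the
# helper name denormalize_payoffs is hypothetical; _DevRegressionGame
# presumably does the equivalent internally):
import numpy as np

def denormalize_payoffs(reg, profs, offset, scale):
    """Map sigmoid-scaled predictions in [0, 1] back to raw payoffs."""
    normalized = np.asarray(reg.predict(profs)).ravel()  # fit on (pays - min) / scale
    return normalized * scale + offset
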
def train(self, env):
    memory = deque(maxlen=self.replay_memory)
    episode_number = 0
    epsilon = self.init_epsilon
    epsilon_decay = (self.init_epsilon - self.final_epsilon) / self.epsilon_step_num
    global_step = 0

    if self.resume:
        model = load_model(self.restore_file_path)
        epsilon = self.final_epsilon
    else:
        model = self.def_model()

    model_target = clone_model(model)
    model_target.set_weights(model.get_weights())

    while episode_number < self.num_episode:
        done = False
        dead = False
        step, score, start_life = 0, 0, 5
        loss = 0.0
        observe = env.reset()

        for _ in range(random.randint(1, self.no_op_steps)):
            observe, score, done, info = env.step(1)
        if self.debug:
            print("Time Score: {0}".format(score))

        state = self.pre_processing(observe)
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        while not done:
            if self.render:
                env.render()
                time.sleep(0.01)

            action = self.get_action(history, epsilon, global_step, model_target)

            if epsilon > self.final_epsilon and global_step > self.observe_step_num:
                epsilon -= epsilon_decay

            observe, reward, done, info = env.step(action)

            next_state = self.pre_processing(observe)
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            next_history = np.append(next_state, history[:, :, :, :3], axis=3)

            if start_life > info['ale.lives']:
                dead = True
                start_life = info['ale.lives']

            self.store_memory(memory, history, action, reward, next_history, dead)

            if global_step > self.observe_step_num:
                loss = loss + self.train_memory_batch(memory, model)
                if global_step % self.refresh_target_model_num == 0:
                    model_target.set_weights(model.get_weights())

            score += reward

            if dead:
                dead = False
            else:
                history = next_history

            global_step += 1
            step += 1

            if done:
                if global_step <= self.observe_step_num:
                    state = "observe"
                elif self.observe_step_num < global_step <= self.observe_step_num + self.epsilon_step_num:
                    state = "explore"
                else:
                    state = "train"
                print('state: {0}, episode: {1}, score: {2}'.format(
                    state, episode_number, score))

                if episode_number % 100 == 0 or (episode_number + 1) == self.num_episode:
                    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
                    file_name = "{0}_model_{1}.h5".format(self.game, now)
                    model_path = os.path.join(self.train_dir, file_name)
                    model.save(model_path)

                episode_number += 1

def clone_model(self):
    """Return a copy of a Keras model."""
    print("cloning model")
    temp_model = clone_model(self.model)
    temp_model.set_weights(self.model.get_weights())
    return temp_model

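# Note on the helper above: Keras's clone_model reproduces only the
# architecture and re-initializes the parameters, so the explicit set_weights
# call is what makes the copy numerically identical. A small standalone check
# (illustrative sketch, not part of the original class):
import numpy as np
from keras.models import Sequential, clone_model
from keras.layers import Dense

source = Sequential([Dense(4, input_shape=(8,)), Dense(2)])

copy = clone_model(source)              # same topology, fresh random weights
copy.set_weights(source.get_weights())  # now numerically identical

for a, b in zip(source.get_weights(), copy.get_weights()):
    assert np.array_equal(a, b)
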
def agent_init():
    a_globs.cur_epsilon = a_globs.EPSILON
    print("Epsilon at run start: {}".format(a_globs.cur_epsilon))

    if a_globs.AGENT == a_globs.REWARD:
        num_outputs = 1
        cur_activation = 'sigmoid'
        loss = {'main_output': 'mean_squared_error',
                'aux_output': 'mean_squared_error'}
        a_globs.non_zero_reward_buffer = []
        a_globs.zero_reward_buffer = []
        a_globs.buffer_container = [a_globs.non_zero_reward_buffer,
                                    a_globs.zero_reward_buffer]

    elif a_globs.AGENT == a_globs.NOISE:
        num_outputs = a_globs.NUM_NOISE_NODES
        cur_activation = 'linear'
        loss = {'main_output': 'mean_squared_error',
                'aux_output': 'mean_squared_error'}
        a_globs.generic_buffer = []
        a_globs.buffer_container = [a_globs.generic_buffer]

    elif a_globs.AGENT == a_globs.STATE:
        num_outputs = a_globs.FEATURE_VECTOR_SIZE
        cur_activation = 'softmax'
        if a_globs.IS_1_HOT:
            loss = {'main_output': 'mean_squared_error',
                    'aux_output': 'categorical_crossentropy'}
        else:
            loss = {'main_output': 'mean_squared_error',
                    'aux_output': 'mean_squared_error'}
        a_globs.deterministic_state_buffer = []
        a_globs.stochastic_state_buffer = []
        if a_globs.IS_STOCHASTIC:
            a_globs.buffer_container = [a_globs.deterministic_state_buffer,
                                        a_globs.stochastic_state_buffer]
        else:
            a_globs.buffer_container = [a_globs.deterministic_state_buffer]

    elif a_globs.AGENT == a_globs.REDUNDANT:
        num_outputs = a_globs.NUM_ACTIONS * a_globs.NUM_REDUNDANT_TASKS
        cur_activation = 'linear'
        loss = {'main_output': 'mean_squared_error',
                'aux_output': 'mean_squared_error'}
        a_globs.generic_buffer = []
        a_globs.buffer_container = [a_globs.generic_buffer]

    # Specify the model
    init_weights = he_normal()

    main_input = Input(shape=(a_globs.FEATURE_VECTOR_SIZE,))
    shared_1 = Dense(a_globs.NUM_NERONS_LAYER_1, activation='relu',
                     kernel_initializer=init_weights, name='shared_1')(main_input)

    main_task_full_layer = Dense(a_globs.NUM_NERONS_LAYER_2, activation='relu',
                                 kernel_initializer=init_weights,
                                 name='main_task_full_layer')(shared_1)
    aux_task_full_layer = Dense(a_globs.NUM_NERONS_LAYER_2, activation='relu',
                                kernel_initializer=init_weights)(shared_1)

    main_output = Dense(a_globs.NUM_ACTIONS, activation='linear',
                        kernel_initializer=init_weights,
                        name='main_output')(main_task_full_layer)
    aux_output = Dense(num_outputs, activation=cur_activation,
                       kernel_initializer=init_weights,
                       name='aux_output')(aux_task_full_layer)

    # Initialize the model
    loss_weights = {'main_output': 1.0, 'aux_output': a_globs.LAMBDA}
    a_globs.model = Model(inputs=main_input, outputs=[main_output, aux_output])
    a_globs.model.compile(optimizer=Adam(lr=a_globs.ALPHA), loss=loss,
                          loss_weights=loss_weights)
    summarize_model(a_globs.model, a_globs.AGENT)

    # Create the target network to use in the update rule
    a_globs.target_network = clone_model(a_globs.model)
    a_globs.target_network.set_weights(a_globs.model.get_weights())

def train_model_aggregate(self):
    # Training and evaluation loop
    for i in range(self.num_grand_epochs):
        print("Grand Epoch:", i + 1, "/", self.num_grand_epochs)

        # Re-define the aggregate model (stored on the master node, and
        # ultimately returned), also re-initialize its weights
        self.aggregate_model = self.get_new_model()

        # Define a plotting object for every numpy array that comprises the
        # weights of our neural network, only if the algorithm is on its last
        # grand epoch
        if i == self.num_grand_epochs - 1:
            self.plots = [pca_weights_plotter()
                          for j in range(len(self.aggregate_model.get_weights()))]

        # Train individual models for specified number of epochs
        start_time = time.time()
        for segnum in list(range(self.num_segments)):
            self.train_segment(segnum, i)
        print("Time:", time.time() - start_time)

        # Average the weights of the trained models on the segments, add these
        # weights to the aggregate model. (A separate index variable keeps the
        # grand-epoch counter `i` from being clobbered.)
        avg_weights = []
        for w in range(len(self.aggregate_model.get_weights())):
            np_arrays = [self.segment_models[segment].get_weights()[w]
                         for segment in self.segment_models]
            avg_weights.append(sum(np_arrays) / self.num_segments)
        self.aggregate_model.set_weights(avg_weights)

        # Compile aggregate model
        self.aggregate_model.compile(loss='categorical_crossentropy',
                                     optimizer=Adam(),
                                     metrics=['accuracy'])

        # Evaluate aggregate model on the test set
        score = self.aggregate_model.evaluate(self.x_test, self.y_test, verbose=1)
        print("Aggregate model accuracy on test set:", score[1])

        # Plot the average model's weights and show the plots, only if the
        # algorithm is on its last grand epoch
        if i == self.num_grand_epochs - 1:
            avg_weights = self.aggregate_model.get_weights()
            for j in range(len(avg_weights)):
                plot = self.plots[j]
                plot.plot_data(avg_weights[j], "dark orange", 'x')
                plot.show_plot()

        # Redistribute the aggregate model to each segment for the next grand
        # epoch of training, if not on last grand epoch
        if i != self.num_grand_epochs - 1:
            for segment in self.segment_models:
                # clone_model copies only the architecture; the averaged
                # weights must be copied across explicitly as well
                self.segment_models[segment] = clone_model(self.aggregate_model)
                self.segment_models[segment].set_weights(
                    self.aggregate_model.get_weights())

        print('')
        print('-------------------------------------------------------------------------------------------------')

    # Conduct final testing with the weight-average aggregate model approach (non-ensemble)
    train_score_merged = self.aggregate_model.evaluate(self.x_train, self.y_train, verbose=0)
    test_score_merged = self.aggregate_model.evaluate(self.x_test, self.y_test, verbose=0)
    print("Training set prediction accuracy with aggregate model:", train_score_merged[1])
    print("Test set prediction accuracy with aggregate model:", test_score_merged[1])
    print('-------------------------------------------------------------------------------------------------')

    # Conduct final testing with the consensus prediction ensemble approach,
    # [include aggregate model in the ensemble]
    # self.segment_models['agg'] = self.aggregate_model
    start_time = time.time()
    train_score_consensus = self.consensus_predict_ensemble_evaluate(self.x_train, self.y_train)
    test_score_consensus = self.consensus_predict_ensemble_evaluate(self.x_test, self.y_test)
    print("Training set prediction accuracy with consensus prediction ensembling:",
          train_score_consensus)
    print("Test set prediction accuracy with consensus prediction ensembling:",
          test_score_consensus)
    print("Time:", time.time() - start_time)
    print('-------------------------------------------------------------------------------------------------')

    # Conduct final testing with the neural boosted ensemble approach
    start_time = time.time()
    self.neural_boosted_ensemble_train()
    train_score_neural = self.neural_boosted_ensemble_evaluate(self.x_train, self.y_train)
    test_score_neural = self.neural_boosted_ensemble_evaluate(self.x_test, self.y_test)
    print("Training set prediction accuracy with neural boosted ensembling:",
          train_score_neural)
    print("Test set prediction accuracy with neural boosted ensembling:",
          test_score_neural)
    print("Time:", time.time() - start_time)
    print('-------------------------------------------------------------------------------------------------')

    # Conduct final testing with the convolutional boosted ensemble approach
    # assert self.num_classes == self.num_segments, "Cannot perform convolutional ensembling at the moment"
    start_time = time.time()
    self.convolutional_boosted_ensemble_train()
    train_score_convolutional = self.convolutional_boosted_ensemble_evaluate(self.x_train, self.y_train)
    test_score_convolutional = self.convolutional_boosted_ensemble_evaluate(self.x_test, self.y_test)
    print("Training set prediction accuracy with convolutional boosted ensembling:",
          train_score_convolutional)
    print("Test set prediction accuracy with convolutional boosted ensembling:",
          test_score_convolutional)
    print("Time:", time.time() - start_time)
    print('-------------------------------------------------------------------------------------------------')

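# The aggregation step in train_model_aggregate is plain per-tensor averaging
# of segment weights, in the spirit of federated averaging. The same operation
# as a standalone sketch for any list of identically-architected Keras models
# (names here are illustrative, not from the class):
import numpy as np

def average_weights(models):
    """Average corresponding weight tensors across models of one architecture."""
    per_model = [m.get_weights() for m in models]
    return [np.mean(tensors, axis=0) for tensors in zip(*per_model)]

# aggregate_model.set_weights(average_weights(list_of_segment_models))
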
action_size = len(env.action_space)  # .n
agent = DQNAgent(state_size, action_size)
# agent.load("model.h5")
done = False
episodelist = list()
scorelist = list()
output = list()
e = 0
# while time.time() < end:
# for e in range(EPISODES):
e += 1
agent.state = env.reset()
stationary_model = clone_model(agent.model)
while True:
    agent.action, q_ = agent.act(agent.state, env.actionlimit,
                                 env.epsilonmod[env.turncounter - 1])
    next_state, reward, done = env.step(agent.action)
    agent.memorize(agent.state, agent.action, reward, next_state, done, q_)
    agent.state = next_state
    if done:
        # print("episode: {}/{}, score: {}, e: {:.2}, actions: {}, expmod: {}"
        #       .format(e, EPISODES, env.discountedmined, agent.epsilon,
        #               env.actionslist, env.epsilonmod))
        episodelist.append(e)

## save base model to restore later
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# x_train = x_train.astype('float32') / 255
# x_test = x_test.astype('float32') / 255
x_train, x_test = vgg_normalize(x_train, x_test)
(x_train, y_train) = shuffle_and_reduce(reduce_percent, x_train, y_train)
y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

base = clone_model(model)
base.set_weights(model.get_weights())

# gradients_train = get_layer_output_grad(base, x_train, y_train, -6)
# gradients_test = get_layer_output_grad(base, x_test, y_test, -6)
# print(gradients)

base = train_chosen(model, base, x_train, x_test, y_train, y_test, index)
# scores = base.evaluate(x_test, y_test, verbose=1)
# print('Test loss:', scores[0])
# print('Test accuracy:', scores[1])
base.save(output)

def copy_model(self, game):
    model_copy = clone_model(self.model)
    model_copy.build(self.input_layer(game))
    self.compile_model(model_copy, game)
    model_copy.set_weights(self.model.get_weights())
    return model_copy

def multi_gpu_model(model, gpus=None, cpu_merge=True, cpu_relocation=False):
    """Replicates a model on different GPUs.

    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:

    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.

    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.

    This induces quasi-linear speedup on up to 8 GPUs.

    This function is only available with the TensorFlow backend
    for the time being.

    # Arguments
        model: A Keras model instance. To avoid OOM errors,
            this model could have been built on CPU, for instance
            (see usage example below).
        gpus: Integer >= 2 or list of integers, number of GPUs or
            list of GPU IDs on which to create model replicas.
        cpu_merge: A boolean value to identify whether to force
            merging model weights under the scope of the CPU or not.
        cpu_relocation: A boolean value to identify whether to
            create the model's weights under the scope of the CPU.
            If the model is not defined under any preceding device
            scope, you can still rescue it by activating this option.

    # Returns
        A Keras `Model` instance which can be used just like the initial
        `model` argument, but which distributes its workload on
        multiple GPUs.

    # Example 1 - Training models with weights merge on CPU

    ```python
        import tensorflow as tf
        from keras.applications import Xception
        from keras.utils import multi_gpu_model
        import numpy as np

        num_samples = 1000
        height = 224
        width = 224
        num_classes = 1000

        # Instantiate the base model (or "template" model).
        # We recommend doing this under a CPU device scope,
        # so that the model's weights are hosted on CPU memory.
        # Otherwise they may end up hosted on a GPU, which would
        # complicate weight sharing.
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes)

        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        parallel_model = multi_gpu_model(model, gpus=8)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        # Generate dummy data.
        x = np.random.random((num_samples, height, width, 3))
        y = np.random.random((num_samples, num_classes))

        # This `fit` call will be distributed on 8 GPUs.
        # Since the batch size is 256, each GPU will process 32 samples.
        parallel_model.fit(x, y, epochs=20, batch_size=256)

        # Save model via the template model (which shares the same weights):
        model.save('my_model.h5')
    ```

    # Example 2 - Training models with weights merge on CPU using cpu_relocation

    ```python
        ..
        # Not needed to change the device scope for model definition:
        model = Xception(weights=None, ..)

        try:
            model = multi_gpu_model(model, cpu_relocation=True)
            print("Training using multiple GPUs..")
        except:
            print("Training using single GPU or CPU..")

        model.compile(..)
        ..
    ```

    # Example 3 - Training models with weights merge on GPU (recommended for NV-link)

    ```python
        ..
        # Not needed to change the device scope for model definition:
        model = Xception(weights=None, ..)

        try:
            model = multi_gpu_model(model, cpu_merge=False)
            print("Training using multiple GPUs..")
        except:
            print("Training using single GPU or CPU..")

        model.compile(..)
        ..
    ```

    # On model saving

    To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)`
    with the template model (the argument you passed to `multi_gpu_model`),
    rather than the model returned by `multi_gpu_model`.
    """
    if K.backend() != 'tensorflow':
        raise ValueError('`multi_gpu_model` is only available '
                         'with the TensorFlow backend.')

    available_devices = _get_available_devices()
    available_devices = [_normalize_device_name(name)
                         for name in available_devices]
    if not gpus:
        # Using all visible GPUs when not specifying `gpus`
        # e.g. CUDA_VISIBLE_DEVICES=0,2 python keras_mgpu.py
        gpus = len([x for x in available_devices if 'gpu' in x])

    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `len(gpus) >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `gpus >= 2`. '
                             'Received: `gpus=%d`' % gpus)
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    import tensorflow as tf

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError(
                'To call `multi_gpu_model` with `gpus=%s`, '
                'we expect the following devices to be available: %s. '
                'However this machine only has: %s. '
                'Try reducing `gpus`.' % (gpus, target_devices,
                                          available_devices))

    def get_slice(data, i, parts):
        shape = K.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == parts - 1:
            size = batch_size - step * i
        else:
            size = step
        size = K.concatenate([size, input_shape], axis=0)
        stride = K.concatenate([step, input_shape * 0], axis=0)
        start = stride * i
        return K.slice(data, start, size)

    # Relocate the model definition under CPU device scope if needed
    if cpu_relocation:
        with tf.device('/cpu:0'):
            model = clone_model(model)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    # In-place input splitting which is not only
                    # 5% ~ 12% faster but also less GPU memory
                    # duplication.
                    with tf.device(x.device):
                        input_shape = K.int_shape(x)[1:]
                        slice_i = Lambda(get_slice,
                                         output_shape=input_shape,
                                         arguments={'i': i,
                                                    'parts': num_gpus})(x)
                        inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                outputs = to_list(outputs)

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Deduplicate output names to handle Siamese networks.
    occurrences = {}
    for n in model.output_names:
        if n not in occurrences:
            occurrences[n] = 1
        else:
            occurrences[n] += 1
    conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
    output_names = []
    for n in model.output_names:
        if n in conflict_counter:
            conflict_counter[n] += 1
            n += '_%d' % conflict_counter[n]
        output_names.append(n)

    # Merge outputs under expected scope.
    with tf.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
        merged = []
        for name, outputs in zip(output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)

def __init__(self, config=None):
    if config is None:
        config = {}

    self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
    self.action_size = self.env.action_space.n

    self.to_vis = config.get('visualize', False)
    self.verbose = config.get('verbose', True)
    self.backup = config.get('backup', 25)
    self.episodes = config.get('episodes', 300)
    self.depth = config.get('depth', 4)
    self.state_size = config.get('space', (84, 84))

    self.model = None
    self._target_model = None

    self.prioritized = config.get('prioritized', False)
    if self.prioritized:
        self.memory = PrioritizedMemory(max_len=config.get('mem_size', 100000))
    else:
        self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

    if config.get('duel', False):
        self.model = self._duel_conv()
    else:
        self.model = self._conv()

    self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

    if config.get('target', True):
        self._target_model = clone_model(self.model)
        self._target_model.set_weights(self.model.get_weights())

    self._time = 0
    self.update_time = config.get('target_update', 1000)
    self.env._max_episode_steps = None
    self.batch_size = config.get('batch', 32 * 3)
    self.to_observe = config.get('to_observe', 10000)

    self.log_dir = config['log_dir']
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)

    plot_model(self.model,
               to_file=os.path.join(self.log_dir, 'model.png'),
               show_shapes=True)

    attr = {'batch size': self.batch_size,
            'to observe': self.to_observe,
            'depth': self.depth}
    self.results = {'info': attr}

    load_prev = config.get('load', False)
    self.gamma = None
    pol = None

    if 'pol' in config:
        if config['pol'] == 'random':
            pol = policy.RandomPolicy()
        elif config['pol'] == 'eps':
            pol = policy.EpsPolicy(config.get('pol_eps', 0.1))
    self.pol = pol

    if load_prev:
        path = sorted([int(x) for x in os.listdir(self.log_dir)
                       if os.path.isdir(os.path.join(self.log_dir, x))])
        if len(path) != 0:
            load_prev = self.load(os.path.join(self.log_dir, str(path[-1])))

    if self.pol is None:
        self.pol = policy.AnnealedPolicy(
            inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
            attr='eps',
            value_max=1.0,
            value_min=config.get('ex_min', 0.02),
            value_test=0.5,
            nb_steps=config.get('ex_steps', 100000))
    if self.gamma is None:
        self.gamma = policy.EpsPolicy(float(config.get('gamma', 0.99))).get_value

def _clone(self):
    self.qs1 = []
    for q in self.qs:
        q1 = clone_model(q)
        q1.set_weights(q.get_weights())
        self.qs1.append(q1)

def startIteration(model, args):
    log_file_name = datetime.datetime.now().strftime("log_%Y_%m_%d_%H_%M_%S.txt")
    log_file = open(log_file_name, "w")
    backup = sys.stdout
    sys.stdout = Tee(sys.stdout, log_file)

    # store N_t(a)
    Nt = np.zeros(ACTIONS)

    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    Nt[0] += 1
    x_t, r_0, terminal, curr_score = game_state.frame_step(do_nothing)

    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t, (80, 80))
    x_t = skimage.exposure.rescale_intensity(x_t, out_range=(0, 255))

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    # In Keras, need to reshape
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # shape (1, 80, 80, 4)

    # Create target network
    if args['training_algorithm'] == "doubleDQN":
        target_model = clone_model(model)
        target_model.set_weights(model.get_weights())

    if args['mode'] == 'run':
        OBSERVE = 999999999  # We keep observing, never train
        epsilon = FINAL_EPSILON
        print("Now we load weights")
        if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
            for i in range(BOOTSTRAP_K):
                if os.path.isfile("model_%d.h5" % (i)):
                    model[i].load_weights("model_%d.h5" % (i))
                    print("Weights for head %d loaded successfully" % (i))
        else:
            if os.path.isfile("model.h5"):
                model.load_weights("model.h5")
                print("Weights loaded successfully")
    else:  # We go to training mode
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON

    t = 0
    total_reward = 0
    while (True):
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t1 = 0
        a_t = np.zeros([ACTIONS])
        if t % FRAME_PER_ACTION == 0:
            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                chosen = np.random.randint(BOOTSTRAP_K)
                q = model[chosen].predict(s_t)
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[action_index] = 1
            else:
                # choose an action epsilon-greedily
                if random.random() <= epsilon:
                    print("----------Random Action----------")
                    action_index = random.randrange(ACTIONS)
                    a_t[action_index] = 1
                else:
                    q = model.predict(s_t)  # input a stack of 4 images, get the prediction
                    max_Q = np.argmax(q)
                    action_index = max_Q
                    a_t[action_index] = 1
        Nt[action_index] += 1

        # We reduce epsilon gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t1, terminal, curr_score = game_state.frame_step(a_t)
        terminal_check = terminal

        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1, (80, 80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))

        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1x80x80x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # calculate bootstrap mask
        # the authors use Bernoulli(0.5), but that essentially means
        # choose with 0.5 probability on each head
        mask = np.random.choice(2, BOOTSTRAP_K, p=[0.5, ] * 2)

        # store the transition in D
        if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
            D.append((s_t, action_index, r_t1, s_t1, terminal, mask))
        else:
            D.append((s_t, action_index, r_t1, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))  # shape (32, 80, 80, 4)
            print(inputs.shape)
            targets = np.zeros((inputs.shape[0], ACTIONS))
            # shape (32, 2)

            # Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]
                action_t = minibatch[i][1]  # This is the action index
                reward_t = minibatch[i][2]
                state_t1 = minibatch[i][3]
                terminal = minibatch[i][4]
                if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                    mask = minibatch[i][5]

                inputs[i:i + 1] = state_t  # I saved down s_t

                # Probability of hitting each button
                if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                    targets[i] = model[chosen].predict(state_t)
                else:
                    targets[i] = model.predict(state_t)

                if terminal:
                    targets[i, action_t] = reward_t
                else:
                    if args['training_algorithm'] == "DQN":
                        Q_sa = model.predict(state_t1)
                        targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)
                    elif args['training_algorithm'] == "doubleDQN":
                        Q_sa = model.predict(state_t1)
                        Q_target = target_model.predict(state_t1)
                        maxQ_ind = np.argmax(Q_sa, axis=1)
                        targets[i, action_t] = reward_t + GAMMA * Q_target[0][maxQ_ind]
                    elif args['training_algorithm'] == "DQN+UCB":
                        Q_sa = model.predict(state_t1)
                        modified_Q_sa = Q_sa + np.sqrt(2 * np.log(t) / (Nt))
                        targets[i, action_t] = reward_t + GAMMA * np.max(modified_Q_sa)
                    elif args['training_algorithm'] == "bootstrappedDQN":
                        Q_sa = model[chosen].predict(state_t1)
                        targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)
                    elif args['training_algorithm'] == "bootstrappedDQN+UCB":
                        Q_sa = model[chosen].predict(state_t1)
                        modified_Q_sa = Q_sa + np.sqrt(2 * np.log(t) / (Nt))
                        targets[i, action_t] = reward_t + GAMMA * np.max(modified_Q_sa)

            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                for idx in range(BOOTSTRAP_K):
                    if mask[idx] == 1:
                        loss += model[idx].train_on_batch(inputs, targets)
            else:
                loss += model.train_on_batch(inputs, targets)

        s_t = s_t1
        t = t + 1

        if args['training_algorithm'] == "doubleDQN" and t % TARGET_UPDATE == 0:
            print("----------------------------Copy to target model----------------------------")
            target_model.set_weights(model.get_weights())

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                for i in range(BOOTSTRAP_K):
                    model[i].save_weights("model_%d.h5" % (i), overwrite=True)
                    with open("model_%d.json" % (i), "w") as outfile:
                        json.dump(model[i].to_json(), outfile)
            else:
                model.save_weights("model.h5", overwrite=True)
                with open("model.json", "w") as outfile:
                    json.dump(model.to_json(), outfile)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        printInfo(t, state, action_index, r_t1, Q_sa, loss)
        score_file = open("scores", "a")
        score_file.write(str(curr_score) + "\n")
        score_file.close()

        if terminal_check:
            print("Total rewards: ", total_reward)
            out_file = open("total_reward", "a")
            out_file.write(str(total_reward) + "\n")
            out_file.close()
            total_reward = 0
        else:
            total_reward = total_reward + r_t1

    print("Episode finished!")
    print("************************")

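# The sqrt(2 * log(t) / Nt) term in the UCB variants above is the standard
# UCB1 exploration bonus, computed per action from the visit counts Nt. The
# training loop applies it inline; isolated here as a sketch:
import numpy as np

def ucb_adjusted_q(q_values, t, counts):
    """Add the UCB1 bonus sqrt(2 ln t / N(a)) to each action's Q-values.

    Actions never tried (N(a) == 0) receive an infinite bonus, so they are
    explored first.
    """
    return q_values + np.sqrt(2.0 * np.log(t) / counts)
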
def reduce_keras_model(model, verbose=False):
    from keras.models import Model
    from keras.models import clone_model

    x = []
    input = []
    skip_layers = []
    keras_sub_version = get_keras_sub_version()
    if verbose:
        print('Keras sub version: {}'.format(keras_sub_version))

    # Find all inputs
    for level_id in range(len(model.layers)):
        layer = model.layers[level_id]
        layer_type = layer.__class__.__name__
        if layer_type == 'InputLayer':
            inp1 = get_copy_of_layer(layer, verbose)
            x.append(inp1)
            input.append(inp1.output)

    tmp_model = Model(inputs=input, outputs=input)

    for level_id in range(len(model.layers)):
        layer = model.layers[level_id]
        layer_type = layer.__class__.__name__

        # Skip input layers
        if layer_type == 'InputLayer':
            continue

        input_layers = get_input_layers_ids(model, layer, verbose)
        output_layers = get_output_layers_ids(model, layer, verbose)
        if verbose:
            print('Go for {}: {} ({}). Input layers: {} Output layers: {}'.format(
                level_id, layer_type, layer.name, input_layers, output_layers))

        if level_id in skip_layers:
            if verbose:
                print('Skip layer because it was removed during optimization!')
            continue

        # Special cases for reducing
        if len(output_layers) == 1:
            next_layer = model.layers[output_layers[0]]
            next_layer_type = next_layer.__class__.__name__
            if layer_type in ['Conv2D', 'DepthwiseConv2D'] and next_layer_type == 'BatchNormalization':
                tmp_model = optimize_conv2d_batchnorm_block(
                    tmp_model, model, input_layers, layer, next_layer, verbose)
                x = tmp_model.layers[-1].output
                skip_layers.append(output_layers[0])
                continue
            if layer_type in ['SeparableConv2D'] and next_layer_type == 'BatchNormalization':
                tmp_model = optimize_separableconv2d_batchnorm_block(
                    tmp_model, model, input_layers, layer, next_layer, verbose)
                x = tmp_model.layers[-1].output
                skip_layers.append(output_layers[0])
                continue

        if layer_type == 'Model':
            new_layer = clone_model(layer)
            new_layer.set_weights(layer.get_weights())
        else:
            new_layer = get_copy_of_layer(layer, verbose)

        prev_layer = []
        for i in range(len(set(input_layers))):
            search_layer = tmp_model.get_layer(name=model.layers[input_layers[i]].name)
            try:
                tens = search_layer.output
                prev_layer.append(tens)
            except:
                # Ugly, needs to be checked for correctness
                for node in search_layer._inbound_nodes:
                    for i in range(len(node.inbound_layers)):
                        outbound_tensor_index = node.tensor_indices[i]
                        prev_layer.append(node.output_tensors[outbound_tensor_index])

        if len(prev_layer) == 1:
            prev_layer = prev_layer[0]

        output_tensor, output_names = get_layers_without_output(tmp_model, verbose)
        if layer_type == 'Model':
            for f in prev_layer:
                x = new_layer(f)
                if f in output_tensor:
                    output_tensor.remove(f)
                output_tensor.append(x)
        else:
            x = new_layer(prev_layer)
            if type(prev_layer) is list:
                for f in prev_layer:
                    if f in output_tensor:
                        output_tensor.remove(f)
            else:
                if prev_layer in output_tensor:
                    output_tensor.remove(prev_layer)
            if type(x) is list:
                output_tensor += x
            else:
                output_tensor.append(x)

        tmp_model = Model(inputs=input, outputs=output_tensor)
        tmp_model.get_layer(name=layer.name).set_weights(layer.get_weights())

    output_tensor, output_names = get_layers_without_output(tmp_model, verbose)
    if verbose:
        print('Output names: {}'.format(output_names))
    model = Model(inputs=input, outputs=output_tensor)
    return model

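# For reference, the Conv2D + BatchNormalization fusion performed by
# optimize_conv2d_batchnorm_block (defined outside this snippet) reduces to
# standard arithmetic on the frozen BN statistics: with scale gamma, shift
# beta, moving mean mu, and moving variance sigma^2, the folded kernel is
# w' = w * gamma / sqrt(sigma^2 + eps) and the folded bias is
# b' = beta + (b - mu) * gamma / sqrt(sigma^2 + eps). A numpy sketch of that
# computation (an assumption about the helper's internals, not its code):
import numpy as np

def fold_conv_batchnorm(kernel, bias, gamma, beta, mean, var, eps=1e-3):
    """Fold frozen BatchNorm statistics into the preceding conv's weights.

    kernel: (kh, kw, c_in, c_out); bias, gamma, beta, mean, var: (c_out,).
    """
    scale = gamma / np.sqrt(var + eps)  # per-output-channel rescaling
    folded_kernel = kernel * scale      # broadcasts over the last axis
    folded_bias = beta + (bias - mean) * scale
    return folded_kernel, folded_bias
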
def copy_agent(agent):
    weights = agent.model.get_weights()
    copied_model = clone_model(agent.model)
    copied = DQNAgent(*agent.get_init_info())
    copied.set_model(copied_model, weights)
    return copied

def trainModelWithDMC(p_boardSizeX, p_boardSizeY, p_episodes, savePath,
                      p_temperature=15.0, p_gamma=0.9, howFar=2,
                      rewardFunc=game.rewardFunc2, selfPlayFixOpp=False,
                      startingModel=None, opponentConstr=None,
                      opponentStrength=None):
    model = startingModel
    if startingModel is None:
        model = Sequential()
        model.add(Dense(70, kernel_initializer='lecun_uniform', activation='relu',
                        input_shape=(2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)),)))
        model.add(Dense(35, kernel_initializer='lecun_uniform', activation='relu'))
        model.add(Dense(3, kernel_initializer='lecun_uniform', activation='linear'))
        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 1
    buffer = 1
    replay = []
    h = 0
    updateStep = 0

    for i in range(episodes):
        rewardList1 = []
        stateList1 = []
        actionList1 = []
        rewardList2 = []
        stateList2 = []
        actionList2 = []

        # init board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        # set opponent
        if opponentConstr is None:
            if selfPlayFixOpp:
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                                 game.AITrainingReduFeatWOPlayer("2", board, oppModel, howFar))
            else:  # default
                board.setPlayers(game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                                 game.AITrainingReduFeatWOPlayer("2", board, model, howFar))
        else:
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(game.AITrainingReduFeatWOPlayer("1", board, model, howFar), opp)

        # set start strategy
        board.startGameWithPseudoRandomStartPositions()

        while board.checkGameStatus() == 0:
            # Boltzmann action selection
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            action1 = np.argmax(Qprobs[0] == action_value) - 1
            actionList1.append(action1)

            board.player2.getDirection()
            Qprobs = game.softmax(board.player2.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            action2 = np.argmax(Qprobs[0] == action_value) - 1
            actionList2.append(action2)

            # Take action, observe new state S'
            state1 = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2, howFar).reshape(
                    1, 2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            stateList1.append(state1)
            state2 = board.to01ReducedFeaturesWithOpponent(
                board.player2, board.player1, howFar).reshape(
                    1, 2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            stateList2.append(state2)

            board.movePlayers(action1, action2)
            gameStatus = board.checkGameStatus()

            # only with rewardFunc1
            reward = game.rewardFunc2(gameStatus)
            reward2 = 1.0 * reward
            if (gameStatus == 1) or (gameStatus == 2):
                reward2 = -1.0 * reward2
            rewardList1.append(reward)
            rewardList2.append(reward2)

        dAC1 = discountedAccRewards(rewardList1, gamma)
        dAC2 = discountedAccRewards(rewardList2, gamma)

        stateList1 = stateList1 + stateList2
        actionList1 = actionList1 + actionList2
        dAC1 = np.append(dAC1, dAC2)

        for stepNum in range(len(stateList1)):
            if len(replay) < buffer:
                replay.append((stateList1[stepNum], actionList1[stepNum], dAC1[stepNum]))
            else:
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (stateList1[stepNum], actionList1[stepNum], dAC1[stepNum])

                # randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    state, action, accReward = memory
                    Qvals = model.predict(state, batch_size=1)
                    y = np.zeros((1, 3))
                    y[:] = Qvals[:]
                    y[0][action + 1] = accReward  # action + 1 because actions are -1, 0, 1
                    X_train.append(state.reshape(2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)),))
                    y_train.append(y.reshape(3,))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                print("Game #: %s" % (i,))
                model.fit(X_train, y_train, batch_size=batchSize, epochs=1, verbose=1)
                updateStep += 1

        if i % 10000 == 0:
            model.save(savePath)
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0

    model.save(savePath)

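# One nit on the Boltzmann selection in trainModelWithDMC: recovering the
# action with np.argmax(Qprobs[0] == action_value) breaks ties toward the
# first matching index, so two actions with identical probabilities cannot
# both be sampled faithfully. Sampling the index directly avoids that; a
# self-contained sketch (softmax inlined rather than via game.softmax):
import numpy as np

def boltzmann_action(q_values, temperature):
    """Sample an action in {-1, 0, 1} from a softmax over Q-values."""
    z = np.asarray(q_values, dtype=float).ravel() / temperature
    probs = np.exp(z - z.max())  # subtract the max for numerical stability
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs) - 1  # shift index to -1, 0, 1
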
total_profit = 0
agent.inventory = []
# agent.inventory.append(data[0])
# next_state = getState(data, 0 + 1, window_size + 1)
# reward = 0
# agent.memory.append((state, action, reward, next_state, done))

for t in range(l):
    if t == 0:
        action = 1
    else:
        action = agent.act(state)

    # Refresh the target network from the online model every 20 steps.
    if (t % 20) == 0:
        agent.target_model = clone_model(agent.model)
        agent.target_model.set_weights(agent.model.get_weights())

    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0

    if action == 1:  # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))

    elif action == 2 and len(agent.inventory) > 0:  # sell
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " +
def clone_model(self):
    model_copy = clone_model(self.model)
    model_copy.set_weights(self.model.get_weights())
    return model_copy

def __init__(self, state_dim, action_size=3, strategy="t-dqn",
             dueling_type='no', use_PER=True, epsilon_start=1.0,
             epsilon_end=0.01, epsilon_decay_steps=25000,
             reset_every=100, pretrained=False, model_name=None):
    self.strategy = strategy

    # agent config
    self.state_dim = state_dim  # normalized
    self.action_size = action_size  # default = 3 [sit, buy, sell]
    self.model_name = model_name
    self.inventory = []
    self.first_iter_trading = False
    self.total_steps = 0
    self.episodes = self.episode_length = 0
    self.steps_per_episode = []
    self.episode_reward = 0
    self.rewards_history = []
    self.losses = []
    self.epsilon = epsilon_start
    self.epsilon_decay_steps = epsilon_decay_steps
    self.epsilon_decay = (epsilon_start - epsilon_end) / epsilon_decay_steps
    self.epsilon_history = []

    # model config
    self.gamma = 0.99  # affinity for long term reward
    self.l2_reg = 1e-6
    self.dueling_type = dueling_type
    # self.epsilon = 1.0
    # self.epsilon_min = 0.01
    # self.epsilon_decay = 0.995
    self.learning_rate = 0.0001
    self.loss = huber_loss
    # important for loading the model from memory
    self.custom_objects = {"huber_loss": huber_loss}
    self.optimizer = Adam(lr=self.learning_rate)
    self.with_per = use_PER
    self.pretrained = pretrained
    self.results_dir = 'results'

    if pretrained and self.model_name is not None:
        self.model = self.load()
    else:
        self.model = self._model()

    # strategy config
    if self.strategy in ["t-dqn", "double-dqn"]:
        self.total_steps = 1
        self.reset_every = reset_every

        # target network
        self.target_model = clone_model(self.model)
        self.target_model.set_weights(self.model.get_weights())
    else:
        self.with_per = False

    # Memory Buffer for Experience Replay
    if self.with_per:
        self.buffer = MemoryBuffer(int(100000))
        print("Agent with Prioritized Experience Replay")
    else:
        self.memory = deque(maxlen=100000)

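# The target_model kept in sync above is what the two strategies differ on at
# update time: plain t-dqn takes the max over the target network's Q-values,
# while double-dqn selects the argmax action with the online network and
# evaluates it with the target network. A hedged sketch of both targets (the
# agent's actual update method is outside this snippet):
import numpy as np

def td_target(reward, next_state, done, model, target_model, gamma, strategy):
    """One-step TD target for a single transition (illustrative sketch)."""
    if done:
        return reward
    q_next_target = target_model.predict(next_state)[0]
    if strategy == "double-dqn":
        best_action = np.argmax(model.predict(next_state)[0])  # select online
        return reward + gamma * q_next_target[best_action]     # evaluate with target
    return reward + gamma * np.amax(q_next_target)             # t-dqn
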
def copy(self, data):
    model = clone_model(data)
    model.set_weights(data.get_weights())
    return model

foldA_test_labels, foldB_test_labels = foldB_train_labels, foldA_train_labels

# dimension of data and number of classes
dimension = 4
num_classes = 3

#####################################
model = Sequential()
model.add(Dense(128, activation='relu', input_dim=dimension))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

########### FOLD A ###############
from keras.models import clone_model

# clone_model copies the architecture only, so each fold trains from freshly
# initialized weights of the same template network.
model_A = clone_model(model)
model_A.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
model_A.fit(foldA_train, foldA_train_labels, epochs=20)
loss, accuracy1 = model_A.evaluate(foldA_test, foldA_test_labels)

########### FOLD B ###############
model_B = clone_model(model)
model_B.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
model_B.fit(foldB_train, foldB_train_labels, epochs=20)

online_network = create_dqn_model(input_shape, nb_actions, dense_layers, dense_units)


def epsilon_greedy(q_values, epsilon, n_outputs):
    if random.random() < epsilon:
        return random.randrange(n_outputs)  # random action
    else:
        return np.argmax(q_values)  # q-optimal action


replay_memory_maxlen = 1000000
replay_memory = deque([], maxlen=replay_memory_maxlen)

target_network = clone_model(online_network)
target_network.set_weights(online_network.get_weights())

name = 'MsPacman_DQN'  # used in naming files (weights, logs, etc)
n_steps = 50000  # total number of training steps (= n_epochs)
warmup = 1000  # start training after warmup iterations
training_interval = 20  # period (in actions) between training steps
save_steps = int(n_steps / 10)  # period (in training steps) between storing weights to file
copy_steps = 500  # period (in training steps) between updating target_network weights
gamma = 0.8  # discount rate
skip_start = 90  # skip the start of every game (it's just freezing time before game starts)
batch_size = 128  # size of minibatch that is taken randomly from replay memory every training step
double_dqn = False  # whether to use Double-DQN approach or simple DQN (see above)

# eps-greedy parameters: we slowly decrease epsilon from eps_max to eps_min in eps_decay_steps
eps_max = 1.0
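# The snippet is cut off mid-way through the eps-greedy schedule (only
# eps_max survives). Assuming the conventional companion values eps_min and
# eps_decay_steps (both placeholders here, since the original values are
# truncated away), the linear anneal it describes would look like:
eps_min = 0.05           # assumed placeholder
eps_decay_steps = 50000  # assumed placeholder

def current_epsilon(step):
    """Linearly anneal epsilon from eps_max down to eps_min."""
    return max(eps_min, eps_max - (eps_max - eps_min) * step / eps_decay_steps)

# q_values -> action at training step `step`:
# action = epsilon_greedy(q_values, current_epsilon(step), nb_actions)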