def load_model(self, model, env_shape, action_shape, **kwargs):
    input_layer = Input(shape=env_shape)
    m = model(input_layer)
    output = Dense(action_shape, activation='linear')(m)
    self.model = Model(input_layer, output, name='dqn_model')

    # Create loss. `loss_delta` is the value at which the Huber loss
    # transitions from a quadratic to a linear function.
    delta = kwargs.get('loss_delta', 1.0)
    loss_function = kwargs.get('loss_function', 'huber')
    if loss_function == 'huber':
        loss = Huber(delta=delta)
    elif loss_function == 'mse':
        loss = MeanSquaredError()
    else:
        loss = Huber(delta=delta)

    # Create optimizer
    LR = kwargs.get('LR', 0.001)                # learning rate
    LR_decay1 = kwargs.get('LR_decay1', 0.9)    # exponential decay rate for the 1st moment estimates
    LR_decay2 = kwargs.get('LR_decay2', 0.999)  # exponential decay rate for the 2nd moment estimates
    optimizer = Adam(learning_rate=LR, beta_1=LR_decay1, beta_2=LR_decay2)

    self.model.compile(optimizer=optimizer, loss=loss,
                       metrics=kwargs.get('metrics'))
    self.model.summary()
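# Usage sketch for load_model above (an assumption, not from the original
# source): any callable that maps the input tensor to a hidden representation
# can serve as the backbone, and `agent` stands in for an instance of the
# surrounding class.
def simple_backbone(x):
    x = Flatten()(x)
    return Dense(64, activation='relu')(x)

agent.load_model(simple_backbone, env_shape=(84, 84, 4), action_shape=6,
                 loss_function='huber', loss_delta=1.0, LR=0.00025)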
def learn(self, gamma: float, frame_number: int,
          priority_scale: float = 1.0) -> Tuple[float, np.ndarray]:
    if self.use_per:
        (states, actions, rewards, new_states, terminal), importance, indices = \
            self.replay_buffer.get_minibatch(self.batch_size, priority_scale=priority_scale)
        importance = importance ** (1 - self.calc_epsilon(frame_number))
    else:
        states, actions, rewards, new_states, terminal = \
            self.replay_buffer.get_minibatch(self.batch_size, priority_scale=priority_scale)

    # Double DQN: the online network selects the next action, the target
    # network evaluates it.
    main_q_values = self.main_q.predict(new_states).argmax(axis=1)
    future_q_values = self.target_q.predict(new_states)
    double_q = future_q_values[np.arange(self.batch_size), main_q_values]

    # Bellman target; the (1 - terminal) factor zeroes the bootstrap term
    # on terminal transitions.
    target_q = rewards + (gamma * double_q * (1 - terminal.astype(np.float32)))

    with GradientTape() as tape:
        q_values = self.main_q(states)
        one_hot_actions = to_categorical(actions, self.n_actions, dtype=np.float32)
        Q = tf.reduce_sum(tf.multiply(q_values, one_hot_actions), axis=1)
        error = Q - target_q
        loss = Huber()(target_q, Q)
        if self.use_per:
            loss = tf.reduce_mean(loss * importance)

    gradients = tape.gradient(loss, self.main_q.trainable_variables)
    self.main_q.optimizer.apply_gradients(zip(gradients, self.main_q.trainable_variables))

    if self.use_per:
        self.replay_buffer.set_priorities(indices, error)

    return float(loss.numpy()), error
def __init__(
        self,
        n_actions: int,
        input_shape: Tuple = (84, 84),
        batch_size: int = 32,
        history_length: int = 4,
        learning_rate: float = 0.00001,
        eps_initial: float = 1.0,
        eps_final: float = 0.1,
        eps_final_frame: float = 0.0,
        eps_evaluation: float = 0.0,
        eps_annealing_frames: int = 1000000,
        replay_buffer_size: int = 1000000,
        replay_buffer_start_size: int = 50000,
        max_frames: int = 25000000,
        use_per: bool = True) -> None:
    self.n_actions = n_actions
    self.input_shape = input_shape
    self.history_length = history_length
    self.learning_rate = learning_rate
    self.replay_buffer_start_size = replay_buffer_start_size
    self.max_frames = max_frames
    self.batch_size = batch_size
    self.use_per = use_per

    self.eps_initial = eps_initial
    self.eps_final = eps_final
    self.eps_final_frame = eps_final_frame
    self.eps_evaluation = eps_evaluation
    self.eps_annealing_frames = eps_annealing_frames
    self.replay_buffer_size = replay_buffer_size

    # Piecewise-linear epsilon schedule: a first segment anneals from
    # eps_initial to eps_final over eps_annealing_frames, then a second
    # segment anneals from eps_final to eps_final_frame over the rest.
    self.slope = -(self.eps_initial - self.eps_final) / self.eps_annealing_frames
    self.intercept = self.eps_initial - self.slope * self.replay_buffer_start_size
    self.slope_2 = -(self.eps_final - self.eps_final_frame) / \
        (self.max_frames - self.eps_annealing_frames - self.replay_buffer_start_size)
    self.intercept_2 = self.eps_final_frame - self.slope_2 * self.max_frames

    self.replay_buffer: ReplayMemory = ReplayMemory(
        size=self.replay_buffer_size,
        input_shape=self.input_shape,
        history_length=self.history_length,
        use_per=self.use_per)

    self.main_q = build_q_network(self.n_actions, self.input_shape, self.history_length)
    self.target_q = build_q_network(self.n_actions, self.input_shape, self.history_length)
    self.main_q.compile(optimizer=Adam(self.learning_rate), loss=Huber())
    self.target_q.compile(optimizer=Adam(self.learning_rate), loss=Huber())
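# The learn method above calls calc_epsilon, which is not shown in this
# section. A minimal sketch consistent with the slope/intercept constants
# defined in __init__ (an assumption, not the original implementation):
# purely random before the buffer is warm, then two linear annealing segments.
def calc_epsilon(self, frame_number: int, evaluation: bool = False) -> float:
    if evaluation:
        return self.eps_evaluation
    if frame_number < self.replay_buffer_start_size:
        return self.eps_initial
    if frame_number < self.replay_buffer_start_size + self.eps_annealing_frames:
        return self.slope * frame_number + self.intercept
    return self.slope_2 * frame_number + self.intercept_2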
def __init__(self, alpha=0.031157, delta=0.13907, epsilon_decay=0.99997,
             eta=0.044575, gamma=0.013082, learning_rate=0.050023):
    super().__init__("DQN")
    self.action_size = State.ACTION_SIZE
    self.state_size = State.STATE_SIZE
    self.memory_rl = PrioritizedReplayBuffer(2000000)
    self.memory_sl = SupervisedMemory(2000000)
    self.batch_size = 512
    self.model_update_frequency = 10
    self.model_save_frequency = 100
    self.alpha = alpha  # Pred opt: 0.7
    self.delta = delta  # Pred opt: 0.5
    self.epsilon = 1
    self.epsilon_min = 0.001
    self.epsilon_decay = epsilon_decay  # Pred opt: 0.99999
    self.gamma = gamma  # Pred opt: 0.01
    self.learning_rate = learning_rate  # Pred opt: 0.1
    self.learning_rate_sl = 0.005
    self.eta = eta  # Pred opt: 0.1
    self.number_of_episodes = 1000
    self.reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1,
                                       patience=5, min_lr=0)
    self.policy_network = self.build_model(
        self.learning_rate, 'linear',
        Huber(reduction=Reduction.SUM, delta=self.delta))
    self.target_network = self.build_model(
        self.learning_rate, 'linear',
        Huber(reduction=Reduction.SUM, delta=self.delta))
    self.target_network.set_weights(self.policy_network.get_weights())
    self.supervised_learning_network = self.build_model(
        self.learning_rate_sl, 'softmax',
        tf.keras.losses.sparse_categorical_crossentropy)
    self.total_rewards_p1 = []
    self.total_rewards_p2 = []
    self.losses = []
    self.steps = 0
    self.p2 = AgentPlaceholder()
    self.rounds = 0
    self.n_batches = 0
    self.save_model = True
    self.env = UnoEnvironment(False)
def build_model(self) -> Sequential:
    """Constructs and returns a convolutional deep Q-network."""
    model = Sequential()
    model.add(Conv2D(filters=8, kernel_size=4, strides=(2, 2),
                     padding="valid", activation="relu",
                     input_shape=self.image_shape))
    model.add(Flatten())
    model.add(Dense(units=128, activation="relu"))
    model.add(Dense(self.action_size))
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=0.01),
                  metrics=["accuracy"])
    return model
def __rossNet(self):
    """
    Notes
    -----
    Ref: https://doi.org/10.1029/2017JB015251
    """
    model = Sequential()
    model.add(Conv1D(32, 21, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 15, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, 11, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss=Huber(), optimizer=Adam())
    return model
def dueling_dqn(input_shape: Tuple[int], action_size: int,
                learning_rate: float, noisy: bool) -> Model:
    # Build the convolutional network section and flatten the output
    state_input, x = build_base_cnn(input_shape, noisy)

    # Determine the type of the fully connected layers
    dense_layer = NoisyDense if noisy else Dense

    # State value tower - V
    state_value = dense_layer(256, activation='relu',
                              kernel_initializer=he_uniform())(x)
    state_value = dense_layer(1, kernel_initializer=he_uniform())(state_value)
    state_value = Lambda(lambda s: K.expand_dims(s[:, 0], axis=-1),
                         output_shape=(action_size,))(state_value)

    # Action advantage tower - A
    action_advantage = dense_layer(256, activation='relu',
                                   kernel_initializer=he_uniform())(x)
    action_advantage = dense_layer(
        action_size, kernel_initializer=he_uniform())(action_advantage)
    action_advantage = Lambda(
        lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
        output_shape=(action_size,))(action_advantage)

    # Merge into the state-action value function Q = V + (A - mean(A))
    state_action_value = add([state_value, action_advantage])

    model = Model(inputs=state_input, outputs=state_action_value)
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=learning_rate))
    return model
def dueling_build_model(input_dimension, output_dimension, nodes_per_layer,
                        hidden_layer_count, learning_rate):
    inputs = Input(shape=(input_dimension,))

    # Build advantage stream
    advantage_hidden_layer = inputs
    for _ in range(hidden_layer_count):
        advantage_hidden_layer = Dense(nodes_per_layer,
                                       activation='relu')(advantage_hidden_layer)
    predictions_advantage = Dense(output_dimension,
                                  activation='linear')(advantage_hidden_layer)

    # Build value stream
    value_hidden_layer = inputs
    for _ in range(hidden_layer_count):
        value_hidden_layer = Dense(nodes_per_layer,
                                   activation='relu')(value_hidden_layer)
    predictions_value = Dense(1, activation='linear')(value_hidden_layer)

    # Combine streams: Q = V + (A - mean(A))
    advantage_average = Lambda(mean)(predictions_advantage)
    advantage = Subtract()([predictions_advantage, advantage_average])
    predictions = Add()([advantage, predictions_value])

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=Huber())
    return model
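# dueling_build_model above wraps an undefined `mean` helper in a Lambda.
# A plausible one-liner (an assumption, not from the source): average the
# advantages over the action axis, keeping the dimension so Subtract() can
# broadcast against the per-action advantages.
from tensorflow.keras import backend as K

def mean(tensor):
    return K.mean(tensor, axis=1, keepdims=True)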
def __init__(self, name, alpha, gamma, input_layer_size, number_of_parameters,
             out_layer_size, memory_size=10000, batch_size=32):
    config = configparser.ConfigParser()
    config.read('configuration/agent_config.ini')
    config.sections()
    enable_model_load = config['model_weights'].getboolean('enable_load_model_weights')
    self.enable_model_save = config['model_weights'].getboolean('enable_save_model_weights')
    self.tensorboard_visualization = config['tensorboard'].getboolean('enable_dqn')

    # Hyperparameters
    self.memory = MemoryBuffer(max_size=memory_size,
                               number_of_parameters=number_of_parameters)
    self.gamma = gamma
    self.learning_rate = alpha
    self.batch_size = batch_size
    self.out_layer_size = out_layer_size
    self.action_space = [i for i in range(out_layer_size)]
    loss = Huber()
    optimizer = 'adam'

    # Epsilon-greedy strategy. Use epsilon = 1.0 only when changing the model;
    # otherwise the learned weights from the .h5 file are used.
    self.epsilon = 1.0
    self.epsilon_decay = 0.9985
    self.epsilon_min = 0.005

    # Keras models
    hl1_dims = 128
    hl2_dims = 64
    hl3_dims = 64
    self.dqn_eval = self._build_model(hl1_dims, hl2_dims, hl3_dims,
                                      input_layer_size, out_layer_size,
                                      optimizer, loss)

    if self.tensorboard_visualization:
        comment = 'adam-huber-reward_per'
        path = config['tensorboard']['file_path']
        tboard_name = '{}{}-cmt-{}_hl1_dims-{}_hl2_dims-{}-time-{}'.format(
            path, name, comment, hl1_dims, hl2_dims, int(time.time()))
        self.tensorboard = TensorBoard(tboard_name)

    self.keras_weights_filename = '{}.keras'.format(name)
    self.model_loaded = False
    if enable_model_load:
        self.load_model()
    else:
        print('Applying epsilon greedy strategy')
def __init__(self, obs_size=4, action_size=2, lr=0.001):
    # TODO
    self.model = Sequential()
    self.model.add(Dense(16, activation='relu', input_dim=obs_size))
    self.model.add(Dense(16, activation='relu'))
    self.model.add(Dense(16, activation='relu'))
    self.model.add(Dense(action_size, activation='linear'))
    self.model.compile(loss=Huber(), optimizer=Adam(learning_rate=lr))
def learning(model, code, train_data, valid_data):
    global FILE_PATH
    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])

    earlystopping = EarlyStopping(monitor='val_loss', patience=10)
    foldername = FILE_PATH + '/' + code + '/'
    filename = os.path.join(foldername, code + '.ckpt')
    checkpoint = ModelCheckpoint(filename, save_weights_only=True,
                                 save_best_only=True, monitor='val_loss',
                                 verbose=1)

    # Debug output to verify that the add_layer function works as intended
    print('IN learning, print model', model)
    for data in train_data.take(1):
        print('train_data X shape (BATCH_SIZE, WINDOW_SIZE, feature):', data[0].shape)
        print('train_data Y shape (BATCH_SIZE, WINDOW_SIZE, feature):', data[1].shape)
    for data in valid_data.take(1):
        print('valid_data X shape (BATCH_SIZE, WINDOW_SIZE, feature):', data[0].shape)
        print('valid_data Y shape (BATCH_SIZE, WINDOW_SIZE, feature):', data[1].shape)

    history = model.fit(train_data, validation_data=valid_data, epochs=200,
                        callbacks=[checkpoint, earlystopping])
    print('End learning !')
def regression_loss(self, gt_bboxes, pred_bboxes, positive_mask):
    """
    Apply the regression function to the positive examples in order to
    compute the localization loss.

    Parameters
    ----------
    gt_bboxes: ground truth bboxes
    pred_bboxes: predicted bboxes
    positive_mask: boolean mask for positive examples

    Returns
    -------
    l_loc: localization loss
    """
    if self.regression_type == 'smooth_l1':
        # Huber with delta=1.0 is the smooth-L1 loss used by SSD
        localization_loss = Huber(delta=1.0, reduction='sum')
        l_loc = localization_loss(gt_bboxes[positive_mask],
                                  pred_bboxes[positive_mask])
    elif self.regression_type in ('DIoU', 'CIoU'):
        pred_bboxes = decode_boxes(self.default_boxes, pred_bboxes)[positive_mask]
        gt_bboxes = decode_boxes(self.default_boxes, gt_bboxes)[positive_mask]
        l_loc = self.iou_loss(gt_bboxes, pred_bboxes)
    else:
        raise ValueError(f'unknown regression type: {self.regression_type}')
    return l_loc
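# The smooth_l1 branch above relies on Huber loss with delta=1.0 matching the
# smooth-L1 loss of Fast R-CNN/SSD. A quick numeric sanity check of that
# equivalence (illustrative only):
import numpy as np
import tensorflow as tf

def smooth_l1(x):
    # 0.5 * x^2 where |x| < 1, |x| - 0.5 elsewhere
    return np.where(np.abs(x) < 1.0, 0.5 * x ** 2, np.abs(x) - 0.5)

residuals = np.array([[-2.5], [-0.3], [0.0], [0.7], [4.0]], dtype=np.float32)
huber_sum = tf.keras.losses.Huber(delta=1.0, reduction='sum')
print(float(huber_sum(residuals, np.zeros_like(residuals))))  # 5.79
print(float(smooth_l1(residuals).sum()))                      # 5.79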
def _build_model(self): """ Builds a CNN model that will be used by the agent to predict Q-values. Returns ------- A compiled Keras model with Adam Optimizer and Huber loss. """ input = Input(shape=self._STATE_SPACE) x = Sequential([ Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), padding='valid', activation='relu', kernel_initializer=VarianceScaling(scale=2.0)), Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation='relu', padding='valid', kernel_initializer=VarianceScaling(scale=2.0)), Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='valid', kernel_initializer=VarianceScaling(scale=2.0)), Conv2D(filters=1024, kernel_size=(7, 7), strides=(1, 1), activation='relu', padding='valid', kernel_initializer=VarianceScaling(scale=2.0)), ])(input) value_tensor, advantage_tensor = Lambda( lambda x: tf.split(x, 2, axis=3))(x) value_tensor = Flatten()(value_tensor) advantage_tensor = Flatten()(advantage_tensor) advantage = Dense( self._NUM_ACTIONS, kernel_initializer=VarianceScaling(scale=2.0))(advantage_tensor) value = Dense( 1, kernel_initializer=VarianceScaling(scale=2.0))(value_tensor) mean_advantage = Lambda( lambda x: tf.reduce_mean(x, axis=1, keepdims=True))(advantage) normalized_advantage = Subtract()([advantage, mean_advantage]) output = Add()([value, normalized_advantage]) model = Model(inputs=input, outputs=output) optimizer = Adam(1e-5) loss = Huber(delta=1.0) model.compile(optimizer=optimizer, loss=loss) return model
def get_best_learningrate(model):
    # Learning-rate range test: grow the LR exponentially each epoch and plot
    # loss against LR to pick the steepest stable region.
    lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch: 1e-8 * 10**(epoch / 20))
    model.compile(loss=Huber(), optimizer=SGD(learning_rate=1e-8),
                  metrics=['mae'])
    history = model.fit(training_set, epochs=100, callbacks=[lr_scheduler])
    lrs = 1e-8 * 10**(np.arange(100) / 20)
    plt.semilogx(lrs, history.history['loss'])
    plt.show()
def construct_model():
    # Weather input
    weather_in = Input(shape=(360, 17), name='weather_in')
    # L2 regularization to prevent overfitting
    rglrz = l2(1e-4)

    # Stack of three bidirectional LSTM layers
    lstm = Bidirectional(LSTM(32, kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz, bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=True, name='lstm1'),
                         merge_mode='concat', name='Bid_1')(weather_in)
    lstm = Bidirectional(LSTM(32, kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz, bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=True, name='lstm2'),
                         merge_mode='concat', name='Bid_2')(lstm)
    lstm = Bidirectional(LSTM(32, kernel_regularizer=rglrz,
                              recurrent_regularizer=rglrz, bias_regularizer=rglrz,
                              recurrent_initializer='glorot_uniform',
                              return_sequences=False, name='lstm3'),
                         merge_mode='concat', name='Bid_3')(lstm)

    # Dense head produces the weather prediction
    prediction = Dense(1, kernel_initializer='glorot_uniform',
                       kernel_regularizer=rglrz, name='prediction')(lstm)

    # Geographic (county) branch
    county_in = Input(shape=(88,), name='county_in')
    county = Dense(32, activation='tanh', name='county1')(county_in)
    county = Dense(1, activation='sigmoid', name='county2')(county)

    # Merge the two branches by elementwise multiplication
    pre_county = Multiply(name='pre_county')([prediction, county])

    merge_model = Model(inputs=[weather_in, county_in], outputs=pre_county,
                        name='merge_model')
    # Compile
    merge_model.compile(optimizer=Adam(), loss=Huber(), metrics=[mse])
    return merge_model
def __init__(self, name, alpha, gamma, input_layer_size, number_of_parameters,
             out_layer_size, memory_size=50000, batch_size=64):
    config = configparser.ConfigParser()
    config.read('configuration/agent_config.ini')
    config.sections()
    enable_model_load = config['model_weights'].getboolean('enable_load_model_weights')
    self.enable_model_save = config['model_weights'].getboolean('enable_save_model_weights')
    self.tensorboard_visualization = config['tensorboard'].getboolean('enable_ddqnper')

    # Hyperparameters
    self.memory = MemoryBuffer(max_size=memory_size,
                               number_of_parameters=input_layer_size, with_per=True)
    self.with_per = True
    self.gamma = gamma
    self.learning_rate = alpha
    self.batch_size = batch_size
    self.out_layer_size = out_layer_size
    self.replace_target_network_after = config['model_settings'].getint(
        'ddqn_replace_network_interval')
    self.action_space = [i for i in range(out_layer_size)]
    self.priority_offset = 0.1  # keeps samples from ever having priority 0
    self.priority_scale = 0.7   # alpha, as suggested by the PER paper
    loss = Huber()
    optimizer = Adam(learning_rate=alpha)

    # Epsilon-greedy strategy. Use epsilon = 1.0 only when changing the model;
    # otherwise the learned weights from the .h5 file are used.
    self.epsilon = 1.0
    self.epsilon_decay = 0.9985
    self.epsilon_min = 0.005

    # Keras models
    hl1_dims = 128
    hl2_dims = 64
    hl3_dims = 64
    self.dqn_eval = self._build_model(hl1_dims, hl2_dims, hl3_dims,
                                      input_layer_size, out_layer_size,
                                      optimizer, loss)
    self.dqn_target = self._build_model(hl1_dims, hl2_dims, hl3_dims,
                                        input_layer_size, out_layer_size,
                                        optimizer, loss)

    if self.tensorboard_visualization:
        comment = 'adam-huber-reward_per2'
        path = config['tensorboard']['file_path']
        tboard_name = '{}{}-cmt-{}_hl1_dims-{}_hl2_dims-{}_hl3_dims-{}-time-{}'.format(
            path, name, comment, hl1_dims, hl2_dims, hl3_dims, int(time.time()))
        self.tensorboard = TensorBoard(tboard_name)

    self.keras_weights_filename = '{}.keras'.format(name)
    self.model_loaded = False
    if enable_model_load:
        self.load_model()
    else:
        print('Applying epsilon greedy strategy')
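# Sketch of how priority_offset and priority_scale above are typically used
# when TD errors are written back to the buffer, following the proportional
# prioritization rule p_i = (|delta_i| + offset) ** scale from the PER paper.
# set_priorities is a hypothetical MemoryBuffer method, not from the source.
import numpy as np

def set_priorities(self, indices, td_errors):
    # The offset keeps zero-error transitions replayable; the scale
    # interpolates between uniform sampling (0) and greedy prioritization (1)
    priorities = (np.abs(td_errors) + self.priority_offset) ** self.priority_scale
    for index, priority in zip(indices, priorities):
        self.priorities[index] = priority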
def _build_model(self):
    model = Sequential()
    model.add(Flatten(input_shape=self.input_shape))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(self.action_size, activation='linear'))
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=self.lr))
    return model
def __init__(self, img_shape: tuple, num_actions: int, learning_rate: float):
    super().__init__()
    self.img_shape = img_shape
    self.num_actions = num_actions
    self.learning_rate = learning_rate
    self.loss = Huber()
    # Note: the optimizer uses hard-coded RMSprop settings rather than the
    # learning_rate argument stored above.
    self.optimizer = RMSprop(learning_rate=0.00025, rho=0.95, epsilon=0.01)
    self.internal_model = self.build_model()
def __init__(self, env: str = '', episodes=1000, alpha=0.01, gamma=0.9,
             alpha_decay_rate=0.9):
    self.env = Environment(env=env)
    self.episodes = episodes
    self.lr = ExponentialDecay(alpha, episodes, alpha_decay_rate)
    self.optimizer = Adam(self.lr)
    self.action_count, self.states_count = self.env.spaces_count()
    self.gamma = gamma
    self._net = ReinforcePolicyNet(action_count=self.action_count,
                                   states_count=self.states_count)
    self._model = ReinforcePolicyModel(self._net)
    self._agent = ReinforcePolicyAgent(env=self.env, model=self._model, gamma=gamma)
    self.huber_loss = Huber(reduction=tf.keras.losses.Reduction.SUM)
def nvidia():
    """
    Implementation of Nvidia's End-to-End Learning model for self-driving cars.
    """
    global X_train, y_train

    # Model design
    inputs = Input(shape=(160, 320, 3))
    cropped = Cropping2D(cropping=((64, 0), (0, 0)))(inputs)
    resized_input = Lambda(lambda image: tf.image.resize(image, (66, 200)))(cropped)
    normalize_layer = LayerNormalization(axis=1)(resized_input)
    conv1 = Conv2D(filters=24, kernel_size=5, strides=(2, 2), activation='relu')(normalize_layer)
    conv2 = Conv2D(filters=36, kernel_size=5, strides=(2, 2), activation='relu')(conv1)
    conv3 = Conv2D(filters=48, kernel_size=5, strides=(2, 2), activation='relu')(conv2)
    conv4 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv3)
    conv5 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv4)
    flatten = Flatten()(conv5)
    dense1 = Dense(100, activation='relu')(flatten)
    dense2 = Dense(50, activation='relu')(dense1)
    dense3 = Dense(10, activation='relu')(dense2)
    out = Dense(1, activation='linear')(dense3)

    # Specifications and training
    checkpoint = ModelCheckpoint(filepath="./ckpts/model_nvidia.h5",
                                 monitor='val_loss', save_best_only=True)
    stopper = EarlyStopping(monitor='val_loss', min_delta=0.0003, patience=10)
    lr_schedule = ExponentialDecay(initial_learning_rate=0.0001,
                                   decay_steps=100000, decay_rate=0.95)
    optimizer = Adam(learning_rate=lr_schedule)
    loss = Huber(delta=0.5, reduction="auto", name="huber_loss")

    t2 = time()
    model = Model(inputs=inputs, outputs=out)
    model.compile(loss=loss, optimizer=optimizer)
    result = model.fit(X_train, y_train, validation_split=0.2, shuffle=True,
                       epochs=100, callbacks=[checkpoint, stopper])

    # Visualization of loss across epochs
    plt.plot(result.history['loss'])
    plt.plot(result.history['val_loss'])
    plt.title('Huber Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training set', 'Validation set'], loc='upper right')
    plt.savefig('loss.png')
    plt.show()

    print("Time taken to train: {:.2f}s".format(time() - t2))
    model.load_weights('./ckpts/model_nvidia.h5')
    model.save('model.h5')
def dqn(input_shape: Tuple[int], action_size: int, learning_rate: float,
        noisy: bool) -> Model:
    # Build the convolutional network section and flatten the output
    state_input, state_hidden = build_base_cnn(input_shape, noisy)
    dense_layer = NoisyDense if noisy else Dense
    output = dense_layer(action_size, activation='linear')(state_hidden)

    model = Model(inputs=state_input, outputs=output)
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=learning_rate))
    return model
def train_spatial_temporal_model(model, dataset_generator, opt='adam',
                                 epochs=EPOCHS, steps_per_epoch=STEPS_PER_EPOCH,
                                 include_tb=False):
    # Early stopping
    earlystopping = EarlyStopping(monitor='loss', min_delta=0.00001,
                                  patience=10, restore_best_weights=True)

    # Automatically save the latest best model to file
    filepath = repo_path + "models/model_saves/" + PRED_TAR + '/' + RUN_ID + ".hdf5"
    checkpoint = ModelCheckpoint(filepath=filepath, monitor='loss', verbose=0,
                                 save_best_only=True, mode='min')

    # Set callbacks
    callbacks_list = [checkpoint, earlystopping]

    # Optionally include TensorBoard
    if include_tb:
        tensorboard_cb = tf.keras.callbacks.TensorBoard(get_run_logdir())
        callbacks_list.append(tensorboard_cb)

    # Optimizers
    optimizers = {
        'adam': Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    }

    model.compile(loss='mean_absolute_error', optimizer=optimizers[opt],
                  metrics=[mae, RootMeanSquaredError(), Huber()])

    # Fit model
    history = model.fit(dataset_generator,
                        epochs=epochs,
                        use_multiprocessing=True,
                        steps_per_epoch=steps_per_epoch,
                        verbose=1,
                        callbacks=callbacks_list)
    return history
def _build_model(self):
    state_size = self.state_size
    action_size = self.action_size
    layers = self.options['layers']

    # Neural net for the deep Q-learning model. The first entry of `layers`
    # sizes the input layer; the remaining entries add further hidden layers
    # (previously the loop re-added layers[0], duplicating the first layer).
    model = Sequential()
    model.add(Dense(layers[0], input_dim=state_size, activation='relu'))
    for l in layers[1:]:
        model.add(Dense(l, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=self.options['alpha']))
    return model
def model(self):
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=8, strides=4, activation='relu',
                     input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)))
    model.add(Conv2D(filters=64, kernel_size=4, strides=2, activation='relu'))
    model.add(Conv2D(filters=64, kernel_size=3, strides=1, activation='relu'))
    model.add(Flatten())
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=number_of_actions, activation='linear'))
    opt = RMSprop(learning_rate=LEARNING_RATE, rho=GRADIENT_MOMENTUM,
                  epsilon=MIN_SQUARED_GRADIENT)
    model.compile(loss=Huber(), optimizer=opt)
    model.summary()
    return model
def vanilla_build_model(input_dimension, output_dimension, nodes_per_layer,
                        hidden_layer_count, learning_rate):
    inputs = Input(shape=input_dimension)
    hidden_layer = inputs
    for _ in range(hidden_layer_count):
        hidden_layer = Dense(nodes_per_layer, activation='relu')(hidden_layer)
        # TODO explore batch normalization in RL.
        # hidden_layer = BatchNormalization()(hidden_layer)
    predictions = Dense(output_dimension, activation='linear')(hidden_layer)

    model = Model(inputs=inputs, outputs=predictions)
    # TODO do more testing on MSE vs. Huber, and on Adam's epsilon
    # (e.g. Adam(learning_rate=learning_rate, epsilon=1.5e-4)).
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=Huber())
    return model
def create_lstm_network(env, *_, lr, layer_nodes, time_steps):
    """
    Builds an LSTM DQN network fitting the environment's state/action spaces.
    """
    state_size = env.observation_space.shape[0]
    nA = env.action_space.n

    model = Sequential()
    model.add(LSTM(layer_nodes[0], input_shape=(time_steps, state_size)))
    for num_nodes in layer_nodes[1:]:
        model.add(Dense(num_nodes, activation='relu'))
    model.add(Dense(nA, activation='linear'))
    model.compile(loss=Huber(), optimizer=Adam(learning_rate=lr))
    return model
def compile(self, optimizer=None, loss=Huber()):
    # Alternative: decay the learning rate with a schedule, e.g.
    # lr_schedule = ExponentialDecay(
    #     initial_learning_rate=self.config.learning_rate,
    #     decay_steps=self.config.lr_decay_steps,
    #     decay_rate=self.config.lr_decay)
    # optimizer = Adam(learning_rate=lr_schedule)
    if optimizer is None:
        optimizer = Adam(learning_rate=self.config.learning_rate)
    # The target network is never trained directly, so its compile
    # arguments are placeholders.
    self.target_model = clone_model(self.model)
    self.target_model.compile(optimizer='sgd', loss='mse')
    self.model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
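# A common companion to compile above (an assumption, not from the source):
# periodically hard-sync the cloned target network with the online model.
def update_target(self):
    self.target_model.set_weights(self.model.get_weights())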
def __get_loss(loss, num_of_classes):
    if loss == 'cross_entropy':  # default value
        # could return BinaryCrossentropy() when num_of_classes == 2
        return CategoricalCrossentropy()
    elif loss == 'binary_cross_entropy':
        return BinaryCrossentropy()
    elif loss == 'cosine_similarity':
        return CosineSimilarity()
    elif loss == 'mean_absolute_error':
        return MeanAbsoluteError()
    elif loss == 'mean_squared_error':
        return MeanSquaredError()
    elif loss == 'huber':
        return Huber()
    else:
        raise ValueError('loss type does not exist.')
def __init__(self, alpha=0.034657, delta=0.20752, epsilon_decay=0.99991,
             eta=0.096408, gamma=0.077969, learning_rate=0.00849):
    super().__init__(alpha, delta, epsilon_decay, eta, gamma, learning_rate)
    self.policy_network = DuelingQNetwork(self.action_size)
    self.target_network = DuelingQNetwork(self.action_size)
    self.policy_network.compile(optimizer=keras.optimizers.Adam(),
                                loss=Huber(reduction=Reduction.SUM))
    self.update_network()
def learning(model, code, train_input, train_output, valid_input, valid_output):
    global BATCH_SIZE, FILE_PATH
    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10)
    filename = os.path.join(FILE_PATH, code + '.h5')
    checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='auto')

    history = model.fit(train_input, train_output, epochs=200,
                        batch_size=BATCH_SIZE,
                        validation_data=(valid_input, valid_output),
                        callbacks=[early_stop, checkpoint])
    print('End Learning !\n')