def __init__(self, game_name):
    self.game_name = game_name
    if self.game_name == 'PONG':
        self.action_space = 3
    elif self.game_name == 'BREAKOUT':
        self.action_space = 4
    else:
        raise ValueError('Unsupported game: {}'.format(game_name))
    self.state_shape = (84, 84, 4)
    self.finish_greedy = 5000000
    self.no_frames = 1
    self.gamma = 0.99
    self.epsilon_start = 1.0
    self.epsilon_end = 0.05
    self.learning_rate = 0.00025
    self.optimizer = Adam(lr=self.learning_rate)
    self.loss_object = losses.Huber()
    self.model = self.build_model()
    self.target_model = self.build_model()
    self.batch_size = 32
    self.experience = deque(maxlen=50000)
    self.Q_list = []
def train(q_net, q_target, optimizer, batch_size, gamma, loss_list, Replay_time):
    huber = losses.Huber()
    for i in range(Replay_time):
        s, a, r, s_next, done_flag = q_net.sample_memory(batch_size)
        with tf.GradientTape() as tape:
            # Q_value
            qa_out = q_net(s)
            a_index = tf.expand_dims(tf.range(a.shape[0]), axis=1)
            a_index = tf.concat([a_index, a], axis=1)
            q_a = tf.gather_nd(qa_out, a_index)
            q_a = tf.expand_dims(q_a, axis=1)
            # Q_target_value
            qtarget_out = q_target(s_next)
            qtarget_out = tf.reduce_max(qtarget_out, axis=1, keepdims=True)  # for DQN
            # a_target = tf.argmax(qa_out, axis=1)
            # a_target = tf.reshape(tf.cast(a_target, dtype=tf.int32), shape=(batch_size, 1))
            # a_target_index = tf.expand_dims(tf.range(a_target.shape[0]), axis=1)
            # a_target_index = tf.concat([a_target_index, a_target], axis=1)
            # qtarget_out = tf.gather_nd(qtarget_out, a_target_index)
            # qtarget_out = tf.expand_dims(qtarget_out, axis=1)
            # for DDQN
            q_t = r + gamma * qtarget_out * done_flag
            # loss
            loss = huber(q_a, q_t)
            loss_list.append(loss)
        grads = tape.gradient(loss, q_net.trainable_variables)
        optimizer.apply_gradients(zip(grads, q_net.trainable_variables))
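# The commented-out block above is the Double-DQN variant. As a minimal
# standalone sketch (an assumption, not the author's code: it presumes the
# same q_net/q_target call signatures and a done_flag that is 0 at terminal
# states), the DDQN target selects the greedy action with the online network
# but evaluates it with the target network:
def ddqn_target(q_net, q_target, r, s_next, done_flag, gamma):
    # greedy action under the *online* network: [b] -> [b, 1]
    a_star = tf.argmax(q_net(s_next), axis=1, output_type=tf.int32)
    a_star = tf.reshape(a_star, (-1, 1))
    # index rows of the *target* network's output with those actions
    rows = tf.expand_dims(tf.range(tf.shape(a_star)[0]), axis=1)
    q_next = tf.gather_nd(q_target(s_next), tf.concat([rows, a_star], axis=1))
    q_next = tf.expand_dims(q_next, axis=1)  # [b] -> [b, 1]
    return r + gamma * q_next * done_flag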
def train(q, q_target, memory, optimizer, batch_size, gamma):
    # Build the Bellman-equation error from the Q network and the target
    # ("shadow") network; only the Q network is updated here, and the target
    # network is refreshed from it with a delay elsewhere.
    huber = losses.Huber()
    # Train 10 iterations per call
    for i in range(10):
        # Sample a batch from the replay buffer
        s, a, r, s_prime, done_mask = memory.sample(batch_size)
        with tf.GradientTape() as tape:
            # s: [b, 4]
            q_out = q(s)  # Q(s, a) over all actions
            # TF's gather_nd differs from PyTorch's gather, so we must build
            # the coordinate tensor gather_nd expects, indices: [b, 2]
            # (in PyTorch a single line suffices: pi_a = pi.gather(1, a))
            indices = tf.expand_dims(tf.range(a.shape[0]), axis=1)
            indices = tf.concat([indices, a], axis=1)
            # Q values of the taken actions, [b]
            q_a = tf.gather_nd(q_out, indices)
            # [b] => [b, 1]
            q_a = tf.expand_dims(q_a, axis=1)
            # Max of Q(s', a), taken from the target network! [b,4]=>[b,2]=>[b,1]
            max_q_prime = tf.reduce_max(q_target(s_prime), axis=1, keepdims=True)
            # Target value for Q(s, a_t), from the Bellman equation
            target = r + gamma * max_q_prime * done_mask
            # Error between Q(s, a_t) and the target
            loss = huber(q_a, target)
        # Update the network so that the Q(s, a_t) estimate satisfies the Bellman equation
        grads = tape.gradient(loss, q.trainable_variables)
        optimizer.apply_gradients(zip(grads, q.trainable_variables))
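# A tiny, self-contained illustration of the gather_nd indexing trick used
# above (hypothetical values, just to show the shapes involved):
q_out = tf.constant([[0.1, 0.9],
                     [0.7, 0.3],
                     [0.4, 0.6]])                    # [b=3, num_actions=2]
a = tf.constant([[1], [0], [1]])                     # actions taken, [b, 1]
rows = tf.expand_dims(tf.range(a.shape[0]), axis=1)  # [[0], [1], [2]]
indices = tf.concat([rows, a], axis=1)               # [[0,1], [1,0], [2,1]]
q_a = tf.gather_nd(q_out, indices)                   # [0.9, 0.7, 0.6]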
def __build_cnn2D(self):
    inputs = tf.keras.Input(shape=(self.image_width, self.image_height, self.history_length))
    x = layers.Lambda(lambda layer: layer / 255)(inputs)
    x = layers.Conv2D(
        filters=16,
        kernel_size=(4, 4),
        strides=(2, 2),
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    x = layers.MaxPool2D((2, 2))(x)
    x = layers.Conv2D(
        filters=8,
        kernel_size=(2, 2),
        strides=(1, 1),
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    x = layers.MaxPool2D((2, 2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(
        64,
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    predictions = layers.Dense(
        self.num_actions,
        activation='linear',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer=optimizers.Adam(self.learning_rate),
        loss=losses.Huber()
    )  # loss to be removed; it is needed in the bugged version installed on the Jetson
    model.summary()
    return model
def __build_cnn1D(self):
    inputs = tf.keras.Input(shape=(self.state_size, self.history_length))
    x = layers.Conv1D(
        filters=16,
        kernel_size=4,
        strides=2,
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(inputs)
    x = layers.Conv1D(
        filters=32,
        kernel_size=2,
        strides=1,
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(
        64,
        activation='relu',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    predictions = layers.Dense(
        self.num_actions,
        activation='linear',
        kernel_initializer=initializers.VarianceScaling(scale=2.))(x)
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer=optimizers.Adam(self.learning_rate),
        loss=losses.Huber()
    )  # loss to be removed; it is needed in the bugged version installed on the Jetson
    model.summary()
    return model
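# A hedged usage sketch for the compiled 1D network above (assumes `model`
# is the return value of __build_cnn1D with state_size=32, history_length=4,
# num_actions=3; the TD targets here are purely hypothetical):
import numpy as np
states = np.zeros((8, 32, 4), dtype=np.float32)  # batch of stacked states
q_values = model.predict(states)                 # shape (8, 3): one Q per action
targets = q_values.copy()
targets[:, 0] = 1.0                              # hypothetical TD targets for action 0
model.fit(states, targets, verbose=0)            # one Huber-loss update step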
def __init__(self, base_model_type="RoBERTa", activation="relu", kr_rate=0.001,
             score_loss="mse", cpkt=""):
    self.kr_rate = kr_rate
    self.scaler = MinMaxScaler(feature_range=(0, 1))
    # Set the model activation:
    if activation == "leaky_relu":
        self.activation = LeakyReLU()
        self.kr_initializer = tf.keras.initializers.HeUniform()
    elif activation == "paramaterized_leaky_relu":
        self.activation = PReLU()
        self.kr_initializer = tf.keras.initializers.HeUniform()
    elif activation == "relu":
        self.activation = "relu"
        self.kr_initializer = tf.keras.initializers.HeUniform()
    else:
        self.activation = activation
        self.kr_initializer = tf.keras.initializers.GlorotUniform()
    # Set the regression loss:
    self.score_metric = "mean_squared_error"
    if score_loss == "huber":
        delta = 2.0
        self.score_loss = losses.Huber(delta=delta)
    elif score_loss == "log_cosh":
        self.score_loss = "log_cosh"
    elif score_loss == "mean_squared_logarithmic_error":
        self.score_loss = "mean_squared_logarithmic_error"
    elif score_loss == "mae":
        self.score_loss = "mae"
    else:
        self.score_loss = "mean_squared_error"
    # ModelCheckpoint callback:
    if score_loss == "huber":
        cpkt = cpkt + "-kr-{}-{}-{}-{}".format(self.kr_rate, self.activation, score_loss, delta)
    else:
        cpkt = cpkt + "-kr-{}-{}-{}".format(self.kr_rate, self.activation, score_loss)
    cpkt = cpkt + "-epoch-{epoch:02d}-val-loss-{val_loss:02f}.h5"
    self.model_checkpoint_callback = ModelCheckpoint(filepath=cpkt,
                                                     save_weights_only=True,
                                                     monitor='val_loss',
                                                     mode='auto',
                                                     save_freq='epoch',
                                                     save_best_only=True)
    # Reduce Learning Rate on Plateau callback:
    self.reduce_lr_callback = ReduceLROnPlateau(monitor='val_loss', mode='auto',
                                                factor=0.2, patience=10,
                                                min_lr=0.0005, verbose=1)
    # Early stopping
    self.early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
    print("\nActivation: ", self.activation)
    print("Kernel Initializer: ", self.kr_initializer)
    print("Kernel Regularizing Rate: ", self.kr_rate)
    print("\n")
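# Why delta matters: Keras' Huber loss is quadratic for |error| <= delta and
# linear beyond it, so large residuals are penalized less than under MSE.
# A small numeric check with the delta=2.0 used above:
import tensorflow as tf
y_true = tf.constant([[0.0], [0.0]])
y_pred = tf.constant([[1.0], [10.0]])
h = tf.keras.losses.Huber(delta=2.0)
# per pair: 0.5 * 1^2 = 0.5 (quadratic branch); 2 * (10 - 0.5 * 2) = 18 (linear branch)
print(h(y_true, y_pred).numpy())    # mean of the two: 9.25
mse = tf.keras.losses.MeanSquaredError()
print(mse(y_true, y_pred).numpy())  # 50.5 -- the outlier dominates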
def __init__(self, args):
    self.debug = args.debug
    self.clip_grad = args.clip_grad
    self.action_dim = args.action_dim
    self.discount = args.discount
    self.batch_size = args.batch
    self.use_huber = args.huber
    self.use_mse = args.mse
    self.use_rms = args.rms
    self.use_adam = args.adam
    self.tau = args.tau
    if args.optimistic:
        self.train_net = OptimisticCNN(self.action_dim, args.beta)
        self.fixed_net = OptimisticCNN(self.action_dim, args.beta)
    else:
        self.train_net = CNN(self.action_dim)
        self.fixed_net = CNN(self.action_dim)
    self.train_net.trainable = True
    self.fixed_net.trainable = False
    if args.dqn:
        self.next_policy_net = self.fixed_net
        self.Q_next_net = self.fixed_net
    elif args.ddqn:
        if args.pol:
            self.next_policy_net = self.fixed_net
            self.Q_next_net = self.train_net
        elif args.vi:
            self.next_policy_net = self.train_net
            self.Q_next_net = self.fixed_net
    if args.rms:
        self.optimizer = optimizers.RMSprop(learning_rate=args.lr, rho=0.95,
                                            momentum=0.95, epsilon=0.01)
    elif args.adam:
        self.optimizer = optimizers.Adam(learning_rate=args.lr)
    elif args.sgd:
        self.optimizer = optimizers.SGD(learning_rate=args.lr, momentum=0.9)
    if args.huber:
        self.loss_func = losses.Huber()
    elif args.mse:
        self.loss_func = losses.MeanSquaredError()
    # Build both networks with a dummy forward pass, then sync their weights
    self.train_net(tf.random.uniform(shape=[1, 84, 84, 4]))
    self.fixed_net(tf.random.uniform(shape=[1, 84, 84, 4]))
    self.hard_update()
    self.policy_net = self.train_net
    if args.fixed_policy:
        self.policy_net = self.fixed_net
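# `hard_update` is referenced above but not shown. A minimal sketch of the
# usual implementation (an assumption, not the author's code) copies the
# online network's weights into the fixed/target network:
def hard_update(self):
    self.fixed_net.set_weights(self.train_net.get_weights())

# The `tau` attribute suggests a Polyak soft-update variant is used elsewhere;
# a hypothetical sketch of that form:
def soft_update(self):
    blended = [self.tau * w_t + (1.0 - self.tau) * w_f
               for w_t, w_f in zip(self.train_net.get_weights(),
                                   self.fixed_net.get_weights())]
    self.fixed_net.set_weights(blended)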
def compute_loss(self, action_probs, values, returns):
    advantage = returns - values
    action_log_probs = tf.math.log(action_probs)
    actor_loss = -tf.math.reduce_sum(action_log_probs * advantage)
    huber_loss = losses.Huber(reduction=losses.Reduction.SUM)
    critic_loss = huber_loss(values, returns)
    return actor_loss + critic_loss
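# Hedged usage sketch for compute_loss above, with dummy per-step tensors for
# a hypothetical 3-step episode (`agent` stands for any instance exposing it):
action_probs = tf.constant([[0.6], [0.3], [0.9]])  # prob. of each chosen action
values = tf.constant([[1.0], [0.5], [0.2]])        # critic estimates V(s_t)
returns = tf.constant([[1.2], [0.7], [0.1]])       # discounted returns G_t
loss = agent.compute_loss(action_probs, values, returns)
# actor term: -sum(log(pi) * (G - V)); critic term: summed Huber(V, G)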
def __init__(self, train_net, fixed_net, args):
    self.train_net = train_net
    self.fixed_net = fixed_net
    self.debug = args.debug
    obs_dim, action_dim = args.obs_dim, args.action_dim
    self.action_dim = action_dim
    self.discount = args.discount
    self.batch_size = args.batch
    self.use_huber = args.huber
    self.use_mse = args.mse
    self.use_rms = args.rms
    self.use_adam = args.adam
    self.tau = args.tau
    self.train_net.trainable = True
    self.fixed_net.trainable = False
    if args.rms:
        self.optimizer = optimizers.RMSprop(learning_rate=args.lr, rho=0.95,
                                            momentum=0.95, epsilon=0.01)
    elif args.adam:
        self.optimizer = optimizers.Adam(learning_rate=args.lr)
    elif args.sgd:
        self.optimizer = optimizers.SGD(learning_rate=args.lr, momentum=0.9)
    if args.huber:
        self.loss_func = losses.Huber()
    elif args.mse:
        self.loss_func = losses.MeanSquaredError()
    self.train_net(tf.random.uniform(shape=[1, obs_dim], dtype=TF_TYPE))
    self.fixed_net(tf.random.uniform(shape=[1, obs_dim], dtype=TF_TYPE))
    self.hard_update()
    self.e_greedy_train = EpsilonGreedy(args)
    self.eval_epsilon = args.eval_epsilon
    self.policy_net = self.train_net
    if args.fixed_policy:
        self.policy_net = self.fixed_net
    if args.dqn:
        self.next_policy_net = self.fixed_net
        self.Q_next_net = self.fixed_net
    elif args.ddqn:
        if args.pol:
            self.next_policy_net = self.fixed_net
            self.Q_next_net = self.train_net
        elif args.vi:
            self.next_policy_net = self.train_net
            self.Q_next_net = self.fixed_net
def gradient_train(self, old_states, target_q, one_hot_actions):
    with tf.GradientTape() as tape:
        q_values = self.target_net(old_states)
        # Select Q(s, a) of the taken actions via the one-hot mask
        current_q = tf.reduce_sum(tf.multiply(q_values, one_hot_actions), axis=1)
        loss = losses.Huber()(target_q, current_q)
    variables = self.target_net.trainable_variables
    gradients = tape.gradient(loss, variables)
    self.target_net.optimizer.apply_gradients(zip(gradients, variables))
    return loss
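# The one-hot mask above is an alternative to the gather_nd indexing used in
# the earlier snippets. A small illustration (hypothetical values):
q_values = tf.constant([[0.1, 0.9],
                        [0.7, 0.3]])
actions = tf.constant([1, 0])
one_hot_actions = tf.one_hot(actions, depth=2)                 # [[0,1],[1,0]]
current_q = tf.reduce_sum(q_values * one_hot_actions, axis=1)  # [0.9, 0.7]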
def __generate_model():
    now_state_input = krs_layer.Input(shape=(8, 8, 1))
    action_state_input = krs_layer.Input(shape=(8, 8, 1))
    now_state_conv = krs_layer.Conv2D(32, (5, 5), strides=(2, 2), activation="relu",
                                      padding="same")(now_state_input)
    action_state_conv = krs_layer.Conv2D(32, (5, 5), strides=(2, 2), activation="relu",
                                         padding="same")(action_state_input)
    combined_layer = krs_layer.concatenate([now_state_conv, action_state_conv])
    combined_layer = krs_layer.Conv2D(64, (3, 3), strides=(3, 3), activation="relu",
                                      padding="same")(combined_layer)
    combined_layer = krs_layer.Conv2D(64, (3, 3), strides=(3, 3), activation="relu",
                                      padding="same")(combined_layer)
    combined_layer = krs_layer.Flatten()(combined_layer)
    last_layer = krs_layer.Dense(1, activation="relu")(combined_layer)
    rms_prop = krs_optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=0.01, decay=0.0)
    model = krs_models.Model(inputs=[now_state_input, action_state_input],
                             outputs=last_layer)
    model.compile(rms_prop, loss=krs_losses.Huber())
    return model
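# Usage sketch for the two-input value model above: it scores a (current
# state, candidate action state) pair of 8x8 boards. The inputs here are
# hypothetical placeholders, just to show the expected shapes:
import numpy as np
model = __generate_model()
now_state = np.zeros((1, 8, 8, 1), dtype=np.float32)
action_state = np.ones((1, 8, 8, 1), dtype=np.float32)
value = model.predict([now_state, action_state])  # shape (1, 1)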
def train(q, q_target, memory, optimizer):
    huber = losses.Huber()
    for i in range(10):
        s, a, r, s_prime, done = memory.sample(batch_size)
        with tf.GradientTape() as tape:
            q_out = q(s)
            indices = tf.expand_dims(tf.range(a.shape[0]), axis=1)
            indices = tf.concat([indices, a], axis=1)
            # Pick out the elements of q_out addressed by `indices`
            q_a = tf.gather_nd(q_out, indices)
            q_a = tf.expand_dims(q_a, axis=1)
            max_q_prime = tf.reduce_max(q_target(s_prime), axis=1, keepdims=True)
            target = r + gamma * max_q_prime * done
            loss = huber(q_a, target)
        grads = tape.gradient(loss, q.trainable_variables)
        optimizer.apply_gradients(zip(grads, q.trainable_variables))
def __init__(self, loss=None, num_timesteps=20, num_fields=1, field_size=32,
             model_type="crnn"):
    """Agent that subclasses from base_model.BaseModel and uses the CRNN model.

    :param loss: Subclass of tf.keras.losses.Loss.
    :param num_timesteps: Number of time steps to simulate and send to the model.
    :param num_fields: Number of fields to prepare the model for.
    :param field_size: Width of the quadratic field to generate and send to the model.
    :param model_type: String naming a classmethod of CRNN; 'crnn' and 'rnn' are valid.
    :raises AttributeError: If the given model_type doesn't exist.
    """
    try:
        model = getattr(CRNN, model_type)(num_timesteps=num_timesteps,
                                          num_fields=num_fields,
                                          field_size=field_size)
    except AttributeError:
        raise AttributeError(
            ("The model_type parameter is invalid; "
             "CRNN doesn't have a factory method of that name."))
    self.loss_instance = loss or kls.Huber(delta=0.2)
    model.compile(optimizer=ko.Adam(), loss=self.loss_instance)
    # Run a dummy forward pass to build the model before calling summary()
    # (the original passed a dummy target as well, but predict() takes no targets)
    dummy_x = tf.zeros((1, *model.spatial_input_shape))
    model.predict(dummy_x)
    model.summary()
    self.model = model
    self.pred_field_shape = (field_size, field_size)
    self.field_scale = 8
    self.sim_field_size = field_size * self.field_scale
    self.sim_field_shape = (self.sim_field_size, self.sim_field_size)
    self.draw_mass = simple_mass
    self.training_history = []
    self.loss = 0
    self.test_y = np.zeros(0)
    self.pred_y = np.zeros(0)
def build_and_compile(self, local_model_name, local_settings, local_hyperparameters):
    try:
        # keras/tf session and random-seed reset/fix
        # kb.clear_session()
        # tf.compat.v1.reset_default_graph()
        np.random.seed(11)
        tf.random.set_seed(2)
        # load hyperparameters
        units_layer_1 = local_hyperparameters['units_layer_1']
        units_layer_2 = local_hyperparameters['units_layer_2']
        units_layer_3 = local_hyperparameters['units_layer_3']
        units_layer_4 = local_hyperparameters['units_layer_4']
        units_dense_layer_4 = local_hyperparameters['units_dense_layer_4']
        units_final_layer = local_hyperparameters['units_final_layer']
        activation_1 = local_hyperparameters['activation_1']
        activation_2 = local_hyperparameters['activation_2']
        activation_3 = local_hyperparameters['activation_3']
        activation_4 = local_hyperparameters['activation_4']
        activation_dense_layer_4 = local_hyperparameters['activation_dense_layer_4']
        activation_final_layer = local_hyperparameters['activation_final_layer']
        dropout_layer_1 = local_hyperparameters['dropout_layer_1']
        dropout_layer_2 = local_hyperparameters['dropout_layer_2']
        dropout_layer_3 = local_hyperparameters['dropout_layer_3']
        dropout_layer_4 = local_hyperparameters['dropout_layer_4']
        dropout_dense_layer_4 = local_hyperparameters['dropout_dense_layer_4']
        input_shape_y = local_hyperparameters['input_shape_y']
        input_shape_x = local_hyperparameters['input_shape_x']
        nof_channels = local_hyperparameters['nof_channels']
        stride_y_1 = local_hyperparameters['stride_y_1']
        stride_x_1 = local_hyperparameters['stride_x_1']
        kernel_size_y_1 = local_hyperparameters['kernel_size_y_1']
        kernel_size_x_1 = local_hyperparameters['kernel_size_x_1']
        kernel_size_y_2 = local_hyperparameters['kernel_size_y_2']
        kernel_size_x_2 = local_hyperparameters['kernel_size_x_2']
        kernel_size_y_3 = local_hyperparameters['kernel_size_y_3']
        kernel_size_x_3 = local_hyperparameters['kernel_size_x_3']
        kernel_size_y_4 = local_hyperparameters['kernel_size_y_4']
        kernel_size_x_4 = local_hyperparameters['kernel_size_x_4']
        pool_size_y_1 = local_hyperparameters['pool_size_y_1']
        pool_size_x_1 = local_hyperparameters['pool_size_x_1']
        pool_size_y_2 = local_hyperparameters['pool_size_y_2']
        pool_size_x_2 = local_hyperparameters['pool_size_x_2']
        pool_size_y_3 = local_hyperparameters['pool_size_y_3']
        pool_size_x_3 = local_hyperparameters['pool_size_x_3']
        pool_size_y_4 = local_hyperparameters['pool_size_y_4']
        pool_size_x_4 = local_hyperparameters['pool_size_x_4']
        optimizer_function = local_hyperparameters['optimizer']
        optimizer_learning_rate = local_hyperparameters['learning_rate']
        epsilon_adam = local_hyperparameters['epsilon_adam']
        if optimizer_function == 'adam':
            optimizer_function = optimizers.Adam(
                learning_rate=optimizer_learning_rate, epsilon=epsilon_adam)
        elif optimizer_function == 'ftrl':
            optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
        elif optimizer_function == 'sgd':
            optimizer_function = optimizers.SGD(optimizer_learning_rate)
        elif optimizer_function == 'rmsp':
            optimizer_function = optimizers.RMSprop(
                optimizer_learning_rate, epsilon=epsilon_adam)
        optimizer_function = tf.train.experimental.enable_mixed_precision_graph_rewrite(
            optimizer_function)
        loss_1 = local_hyperparameters['loss_1']
        loss_2 = local_hyperparameters['loss_2']
        loss_3 = local_hyperparameters['loss_3']
        label_smoothing = local_hyperparameters['label_smoothing']
        losses_list = []
        union_settings_losses = [loss_1, loss_2, loss_3]
        if 'CategoricalCrossentropy' in union_settings_losses:
            losses_list.append(
                losses.CategoricalCrossentropy(label_smoothing=label_smoothing))
        if 'BinaryCrossentropy' in union_settings_losses:
            losses_list.append(losses.BinaryCrossentropy())
        if 'CategoricalHinge' in union_settings_losses:
            losses_list.append(losses.CategoricalHinge())
        if 'KLD' in union_settings_losses:
            losses_list.append(losses.KLDivergence())
        if 'customized_loss_function' in union_settings_losses:
            losses_list.append(customized_loss())
        if 'customized_loss_t2' in union_settings_losses:
            losses_list.append(customized_loss_t2)
        if 'Huber' in union_settings_losses:
            losses_list.append(losses.Huber())
        metrics_list = []
        metric1 = local_hyperparameters['metrics1']
        metric2 = local_hyperparameters['metrics2']
        union_settings_metrics = [metric1, metric2]
        if 'auc_roc' in union_settings_metrics:
            metrics_list.append(metrics.AUC())
        if 'customized_metric_auc_roc' in union_settings_metrics:
            metrics_list.append(customized_metric_auc_roc())
        if 'CategoricalAccuracy' in union_settings_metrics:
            metrics_list.append(metrics.CategoricalAccuracy())
        if 'CategoricalHinge' in union_settings_metrics:
            metrics_list.append(metrics.CategoricalHinge())
        if 'BinaryAccuracy' in union_settings_metrics:
            metrics_list.append(metrics.BinaryAccuracy())
        if local_settings['use_efficientNetB2'] == 'False':
            type_of_model = '_custom'
            if local_hyperparameters['regularizers_l1_l2_1'] == 'True':
                l1_1 = local_hyperparameters['l1_1']
                l2_1 = local_hyperparameters['l2_1']
                activation_regularizer_1 = regularizers.l1_l2(l1=l1_1, l2=l2_1)
            else:
                activation_regularizer_1 = None
            if local_hyperparameters['regularizers_l1_l2_2'] == 'True':
                l1_2 = local_hyperparameters['l1_2']
                l2_2 = local_hyperparameters['l2_2']
                activation_regularizer_2 = regularizers.l1_l2(l1=l1_2, l2=l2_2)
            else:
                activation_regularizer_2 = None
            if local_hyperparameters['regularizers_l1_l2_3'] == 'True':
                l1_3 = local_hyperparameters['l1_3']
                l2_3 = local_hyperparameters['l2_3']
                activation_regularizer_3 = regularizers.l1_l2(l1=l1_3, l2=l2_3)
            else:
                activation_regularizer_3 = None
            if local_hyperparameters['regularizers_l1_l2_4'] == 'True':
                l1_4 = local_hyperparameters['l1_4']
                l2_4 = local_hyperparameters['l2_4']
                activation_regularizer_4 = regularizers.l1_l2(l1=l1_4, l2=l2_4)
            else:
                activation_regularizer_4 = None
            if local_hyperparameters['regularizers_l1_l2_dense_4'] == 'True':
                l1_dense_4 = local_hyperparameters['l1_dense_4']
                l2_dense_4 = local_hyperparameters['l2_dense_4']
                activation_regularizer_dense_layer_4 = regularizers.l1_l2(
                    l1=l1_dense_4, l2=l2_dense_4)
            else:
                activation_regularizer_dense_layer_4 = None
            # building model
            classifier_ = tf.keras.models.Sequential()
            # first layer
            classifier_.add(layers.Input(shape=(input_shape_y, input_shape_x, nof_channels)))
            # classifier_.add(layers.ZeroPadding2D(padding=((0, 1), (0, 1))))
            classifier_.add(layers.Conv2D(
                units_layer_1,
                kernel_size=(kernel_size_y_1, kernel_size_x_1),
                strides=(stride_y_1, stride_x_1),
                activity_regularizer=activation_regularizer_1,
                activation=activation_1,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_1))
            # LAYER 1.5
            classifier_.add(layers.Conv2D(
                units_layer_1,
                kernel_size=(kernel_size_y_1, kernel_size_x_1),
                input_shape=(input_shape_y, input_shape_x, nof_channels),
                strides=(stride_y_1, stride_x_1),
                activity_regularizer=activation_regularizer_1,
                activation=activation_1,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_1))
            # second layer
            classifier_.add(layers.Conv2D(
                units_layer_2,
                kernel_size=(kernel_size_y_2, kernel_size_x_2),
                activity_regularizer=activation_regularizer_2,
                activation=activation_2,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_2))
            # LAYER 2.5
            classifier_.add(layers.Conv2D(
                units_layer_2,
                kernel_size=(kernel_size_y_2, kernel_size_x_2),
                activity_regularizer=activation_regularizer_2,
                activation=activation_2,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_2))
            # third layer
            classifier_.add(layers.Conv2D(
                units_layer_3,
                kernel_size=(kernel_size_y_3, kernel_size_x_3),
                activity_regularizer=activation_regularizer_3,
                activation=activation_3,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_3))
            # LAYER 3.5
            classifier_.add(layers.Conv2D(
                units_layer_3,
                kernel_size=(kernel_size_y_3, kernel_size_x_3),
                activity_regularizer=activation_regularizer_3,
                activation=activation_3,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_3))
            # fourth layer
            classifier_.add(layers.Conv2D(
                units_layer_4,
                kernel_size=(kernel_size_y_4, kernel_size_x_4),
                activity_regularizer=activation_regularizer_4,
                activation=activation_4,
                padding='same',
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2., mode='fan_out', distribution='truncated_normal')))
            classifier_.add(layers.BatchNormalization(axis=-1))
            classifier_.add(layers.Activation(tf.keras.activations.swish))
            classifier_.add(layers.GlobalAveragePooling2D())
            classifier_.add(layers.Dropout(dropout_layer_4))
            # Full connection and final layer
            classifier_.add(layers.Dense(units=units_final_layer,
                                         activation=activation_final_layer))
            # Compile model
            classifier_.compile(optimizer=optimizer_function,
                                loss=losses_list,
                                metrics=metrics_list)
        elif local_settings['use_efficientNetB2'] == 'True':
            type_of_model = '_EfficientNetB2'
            # pretrained_weights = ''.join([local_settings['models_path'],
            #     local_hyperparameters['weights_for_training_efficientnetb2']])
            classifier_pretrained = tf.keras.applications.EfficientNetB2(
                include_top=False,
                weights='imagenet',
                input_tensor=None,
                input_shape=(input_shape_y, input_shape_x, 3),
                pooling=None,
                classifier_activation=None)
            # classifier_pretrained.save_weights(''.join([local_settings['models_path'],
            #     'pretrained_efficientnetb2_weights.h5']))
            #
            # classifier_receptor = tf.keras.applications.EfficientNetB2(
            #     include_top=False, weights=None, input_tensor=None,
            #     input_shape=(input_shape_y, input_shape_x, 1),
            #     pooling=None, classifier_activation=None)
            #
            # classifier_receptor.load_weights(''.join([local_settings['models_path'],
            #     'pretrained_efficientnetb2_weights.h5']), by_name=True)
            #
            # classifier_pretrained = classifier_receptor
            if local_settings['nof_classes'] == 2 or \
                    local_hyperparameters['use_bias_always'] == 'True':
                # with two classes, log(pos/neg) = log(0.75/0.25) = 0.477121254719
                bias_initializer = tf.keras.initializers.Constant(
                    local_hyperparameters['bias_initializer'])
            else:
                # assuming balanced classes...
                bias_initializer = tf.keras.initializers.Constant(0)
            effnb2_model = models.Sequential(classifier_pretrained)
            effnb2_model.add(layers.GlobalAveragePooling2D())
            effnb2_model.add(layers.Dropout(dropout_dense_layer_4))
            # effnb2_model.add(layers.Dense(
            #     units=units_dense_layer_4, activation=activation_dense_layer_4,
            #     kernel_initializer=tf.keras.initializers.VarianceScaling(
            #         scale=0.333333333, mode='fan_out', distribution='uniform'),
            #     bias_initializer=bias_initializer))
            # effnb2_model.add(layers.Dropout(dropout_dense_layer_4))
            effnb2_model.add(layers.Dense(
                units_final_layer,
                activation=activation_final_layer,
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=0.333333333, mode='fan_out', distribution='uniform'),
                bias_initializer=bias_initializer))
            classifier_ = effnb2_model
            if local_settings['use_local_pretrained_weights_for_retraining'] != 'False':
                classifier_.load_weights(''.join([
                    local_settings['models_path'],
                    local_settings['use_local_pretrained_weights_for_retraining']]))
            for layer in classifier_.layers[0].layers:
                layer.trainable = True
                # if 'excite' in layer.name:
                #     layer.trainable = True
                # if 'top_conv' in layer.name:
                #     layer.trainable = True
                # if 'project_conv' in layer.name:
                #     layer.trainable = True
            classifier_.build(input_shape=(input_shape_y, input_shape_x, nof_channels))
            classifier_.compile(optimizer=optimizer_function,
                                loss=losses_list,
                                metrics=metrics_list)
        # Summary of model
        classifier_.summary()
        # save_model
        classifier_json = classifier_.to_json()
        with open(''.join([local_settings['models_path'], local_model_name,
                           type_of_model, '_classifier_.json']), 'w') as json_file:
            json_file.write(classifier_json)
        classifier_.save(''.join([local_settings['models_path'], local_model_name,
                                  type_of_model, '_classifier_.h5']))
        classifier_.save(''.join([local_settings['models_path'], local_model_name,
                                  type_of_model, '/']), save_format='tf')
        print('model architecture saved')
        # output png and pdf of the model; additionally saves a json file model_name_analyzed.json
        if local_settings['model_analyzer'] == 'True':
            model_architecture = model_structure()
            model_architecture_review = model_architecture.analize(
                ''.join([local_model_name, type_of_model, '_classifier_.h5']),
                local_settings, local_hyperparameters)
    except Exception as e:
        print('error in build or compile of customized model')
        print(e)
        classifier_ = None
        logger.error(str(e), exc_info=True)
    return classifier_
def __init__(self, state_shape, num_actions, learning_rate=0.00025,
             load_model_path=None, name='DQN'):
    # Get logger for network
    self.logger = logging.getLogger(name)
    self.state_shape = state_shape
    self.num_actions = num_actions
    # Create DQN model
    # Set backend float dtype
    keras.backend.set_floatx('float32')
    # Input dim: (batch, H, W, D, channels) = (32, 10, 10, 7, 4)
    inputs = keras.Input(shape=self.state_shape, dtype='float32', name='state')
    x = layers.Conv3D(32, (7, 7, 5), 1, padding='same', activation='relu', name='conv1')(inputs)
    x = layers.Conv3D(64, (5, 5, 5), 1, padding='same', activation='relu', name='conv2')(x)
    x = layers.Conv3D(64, (3, 3, 3), 1, padding='same', activation='relu', name='conv3')(x)
    x = layers.Flatten(name='flatten')(x)
    x = layers.Dense(512, activation='relu', name='d1')(x)
    outputs = layers.Dense(self.num_actions, name='d2')(x)
    self.model = keras.Model(inputs, outputs, name=name)
    # Create optimizer
    self.optimizer = optimizers.RMSprop(learning_rate, momentum=0.95, epsilon=0.01)
    # Create loss function
    # self.loss_func = losses.MeanSquaredError()
    # Huber is less sensitive to outliers: it grows linearly, rather than
    # quadratically, once |error| > delta
    self.loss_func = losses.Huber()
    # Accumulate training loss
    self.train_loss = keras.metrics.Mean(name='train_loss')
    self.train_loss.reset_states()
    # Create checkpoint
    self.checkpoint = tf.train.Checkpoint(model=self.model, optimizer=self.optimizer)
    if load_model_path is None:
        self.logger.info('__init__: Creating a new DQN model')
    else:
        # Restart training from the checkpoint
        self.checkpoint.restore(load_model_path)
        self.logger.info('__init__: Loading an existing DQN model from "%s"',
                         load_model_path)
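# A hedged sketch of the matching save path for the tf.train.Checkpoint
# created above (`network` and the checkpoint directory are assumptions):
save_path = network.checkpoint.save('./checkpoints/dqn')
# ...later, restore by passing that path back in as load_model_path:
# DQN(state_shape, num_actions, load_model_path=save_path)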