def identity_blockV2(input_tensor, kernel_size, filters, stage, block, use_bn=True):
    """The identity_block is the block that has no conv layer at shortcut.

    Pre-activation (ResNet v2) ordering: BN -> ReLU -> Conv on the main path,
    with the unmodified input added back at the end.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at main path
        filters: list of integers, the nb_filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        use_bn: Boolean, whether to insert BatchNormalization before each activation

    # Returns
        Output tensor for the block.
    """
    nb_filter1, nb_filter2, nb_filter3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    x = input_tensor
    # axis=3 assumes channels_last data format -- TODO confirm
    if use_bn:
        x = BatchNormalization(axis=3, name=bn_name_base + '2a')(x)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a')(x)
    if use_bn:
        x = BatchNormalization(axis=3, name=bn_name_base + '2b')(x)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same',
                  name=conv_name_base + '2b')(x)
    if use_bn:
        x = BatchNormalization(axis=3, name=bn_name_base + '2c')(x)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c')(x)
    # Identity shortcut: add the original input directly (no conv on shortcut).
    x = KL.Add()([x, input_tensor])
    return x
def build_model(self): """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" # Define input layers states = layers.Input(shape=(self.state_size,), name='states') actions = layers.Input(shape=(self.action_size,), name='actions') # Add hidden layer(s) for state pathway net_states = layers.Dense(units=400,kernel_regularizer=layers.regularizers.l2(0.02))(states) net_states = layers.BatchNormalization()(net_states) net_states = layers.Activation("relu")(net_states) net_states = layers.Dense(units=300, kernel_regularizer=layers.regularizers.l2(0.02))(net_states) # Add hidden layer(s) for action pathway net_actions = layers.Dense(units=300,kernel_regularizer=layers.regularizers.l2(0.02))(actions) # Try different layer sizes, activations, add batch normalization, regularizers, etc. # Combine state and action pathways net = layers.Add()([net_states, net_actions]) net = layers.Activation('relu')(net) # Add more layers to the combined network if needed # Add final output layer to prduce action values (Q values) Q_values = layers.Dense(units=1, name='q_values',kernel_initializer = layers.initializers.RandomUniform(-0.005,0.005))(net) # Create Keras model self.model = models.Model(inputs=[states, actions], outputs=Q_values) # Define optimizer and compile model for training with built-in loss function optimizer = optimizers.Adam(lr=0.001) self.model.compile(optimizer=optimizer, loss='mse') # Compute action gradients (derivative of Q values w.r.t. to actions) action_gradients = K.gradients(Q_values, actions) # Define an additional function to fetch action gradients (to be used by actor model) self.get_action_gradients = K.function( inputs=[*self.model.input, K.learning_phase()], outputs=action_gradients)
def build_model(self):
    """Build a critic (value) network mapping (state, action) pairs to Q-values.

    Side effects: sets ``self.model`` (compiled Keras model) and
    ``self.get_action_gradients`` (backend function returning dQ/da for
    the actor's update step).
    """
    # Define input layers.
    states = layers.Input(shape=(self.state_size, ), name='states')
    # Bug fix: layer name was misspelled 'actinos'.
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # hidden layer for state pathway
    net_states = layers.Dense(units=400, activation='relu')(states)
    net_states = layers.Dense(units=300)(net_states)

    # hidden layer for action pathway
    net_actions = layers.Dense(units=300)(actions)

    # Hyper parameters: layer size, activations, batch normalization, regularization

    # combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Final output layer to produce action values (Q values); small uniform
    # init keeps initial Q estimates near zero (common DDPG convention).
    Q_values = layers.Dense(units=1, name='q_values',
                            kernel_initializer=RandomUniform(
                                minval=-0.003, maxval=0.003))(net)

    # create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with builtin loss function
    optimizer = optimizers.Adam(lr=self.learningRate)
    self.model.compile(optimizer=optimizer, loss='mse')

    # compute action gradients (derivative of Q values w.r.t actions)
    action_gradients = K.gradients(Q_values, actions)

    # define an additional function to fetch action gradients (for actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def _build_model(self, inputs, return_model=False):
    """Build the WaveNet-style graph: initial causal conv -> stacked dilated
    residual blocks -> skip aggregation -> two 1x1 convs -> softmax over bins.

    Arguments:
        inputs: input tensor (or Input layer) the graph is built on.
        return_model: if True, wrap the output in a Keras Model and return it;
            otherwise return the output tensor for further composition.
    """
    # inputs = Input(shape=(self.fragment_length, self.output_bins), name='input_part')
    out = inputs
    skip_connections = []
    out = CausalDilatedConv1D(self.filters, 2, atrous_rate=1, border_mode='valid',
                              causal=True, name='initial_causal_conv')(out)
    # Each residual block returns the running residual output plus a skip tensor.
    for s in range(self.stacks):
        for i in range(0, self.dilation_depth + 1):
            out, skip_out = self._build_model_residual_block(out, i, s)
            skip_connections.append(skip_out)

    if self.use_skip_connections:  # if not using skip, the out is the final added residual out
        out = layers.Add()(skip_connections)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(self.output_bins, 1, padding='same',
                        kernel_regularizer=l2(self.final_l2))(out)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(self.output_bins, 1, padding='same')(out)

    if not self.learn_all_outputs:
        # NOTE(review): raising a DeprecationWarning aborts instead of warning;
        # the Lambda below is unreachable -- this path is intentionally disabled.
        raise DeprecationWarning(
            'Learning on just all outputs is wasteful, now learning only inside receptive field.'
        )
        out = layers.Lambda(
            lambda x: x[:, -1, :],
            output_shape=(out._keras_shape[-1], ))(
                out)  # Based on gif in deepmind blog: take last output?
    out = layers.Activation('softmax', name="output_softmax")(out)

    if return_model:
        model = Model(inputs, out)
        # self.receptive_field, self.receptive_field_ms = self._compute_receptive_field()
        return model
    else:
        return out
def block0(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """A residual block (two convs, optional projection shortcut).

    # Arguments
        x: input tensor.
        filters: integer, filters of the bottleneck layer.
        kernel_size: default 3, kernel size of the bottleneck layer.
        stride: default 1, stride of the first layer.
        conv_shortcut: default True, use convolution shortcut if True,
            otherwise identity shortcut.
        name: string, block label used to prefix layer names.

    # Returns
        Output tensor for the residual block.
    """
    channel_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    def batchnorm(tensor, suffix):
        # Every BN layer in this block shares the same axis/epsilon settings.
        return layers.BatchNormalization(axis=channel_axis, epsilon=1.001e-5,
                                         name=name + suffix)(tensor)

    # Shortcut branch: 1x1 projection when requested, identity otherwise.
    if conv_shortcut is True:
        shortcut = layers.Conv2D(filters, 1, strides=stride,
                                 name=name + '_0_conv')(x)
        shortcut = batchnorm(shortcut, '_0_bn')
    else:
        shortcut = x

    # Main branch: two kxk convolutions with BN, ReLU between them.
    main = layers.Conv2D(filters, kernel_size, strides=stride, padding='SAME',
                         name=name + '_1_conv')(x)
    main = batchnorm(main, '_1_bn')
    main = layers.Activation('relu', name=name + '_1_relu')(main)

    main = layers.Conv2D(filters, kernel_size, padding='SAME',
                         name=name + '_2_conv')(main)
    main = batchnorm(main, '_2_bn')

    merged = layers.Add(name=name + '_add')([shortcut, main])
    return layers.Activation('relu', name=name + '_out')(merged)
def model(self):
    """Build the critic: state and action pathways (Dense -> BN -> ReLU twice
    each), summed and reduced to a scalar Q-value.

    Side effects: sets self.model (compiled with Adam/MSE) and
    self.get_action_gradients (backend function yielding dQ/da).
    """
    # State pathway.
    state_in = layers.Input(shape=self.state_shape)
    s = layers.Dense(units=512, activation='relu')(state_in)
    s = layers.BatchNormalization()(s)
    s = layers.Activation('relu')(s)
    s = layers.Dense(units=256, activation='relu')(s)
    s = layers.BatchNormalization()(s)
    s = layers.Activation('relu')(s)

    # Action pathway (mirrors the state pathway).
    action_in = layers.Input(shape=self.action_shape)
    a = layers.Dense(units=512, activation='relu')(action_in)
    a = layers.BatchNormalization()(a)
    a = layers.Activation('relu')(a)
    a = layers.Dense(units=256, activation='relu')(a)
    a = layers.BatchNormalization()(a)
    a = layers.Activation('relu')(a)

    # Merge pathways and produce the scalar Q-value.
    q_out = layers.Dense(units=1)(layers.Activation('relu')(
        layers.Add()([s, a])))

    self.model = models.Model(inputs=[state_in, action_in], outputs=q_out)
    self.model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse')

    # Expose dQ/da for the actor's policy-gradient update.
    self.get_action_gradients = backend.function(
        inputs=[*self.model.input, backend.learning_phase()],
        outputs=backend.gradients(q_out, action_in))
def s_conv_block(input_tensor, kernel_size, filters, stage, block, prefix='s_', strides=(2, 2), use_bias=True, train_bn=None):
    """conv_block is the block that has a conv layer at shortcut

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at main path
        filters: list of integers, the nb_filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        prefix: layer name prefix to distinguish teacher and student network
        use_bias: Boolean. To use or not use a bias in conv layers.
        train_bn: Boolean. Train or freeze Batch Norm layers

    Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
    And the shortcut should have subsample=(2,2) as well
    """
    nb_filter1, nb_filter2, nb_filter3 = filters
    conv_name_base = prefix + 'res' + str(stage) + block + '_branch'
    bn_name_base = prefix + 'bn' + str(stage) + block + '_branch'

    # Main path: strided 1x1 -> kxk (same padding) -> 1x1 bottleneck.
    x = KL.Conv2D(nb_filter1, (1, 1), strides=strides,
                  name=conv_name_base + '2a', use_bias=use_bias)(input_tensor)
    x = modellib.BatchNorm(name=bn_name_base + '2a')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same',
                  name=conv_name_base + '2b', use_bias=use_bias)(x)
    x = modellib.BatchNorm(name=bn_name_base + '2b')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c',
                  use_bias=use_bias)(x)
    x = modellib.BatchNorm(name=bn_name_base + '2c')(x, training=train_bn)

    # Shortcut: strided 1x1 projection so shapes match for the Add.
    shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides,
                         name=conv_name_base + '1', use_bias=use_bias)(input_tensor)
    shortcut = modellib.BatchNorm(name=bn_name_base + '1')(shortcut, training=train_bn)

    x = KL.Add()([x, shortcut])
    x = KL.Activation('relu', name=prefix + 'res' + str(stage) + block + '_out')(x)
    return x
def build_NN(self):
    """Build the critic network: separate state and action Dense pathways,
    summed and passed through a final Dense(1) to produce Q(s, a).

    Side effects: sets self.model (compiled with Adam/MSE) and
    self.get_action_gradients (backend function yielding dQ/da).
    """
    # Try to mimic Actor ??
    states = layers.Input(shape=(self.state_size,), name='states')  # define states input
    net_states = layers.Dense(units=32, activation=None)(states)  # first layer
    # net_states = layers.BatchNormalization()(net_states)  # first normalize
    net_states = layers.Activation('relu')(net_states)  # first activation function
    net_states = layers.Dense(units=64, activation=None)(net_states)  # second layer
    # net_states = layers.BatchNormalization()(net_states)  # second normalize
    net_states = layers.Activation('relu')(net_states)  # second activation function
    # net_states = layers.Dense(units=16, activation='relu')(net_states)  # third layer, no activation

    # try to mimic Actor again ??
    actions = layers.Input(shape=(self.action_size,), name='actions')  # define inputs
    net_actions = layers.Dense(units=32, activation=None)(actions)  # first layer
    # net_actions = layers.BatchNormalization()(net_actions)  # first normalize
    net_actions = layers.Activation('relu')(net_actions)  # first activation function
    net_actions = layers.Dense(units=64, activation=None)(net_actions)  # second layer
    # net_actions = layers.BatchNormalization()(net_actions)  # second normalize
    net_actions = layers.Activation('relu')(net_actions)  # second activation function
    # net_actions = layers.Dense(units=16, activation='relu')(net_actions)  # third layer, no activation

    # combination
    net = layers.Add()([net_states, net_actions])  # combine state and actions
    net = layers.Activation('relu')(net)  # fourth activation
    # net = layers.Dense(units=16, activation='relu')(actions)  # fifth activation
    Q_values = layers.Dense(units=1, name='q_values')(net)  # generate a Q_value

    self.model = models.Model(inputs=[states, actions], outputs=Q_values)  # create the model
    optimizer = optimizers.Adam(lr=0.0005)  # set the optimizer
    self.model.compile(optimizer=optimizer, loss='mse')  # compile the model

    action_gradients = K.gradients(Q_values, actions)  # calculate the gradients with respect to actions
    # function to get the action gradients, to be used by Actor
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def CapsNet(input_shape, n_class, routings): x = layers.Input(shape=input_shape) # Layer 1: Just a conventional Conv2D layer conv1 = layers.Conv2D(filters=256, kernel_size=9, strides=1, padding='valid', activation='relu', name='conv1')(x) # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_capsule] primarycaps = PrimaryCap(conv1, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid') # Layer 3: Capsule layer. Routing algorithm works here. digitcaps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings, name='digitcaps')(primarycaps) # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape. # If using tensorflow, this will not be necessary. :) out_caps = Length(name='capsnet')(digitcaps) # Decoder network. y = layers.Input(shape=(n_class,)) masked_by_y = Mask()([digitcaps, y]) # The true label is used to mask the output of capsule layer. For training masked = Mask()(digitcaps) # Mask using the capsule with maximal length. For prediction # Shared Decoder model in training and prediction decoder = models.Sequential(name='decoder') decoder.add(layers.Dense(512, activation='relu', input_dim=16*n_class)) decoder.add(layers.Dense(1024, activation='relu')) decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid')) decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon')) # Models for training and evaluation (prediction) train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)]) eval_model = models.Model(x, [out_caps, decoder(masked)]) # manipulate model noise = layers.Input(shape=(n_class, 16)) noised_digitcaps = layers.Add()([digitcaps, noise]) masked_noised_y = Mask()([noised_digitcaps, y]) manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y)) return train_model, eval_model, manipulate_model
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values.

    Side effects: sets self.model (compiled) and self.get_action_gradients.
    """
    # Input layers for the two pathways.
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # State pathway: two Dense layers, then batch norm and dropout.
    state_path = layers.Dense(units=32, activation='relu')(states)
    state_path = layers.Dense(units=128, activation='relu')(state_path)
    state_path = layers.BatchNormalization()(state_path)
    state_path = layers.Dropout(rate=0.3)(state_path)

    # Action pathway: mirrors the state pathway.
    action_path = layers.Dense(units=32, activation='relu')(actions)
    action_path = layers.Dense(units=128, activation='relu')(action_path)
    action_path = layers.BatchNormalization()(action_path)
    action_path = layers.Dropout(rate=0.3)(action_path)

    # Merge the pathways and apply a final non-linearity.
    merged = layers.Add()([state_path, action_path])
    merged = layers.Activation('relu')(merged)

    # Output head producing the scalar Q-value.
    q_values = layers.Dense(units=1, name='q_values')(merged)

    self.model = models.Model(inputs=[states, actions], outputs=q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=0.03), loss='mse')

    # Derivative of Q w.r.t. actions, exposed for the actor's update step.
    grads = K.gradients(q_values, actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=grads)
def convolutional_block(X, f, filters, stage, block, s=2):
    """SE-ResNet convolutional block: 1x1 -> fxf -> 1x1 bottleneck with a
    squeeze-and-excitation recalibration and a strided projection shortcut.

    Arguments:
        X: input tensor.
        f: kernel size of the middle conv layer.
        filters: (F1, F2, F3) filter counts for the three conv layers.
        stage, block: labels used to build layer names.
        s: stride of the first conv and of the shortcut projection.

    Returns:
        Output tensor of the block.
    """
    block_name = str(stage) + "_" + str(block)
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    F1, F2, F3 = filters

    X_shortcut = X

    # Main path: strided 1x1 -> fxf (same padding) -> 1x1 bottleneck.
    X = layers.Conv2D(filters=F1, kernel_size=(1,1), strides=(s,s), padding='valid',
                      name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = layers.BatchNormalization(axis=3, name=bn_name_base + '2a')(X)  # axis=3 assumes channels_last -- TODO confirm
    X = layers.Activation('relu')(X)

    X = layers.Conv2D(filters=F2, kernel_size=(f,f), strides=(1,1), padding='same',
                      name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = layers.BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = layers.Activation('relu')(X)

    X = layers.Conv2D(filters=F3, kernel_size=(1,1), strides=(1,1), padding='valid',
                      name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = layers.BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    # Squeeze-and-excitation: global pool -> bottleneck FC (reduction 16) ->
    # sigmoid gate, broadcast back over the spatial dimensions.
    se = layers.GlobalAveragePooling2D(name='pool' + block_name + '_gap')(X)
    se = layers.Dense(F3 // 16, activation='relu', name = 'fc' + block_name + '_sqz')(se)
    se = layers.Dense(F3, activation='sigmoid', name = 'fc' + block_name + '_exc')(se)
    se = layers.Reshape([1, 1, F3])(se)
    X = layers.Multiply(name='scale' + block_name)([X, se])

    # Projection shortcut so shapes match the strided, widened main path.
    X_shortcut = layers.Conv2D(filters=F3, kernel_size=(1,1), strides=(s,s), padding='valid',
                               name=conv_name_base + '1', kernel_initializer=glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = layers.BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)

    X = layers.Add()([X, X_shortcut])
    X = layers.Activation('relu')(X)

    return X
def _create_octconv_last_residual_block(inputs, ch, alpha):
    """Terminal OctConv block: folds the low-frequency branch back into the
    high-frequency branch so a single tensor comes out (alpha_out = 0).

    Arguments:
        inputs: (high, low) tensor pair from the previous OctConv block.
        ch: number of output channels.
        alpha: OctConv low-frequency channel ratio.
    """
    high, low = inputs

    # One more octave convolution, then BN + ReLU on each branch.
    high, low = OctConv2D(filters=ch, alpha=alpha)([high, low])
    high = layers.BatchNormalization()(high)
    high = layers.Activation("relu")(high)
    low = layers.BatchNormalization()(low)
    low = layers.Activation("relu")(low)

    # Equivalent of a vanilla Conv2D: each branch is projected with a 3x3
    # conv to the high-resolution output.
    hh = layers.Conv2D(ch, 3, padding="same")(high)   # high -> high
    lh = layers.Conv2D(ch, 3, padding="same")(low)    # low -> high
    # 2x nearest-neighbour upsampling of the low branch to match spatial size.
    lh = layers.Lambda(lambda x: K.repeat_elements(
        K.repeat_elements(x, 2, axis=1), 2, axis=2))(lh)

    merged = layers.Add()([hh, lh])
    merged = layers.BatchNormalization()(merged)
    return layers.Activation("relu")(merged)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), use_bias=True):
    """ conv_block is the block that has a conv layer at shortcut

    Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at main path
        filters: list of integers, the nb_filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
    And the shortcut should have subsample=(2,2) as well.
    Shortcut: The input tensor is passed through a Conv2D + Batch Norm layers
              Added to the main path passed through a ReLu activation layer
    """
    nb_filter1, nb_filter2, nb_filter3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Main path: strided 1x1 -> kxk (same padding) -> 1x1 bottleneck.
    x = KL.Conv2D(nb_filter1, (1, 1), strides=strides,
                  name=conv_name_base + '2a', use_bias=use_bias)(input_tensor)
    x = BatchNorm(axis=3, name=bn_name_base + '2a')(x)  # axis=3 assumes channels_last -- TODO confirm
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same',
                  name=conv_name_base + '2b', use_bias=use_bias)(x)
    x = BatchNorm(axis=3, name=bn_name_base + '2b')(x)
    x = KL.Activation('relu')(x)
    x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c',
                  use_bias=use_bias)(x)
    x = BatchNorm(axis=3, name=bn_name_base + '2c')(x)

    # Projection shortcut so shapes match the strided, widened main path.
    shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides,
                         name=conv_name_base + '1', use_bias=use_bias)(input_tensor)
    shortcut = BatchNorm(axis=3, name=bn_name_base + '1')(shortcut)

    x = KL.Add()([x, shortcut])
    x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x)
    return x
def residual_block(x): original_x = x # TODO: initalization, regularization? # Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet. tanh_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2**i, padding='valid', causal=True, use_bias=use_bias, name='dilated_conv_%d_tanh_s%d' % (2**i, s), activation='tanh', kernel_regularizer=l2(res_l2))(x) sigm_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2**i, padding='valid', causal=True, use_bias=use_bias, name='dilated_conv_%d_sigm_s%d' % (2**i, s), activation='sigmoid', kernel_regularizer=l2(res_l2))(x) x = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out]) res_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, kernel_regularizer=l2(res_l2))(x) skip_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, kernel_regularizer=l2(res_l2))(x) res_x = layers.Add()([original_x, res_x]) return res_x, skip_x
def CapsNet_nogradientstop_crossentropy(input_shape, n_class, routings):
    """Capsule network variant (no gradient stop in routing) with a softmax
    cross-entropy head, a reconstruction decoder, and a model exposing the
    routing coupling coefficients.

    Returns:
        (train_model, eval_model, manipulate_model, weight_c_model)
    """
    # best testing results! val 0.13xx testX cnn1 200 1 cnn2 150 9 drop1 0.68 drop20.68 n_channels 50 kernel_size 20,dropout1
    x = layers.Input(shape=input_shape)
    conv1 = layers.Conv1D(filters=200, kernel_size=1, strides=1, padding='valid',
                          kernel_initializer='he_normal',activation='relu', name='conv1')(x)
    #conv1=BatchNormalization()(conv1)
    conv1 = Dropout(0.7)(conv1)
    conv2 = layers.Conv1D(filters=200, kernel_size=9, strides=1, padding='valid',
                          kernel_initializer='he_normal',activation='relu', name='conv2')(conv1)
    #conv1=BatchNormalization()(conv1)
    conv2 = Dropout(0.75)(conv2)  #0.75 valx loss has 0.1278!
    primarycaps = PrimaryCap(conv2, dim_capsule=8, n_channels=60, kernel_size=20,
                             kernel_initializer='he_normal',strides=1, padding='valid',dropout=0.2)
    dim_capsule_dim2=10
    # Capsule layer. Routing algorithm works here.
    digitcaps_c = CapsuleLayer_nogradient_stop(num_capsule=n_class, dim_capsule=dim_capsule_dim2,
                                               num_routing=routings,name='digitcaps',
                                               kernel_initializer='he_normal',dropout=0.1)(primarycaps)
    #digitcaps_c = CapsuleLayer(num_capsule=n_class, dim_capsule=dim_capsule_dim2, num_routing=routings,name='digitcaps',kernel_initializer='he_normal')(primarycaps)
    # Split the capsule-layer output into capsule vectors and coupling weights.
    digitcaps = Extract_outputs(dim_capsule_dim2)(digitcaps_c)
    weight_c = Extract_weight_c(dim_capsule_dim2)(digitcaps_c)
    out_caps = Length()(digitcaps)
    # Softmax over capsule lengths -> class probabilities (cross-entropy head).
    out_caps = Activation('softmax',name='capsnet')(out_caps)

    # Decoder network.
    y = layers.Input(shape=(n_class,))
    masked_by_y = Mask()([digitcaps, y])  # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(digitcaps)  # Mask using the capsule with maximal length. For prediction

    # Shared Decoder model in training and prediction
    decoder = Sequential(name='decoder')
    decoder.add(layers.Dense(512, activation='relu', input_dim=dim_capsule_dim2*n_class))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    train_model = Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = Model(x, [out_caps, decoder(masked)])
    weight_c_model = Model(x,weight_c)

    # manipulate model: inject noise into the digit capsules
    noise = layers.Input(shape=(n_class, dim_capsule_dim2))
    noised_digitcaps = layers.Add()([digitcaps, noise])
    masked_noised_y = Mask()([noised_digitcaps, y])
    manipulate_model = Model([x, y, noise], decoder(masked_noised_y))
    return train_model, eval_model, manipulate_model,weight_c_model
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values.

    Side effects: sets self.model (compiled) and self.get_action_gradients.
    """
    # Input layers.
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # State pathway.
    state_path = create_dense_layer(states, 300)
    state_path = create_dense_layer(state_path, 400)

    # Action pathway.
    action_path = create_dense_layer(actions, 300)
    action_path = create_dense_layer(action_path, 400)

    # Merge the two pathways.
    merged = layers.Activation('relu')(layers.Add()([state_path, action_path]))

    # Scalar Q-value head; tight uniform init keeps early estimates near zero.
    q_values = layers.Dense(
        units=1, name='q_values',
        kernel_initializer=layers.initializers.RandomUniform(
            minval=-0.003, maxval=0.003))(merged)

    self.model = models.Model(inputs=[states, actions], outputs=q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse')

    # Gradient of Q w.r.t. actions, fetched by the actor during training.
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=K.gradients(q_values, actions))
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), use_bias=True): nb_filter1, nb_filter2, nb_filter3 = filters #[64,64,256]等 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) x = BatchNormalization(name=bn_name_base + '2a')(x) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', use_bias=use_bias)(x) x = BatchNormalization(name=bn_name_base + '2b')(x) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', use_bias=use_bias)(x) x = BatchNormalization(name=bn_name_base + '2c')(x) shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', use_bias=use_bias)(input_tensor) shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut) x = KL.Add()([x, shortcut]) x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) return x
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values.

    Side effects: sets self.model (compiled) and self.get_action_gradients.
    """
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    def dense_l2(tensor, n_units):
        # Dense layer (no activation) with a small L2 weight penalty.
        return layers.Dense(
            units=n_units,
            kernel_regularizer=layers.regularizers.l2(1e-6))(tensor)

    # State pathway: Dense -> BN -> ReLU -> Dense.
    state_path = dense_l2(states, 400)
    state_path = layers.BatchNormalization()(state_path)
    state_path = layers.Activation("relu")(state_path)
    state_path = dense_l2(state_path, 300)

    # Action pathway: a single Dense layer matching the state pathway width.
    action_path = dense_l2(actions, 300)

    # Merge the pathways.
    merged = layers.Activation('relu')(layers.Add()([state_path, action_path]))

    # Scalar Q-value head; tight uniform init keeps early estimates near zero.
    q_values = layers.Dense(
        units=1, name='q_values',
        kernel_initializer=layers.initializers.RandomUniform(
            minval=-0.003, maxval=0.003))(merged)

    self.model = models.Model(inputs=[states, actions], outputs=q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse')

    # Gradient of Q w.r.t. actions, fetched by the actor during training.
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=K.gradients(q_values, actions))
def create_generator():
    """Build an SRGAN-style super-resolution generator for 32x32x3 inputs.

    Structure: 9x9 conv + PReLU -> 16 residual blocks -> conv/BN with a long
    skip connection -> two 2x upsampling stages -> 9x9 output conv.
    Returns a compiled Keras Model (MSE loss, Adam optimizer).
    """
    img = L.Input(shape=(32, 32, 3))
    x = L.Conv2D(filters=64, kernel_size=(9, 9), strides=(1, 1), padding='same')(img)
    x = L.PReLU(shared_axes=[1, 2])(x)
    res = x  # saved for the long skip connection around the residual trunk
    for i in range(16):
        x = ResBlock(x, 64)
    x = L.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = L.BatchNormalization()(x)
    x = L.Add()([res, x])
    # Two upsampling stages (conv -> UpSampling2D -> PReLU each).
    x = L.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = L.UpSampling2D()(x)
    x = L.PReLU(shared_axes=[1, 2])(x)
    x = L.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = L.UpSampling2D()(x)
    x = L.PReLU(shared_axes=[1, 2])(x)
    # NOTE(review): 'relu' on the output conv clips negative values; SRGAN
    # typically uses tanh here -- confirm the expected output pixel range.
    x = L.Conv2D(filters=3, kernel_size=(9, 9), strides=(1, 1), padding='same', activation='relu')(x)
    gen = Model(inputs=img, outputs=x)
    # NOTE(review): 'accuracy' is not a meaningful metric for a regression
    # (MSE) objective -- verify intent.
    gen.compile(loss=mean_squared_error, optimizer=Adam(), metrics=['accuracy'])
    return gen
def build_model(self):
    """Build the critic: (state, action) inputs -> scalar Q-value.

    Side effects: sets self.model (compiled) and self.get_action_gradients.
    """
    # Input layers.
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # State pathway.
    state_path = layers.Dense(units=32, activation='relu')(states)
    state_path = layers.Dense(units=64, activation='relu')(state_path)

    # Action pathway.
    action_path = layers.Dense(units=32, activation='relu')(actions)
    action_path = layers.Dense(units=64, activation='relu')(action_path)

    # Merge, activate, then batch-normalize the combined representation.
    merged = layers.Add()([state_path, action_path])
    merged = layers.Activation('relu')(merged)
    merged = layers.normalization.BatchNormalization()(merged)

    # Output head with a tight uniform initializer.
    q_values = layers.Dense(units=1, name='q_values',
                            kernel_initializer=initializers.random_uniform(
                                minval=-0.0005, maxval=0.0005))(merged)

    self.model = models.Model(inputs=[states, actions], outputs=q_values)
    self.model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse')

    # Gradient of Q w.r.t. actions, consumed by the actor's update.
    grads = kb.gradients(q_values, actions)
    self.get_action_gradients = kb.function(
        inputs=[*self.model.input, kb.learning_phase()],
        outputs=grads)
def layer(input_tensor):
    """Pre-activation ResNet bottleneck layer, defined as a closure.

    Uses `stage`, `block`, `filters`, `strides`, `cut` and `attention` from
    the enclosing scope. `cut` selects where the shortcut is taken: 'pre'
    (identity from the raw input) or 'post' (1x1 projection after the
    initial BN+ReLU).
    """
    # get params and names of layers
    conv_params = get_conv_params()
    bn_params = get_bn_params()
    conv_name, bn_name, relu_name, sc_name = handle_block_names(stage, block)

    # Pre-activation: BN + ReLU come before the convolutions.
    x = layers.BatchNormalization(name=bn_name + '1', **bn_params)(input_tensor)
    x = layers.Activation('relu', name=relu_name + '1')(x)

    # defining shortcut connection
    if cut == 'pre':
        shortcut = input_tensor
    elif cut == 'post':
        # Projection shortcut taken from the pre-activated tensor.
        shortcut = layers.Conv2D(filters * 4, (1, 1), name=sc_name, strides=strides, **conv_params)(x)
    else:
        raise ValueError('Cut type not in ["pre", "post"]')

    # continue with convolution layers
    x = layers.Conv2D(filters, (1, 1), name=conv_name + '1', **conv_params)(x)
    x = layers.BatchNormalization(name=bn_name + '2', **bn_params)(x)
    x = layers.Activation('relu', name=relu_name + '2')(x)
    x = layers.ZeroPadding2D(padding=(1, 1))(x)
    x = layers.Conv2D(filters, (3, 3), strides=strides, name=conv_name + '2', **conv_params)(x)
    x = layers.BatchNormalization(name=bn_name + '3', **bn_params)(x)
    x = layers.Activation('relu', name=relu_name + '3')(x)
    x = layers.Conv2D(filters * 4, (1, 1), name=conv_name + '3', **conv_params)(x)

    # use attention block if defined
    if attention is not None:
        x = attention(x)

    # add residual connection
    x = layers.Add()([x, shortcut])
    return x
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values.

    Bug fix: the original compiled ``self.model`` without ever creating it;
    the Keras Model is now constructed before compilation.

    Side effects: sets ``self.model`` (compiled) and
    ``self.get_action_gradients`` (backend function returning dQ/da).
    """
    # Define input layers.
    # Note that the actor model maps states to actions, while the critic
    # model maps (state, action) pairs to their Q-values.
    states = layers.Input(shape = (self.state_size, ), name="states")
    actions = layers.Input(shape = (self.action_size,), name="actions")

    # State and action layers are first processed via separate "pathways"
    # (mini sub-networks), then combined.
    net_states = layers.Dense(units=HIDDEN1_UNITS, activation='relu')(states)
    net_states = layers.Dense(units=HIDDEN2_UNITS, activation='relu')(net_states)

    # Add hidden layers for action pathway.
    net_actions = layers.Dense(units=HIDDEN1_UNITS, activation = 'relu')(actions)
    net_actions = layers.Dense(units=HIDDEN2_UNITS, activation = 'relu')(net_actions)

    # Combine state and action pathways.
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Add final output layer to produce action value (Q value).
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # Create the Keras model (was missing: compile() would fail on an
    # unset self.model).
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function.
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t to actions).
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by actor model).
    self.get_action_gradients = K.function(
        inputs = [*self.model.input, K.learning_phase()],
        outputs = action_gradients
    )
def res_block(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name='resblock'):
    """Residual bottleneck block: 1x1 -> kxk -> 1x1 with 4x channel expansion.

    Args:
        x: input tensor.
        filters: bottleneck width; the block outputs ``4 * filters`` channels.
        kernel_size: kernel size of the middle conv.
        stride: stride of the first 1x1 conv (and of the shortcut conv).
        conv_shortcut: if True, project the shortcut with a strided 1x1 conv
            and BN (needed when shape/channels change); if False, use the
            identity shortcut.
        name: prefix for all layer names in the block.

    Returns:
        Output tensor of the block.
    """
    # BUG FIX: conv_shortcut was ignored (its branch was commented out), so
    # an identity shortcut was impossible and the parameter was dead. Honor
    # it; the default (True) preserves the previous behavior.
    if conv_shortcut:
        shortcut = layers.Conv2D(4 * filters, 1, strides=stride,
                                 name=name + '_0_conv')(x)
        shortcut = layers.BatchNormalization(epsilon=1.001e-5,
                                             name=name + '_0_bn')(shortcut)
    else:
        shortcut = x

    x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
    x = layers.BatchNormalization(epsilon=1.001e-5, name=name + '_1_bn')(x)
    x = layers.Activation('relu', name=name + '_1_relu')(x)

    x = layers.Conv2D(filters, kernel_size, padding='SAME',
                      name=name + '_2_conv')(x)
    x = layers.BatchNormalization(epsilon=1.001e-5, name=name + '_2_bn')(x)
    x = layers.Activation('relu', name=name + '_2_relu')(x)

    x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    x = layers.BatchNormalization(epsilon=1.001e-5, name=name + '_3_bn')(x)

    x = layers.Add(name=name + '_add')([shortcut, x])
    x = layers.Activation('relu', name=name + '_out')(x)
    return x
def ConvLSTM(closeness, period, trend):
    """Run each temporal branch through a ConvLSTM cell and a 1x1 conv,
    sum the three branches, reduce to one channel, and flatten.
    """
    # Apply the ConvLSTM cell to every branch first, then the 1x1 convs,
    # preserving the original layer-creation order.
    branches = [ConvLSTMCell(b) for b in (closeness, period, trend)]
    branches = [layers.Conv2D(64, (1, 1), padding='same', activation='relu',
                              use_bias=True)(b)
                for b in branches]

    # Fuse the branches and collapse to a single output channel.
    merged = layers.Add()(branches)
    merged = layers.Conv2D(1, (1, 1), padding='same', activation='relu',
                           use_bias=True)(merged)
    return layers.Flatten(name='output')(merged)
def __init__(self, **kwargs):
    """Build a factorized 3D convolution: one Conv3D per spatial axis plus Add.

    Requires 'name', 'filters' and 'kernel_size' in ``kwargs``. Any
    'activation' is remembered on ``self.activation`` and the per-axis convs
    are forced to 'linear' (presumably so the nonlinearity is applied after
    the Add — confirm against the call site).
    """
    self.kwargs = kwargs
    self.name = kwargs['name']
    self.filters = kwargs['filters']
    self.kernel = kwargs['kernel_size']

    # Remember the requested activation, then neutralize it for the
    # individual axis convolutions.
    self.activation = 'linear'
    if 'activation' in kwargs:
        self.activation = kwargs['activation']
        kwargs['activation'] = 'linear'

    # Normalize an int kernel to a (d, h, w) tuple.
    if isinstance(self.kernel, int):
        self.kernel = (self.kernel,) * 3
    kd, kh, kw = self.kernel

    # One conv per axis: each collapses one kernel dimension to 1.
    axis_convs = []
    for suffix, axis_kernel in (('_x_axis', (1, kh, kw)),
                                ('_y_axis', (kd, 1, kw)),
                                ('_z_axis', (kd, kh, 1))):
        kwargs['kernel_size'] = axis_kernel
        kwargs['name'] = self.name + suffix
        axis_convs.append(KL.Convolution3D(**kwargs))
    self.convx, self.convy, self.convz = axis_convs

    self.addLayer = KL.Add(name=self.name + '_add')
def build_model(user_n, movie_n, latent_dim):
    """Build a matrix-factorization rating model.

    Score = dot(user_vec, movie_vec) + user_bias + movie_bias, fed through a
    final Dense(1). Compiled with MSE loss, 'adamax' and the file's ``rmse``
    metric.
    """
    print('Building model')

    # Latent embedding for each user / movie, flattened to vectors.
    user_input = layers.Input(shape=[1])
    u_v = layers.Flatten()(layers.Embedding(user_n, latent_dim)(user_input))

    movie_input = layers.Input(shape=[1])
    m_v = layers.Flatten()(layers.Embedding(movie_n, latent_dim)(movie_input))

    # Per-user and per-movie scalar bias terms.
    user_bias = layers.Flatten()(layers.Embedding(user_n, 1)(user_input))
    movie_bias = layers.Flatten()(layers.Embedding(movie_n, 1)(movie_input))

    # Interaction term plus the two biases, then a learned linear output.
    interaction = layers.Dot(axes=1)([u_v, m_v])
    score = layers.Add()([interaction, user_bias, movie_bias])
    score = layers.Dense(1)(score)

    model = Model(inputs=[user_input, movie_input], outputs=[score])
    model.compile(loss='mse', optimizer="adamax", metrics=[rmse])
    model.summary()
    return model
def _bottleneck(x, h_out, n_out, strides=None):
    """Residual unit with two padded 3x3 convs and an optional projection shortcut.

    NOTE(review): ``n_out`` is unused in this body; the unit outputs
    ``h_out`` channels — confirm against callers.
    """
    n_in = x.get_shape()[-1]

    # Default stride: keep resolution when channels already match,
    # otherwise downsample by 2.
    if strides is None:
        strides = (1, 1) if n_in == h_out else (2, 2)

    # Main path: pad -> 3x3 (strided) -> BN -> ReLU -> pad -> 3x3 -> BN.
    out = layers.ZeroPadding2D(padding=(1, 1))(x)
    out = layers.Conv2D(h_out, (3, 3), strides=strides)(out)
    out = layers.BatchNormalization()(out)
    out = layers.Activation('relu')(out)
    out = layers.ZeroPadding2D(padding=(1, 1))(out)
    out = layers.Conv2D(h_out, (3, 3), strides=(1, 1))(out)
    out = layers.BatchNormalization()(out)

    # Project the shortcut when the main-path output shape differs from the
    # input; otherwise use the identity.
    if n_in != h_out:
        shortcut = layers.ZeroPadding2D(padding=(1, 1))(x)
        shortcut = layers.Conv2D(h_out, (3, 3), strides=strides)(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)
    else:
        shortcut = x

    merged = layers.Add()([out, shortcut])
    return layers.Activation('relu')(merged)
def build_model(self):
    """Build the critic: (state, action) -> Q-value, exposing dQ/da for the actor.

    Sets ``self.model`` (compiled with MSE) and ``self.get_action_gradients``
    (a backend function returning the gradient of Q w.r.t. the action input).
    """
    states = layers.Input(shape=(self.state_size,), name="states")
    actions = layers.Input(shape=(self.action_size,), name="actions")

    # Separate pathways for states and actions, then merge.
    s_path = layers.Dense(units=32, activation="relu")(states)
    s_path = layers.Dense(units=64, activation="relu")(s_path)

    a_path = layers.Dense(32, activation="relu")(actions)
    a_path = layers.Dense(64, activation="relu")(a_path)

    merged = layers.Add()([s_path, a_path])
    merged = layers.Activation("relu")(merged)

    # Scalar Q-value head.
    q_values = layers.Dense(units=1, name="q_values")(merged)

    self.model = models.Model(inputs=[states, actions], outputs=q_values)
    optimizer = optimizers.Adam(lr=self.learning_rate)
    self.model.compile(optimizer=optimizer, loss="mse")

    # Gradient of Q w.r.t. actions, fetched by the actor during training.
    action_gradients = K.gradients(q_values, actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def _build_model():
    """Assemble the WaveNet-style model: initial causal conv, stacked dilated
    residual blocks, summed skip connections, and a softmax output head.
    """
    fragment_length = settings.frame_size
    input_shape = Input(shape=(fragment_length, settings.output_bins),
                        name='input_part')

    out = CausalDilatedConv1D(settings.filters, 2, atrous_rate=1,
                              border_mode='valid', causal=True,
                              name='initial_causal_conv')(input_shape)

    # Residual stacks; collect one skip connection per dilation level.
    skip_connections = []
    for stack_idx in range(settings.stacks):
        for depth_idx in range(settings.dilation_depth + 1):
            out, skip_out = _build_model_residual_block(out, depth_idx, stack_idx)
            skip_connections.append(skip_out)

    # Output head over the summed skips.
    out = layers.Add()(skip_connections)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(settings.output_bins, 1, padding='same',
                        kernel_regularizer=l2(settings.final_l2))(out)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(settings.output_bins, 1, padding='same')(out)
    out = layers.Activation('softmax', name="output_softmax")(out)

    # receptive_field, receptive_field_ms = _compute_receptive_field()
    return Model(input_shape, out)
def identity_block(input_tensor, kernel_size, filters, stage, block,
                   use_bias=True, train_bn=True):
    """The identity_block is the block that has no conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at main path
        filters: list of integers, the nb_filters of 3 conv layer at main path
        stage: integer, current stage label (accepted for interface
            compatibility; not used in layer naming here)
        block: 'a','b'..., current block label (accepted for interface
            compatibility; not used in layer naming here)
        use_bias: Boolean. To use or not use a bias in conv layers.
        train_bn: Boolean. Train or freeze Batch Norm layers
    """
    filters1, filters2, filters3 = filters

    # Main path: 1x1 -> kxk (same padding) -> 1x1, each followed by BN.
    out = KL.Conv2D(filters1, (1, 1), use_bias=use_bias)(input_tensor)
    out = BatchNorm()(out, training=train_bn)
    out = KL.Activation('relu')(out)

    out = KL.Conv2D(filters2, (kernel_size, kernel_size), padding='same',
                    use_bias=use_bias)(out)
    out = BatchNorm()(out, training=train_bn)
    out = KL.Activation('relu')(out)

    out = KL.Conv2D(filters3, (1, 1), use_bias=use_bias)(out)
    out = BatchNorm()(out, training=train_bn)

    # Identity shortcut: add the block input back onto the main path.
    out = KL.Add()([out, input_tensor])
    return KL.Activation('relu')(out)