def _get_expert_outputs_optimized(input_num_dimensions, last_layer, num_units, output_dim,
                                  output_tf_activation, topmost_hidden_states=None):
    """
    Method for obtaining expert outputs in an AME optimised for faster compilation speed.
    Reduces the number of operations in the TF graph to a number independent of the chosen
    number of experts by sharing the per-expert operations and looping over their associated
    weights instead. This method is less flexible than the unoptimised version since it
    requires that all experts share the same architecture.
    """
    from keras.initializers import he_normal, zeros

    extra_trainable_weights = []
    has_prebuilt_hidden_states = topmost_hidden_states is not None
    if not has_prebuilt_hidden_states:
        num_experts = input_num_dimensions
        w1 = tf.Variable(he_normal()((num_experts, 1, num_units)))
        b1 = tf.Variable(zeros()((num_experts, num_units)))
        extra_trainable_weights += [w1, b1]
    else:
        num_experts = len(topmost_hidden_states)

    w2 = tf.Variable(he_normal()((num_experts, num_units, output_dim)))
    b2 = tf.Variable(zeros()((num_experts, output_dim)))
    extra_trainable_weights += [w2, b2]

    if not has_prebuilt_hidden_states:
        topmost_hidden_states = tf.TensorArray(dtype=tf.float32, size=num_experts, dynamic_size=False)
    else:
        topmost_hidden_states = tf.stack(topmost_hidden_states, axis=0)
    outputs = tf.TensorArray(dtype=tf.float32, size=num_experts, dynamic_size=False)

    def loop_fun(x):
        i = tf.constant(0)
        c = lambda i, ths, o: tf.math.less(i, num_experts)

        def loop_body(i, topmost_hidden_states, outputs):
            if has_prebuilt_hidden_states:
                topmost_hidden_state = topmost_hidden_states[i]
            else:
                topmost_hidden_state = tf.nn.selu(tf.matmul(x[:, i:i + 1], w1[i]) + b1[i])
                topmost_hidden_states = topmost_hidden_states.write(i, topmost_hidden_state)
            auxiliary_output = output_tf_activation(tf.matmul(topmost_hidden_state, w2[i]) + b2[i])
            outputs = outputs.write(i, auxiliary_output)
            return i + 1, topmost_hidden_states, outputs

        _, hidden_states, aux_outputs = tf.while_loop(c, loop_body, [i, topmost_hidden_states, outputs])
        # _, hidden_states, aux_outputs = tf.nest.map_structure(
        #     tf.stop_gradient, tf.while_loop(c, loop_body, [i, topmost_hidden_states, outputs]))
        # _, hidden_states, aux_outputs = tf.compat.v1.while_loop(c, loop_body, [i, topmost_hidden_states, outputs])
        return [hidden_states.stack() if not has_prebuilt_hidden_states else topmost_hidden_states,
                aux_outputs.stack()]

    topmost_hidden_states, outputs = Lambda(
        loop_fun,
        output_shape=lambda _: [(num_experts, None, num_units), (num_experts, None, output_dim)])(last_layer)
    topmost_hidden_states = Lambda(lambda x: tf.unstack(x, num=num_experts, axis=0))(topmost_hidden_states)
    outputs = Lambda(lambda x: tf.unstack(x, num=num_experts, axis=0))(outputs)
    return outputs, topmost_hidden_states, extra_trainable_weights

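# Hedged usage sketch (not from the original source): wiring the optimised expert outputs
# into a functional Keras model. The feature count, unit count and sigmoid activation are
# illustrative values; `last_layer` is simply the model input here, and the function's own
# module is assumed to provide `tf`, `K` and `Lambda`.
import tensorflow as tf
from keras.layers import Input

inputs = Input(shape=(8,))  # 8 input features -> 8 experts when no prebuilt hidden states
expert_outputs, expert_hidden_states, extra_weights = _get_expert_outputs_optimized(
    input_num_dimensions=8, last_layer=inputs, num_units=16, output_dim=1,
    output_tf_activation=tf.nn.sigmoid)
# `expert_outputs` is a list of 8 tensors, one auxiliary prediction per expert;
# `extra_weights` must be added to the training step explicitly, since these variables
# are created outside of a Keras layer.
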
def build(self, input_shape):
    self.W_emb = self.add_weight(name="W_emb", shape=(self.input_dim, 2 * self.hidden_dim),
                                 initializer=glorot_uniform())
    self.b_emb = self.add_weight(name="b_emb", shape=(2 * self.hidden_dim,), initializer=zeros())
    self.W_R = self.add_weight(name="W_R", shape=(2 * self.hidden_dim, 5 * self.hidden_dim),
                               initializer=glorot_uniform())
    self.b_R = self.add_weight(name="b_R", shape=(5 * self.hidden_dim,), initializer=zeros())
    self.W_S1 = self.add_weight(name="W_S1", shape=(3 * self.hidden_dim, self.RL_dim),
                                initializer=glorot_uniform())
    self.b_S1 = self.add_weight(name="b_S1", shape=(self.RL_dim,), initializer=zeros())
    self.W_S2 = self.add_weight(name="W_S2", shape=(self.RL_dim, 2), initializer=glorot_uniform())
    self.b_S2 = self.add_weight(name="b_S2", shape=(2,), initializer=zeros())
    self.built = True

def build(self, input_shape):
    input_dimension = input_shape[-1]

    units1 = self.output_dimension * self.number_of_mixtures
    self.mu_kernel = self.add_weight(name="mu_kernel", shape=(input_dimension, units1),
                                     initializer=initializers.random_normal(), trainable=True)
    self.mu_bias = self.add_weight(name="mu_bias", shape=(units1,),
                                   initializer=initializers.zeros(), trainable=True)
    self.sigma_kernel = self.add_weight(name="sigma_kernel", shape=(input_dimension, units1),
                                        initializer=initializers.random_normal(), trainable=True)
    self.sigma_bias = self.add_weight(name="sigma_bias", shape=(units1,),
                                      initializer=initializers.zeros(), trainable=True)

    units2 = self.number_of_mixtures
    self.pi_kernel = self.add_weight(name="pi_kernel", shape=(input_dimension, units2),
                                     initializer=initializers.random_normal(), trainable=True)
    self.pi_bias = self.add_weight(name="pi_bias", shape=(units2,),
                                   initializer=initializers.zeros(), trainable=True)

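# Hedged sketch, not the original code: given the weights built above, a matching `call`
# for a mixture-density head could look roughly like this, assuming
# `from keras import backend as K`. Names follow the build() above.
def call(self, inputs):
    mu = K.dot(inputs, self.mu_kernel) + self.mu_bias
    # exponentiate so every mixture standard deviation stays positive
    sigma = K.exp(K.dot(inputs, self.sigma_kernel) + self.sigma_bias)
    # mixture weights sum to one across the `number_of_mixtures` components
    pi = K.softmax(K.dot(inputs, self.pi_kernel) + self.pi_bias)
    return K.concatenate([mu, sigma, pi], axis=-1)
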
def create_model(self, pkeep=0.5, enable_bn=True):
    inputs = Input(shape=self._input_shape)
    # Convolution2D(n_filter, w_filter, h_filter, border_mode='same')(inputs)
    # Activation(activation='relu')()
    # return BatchNormalization()()

    conv1 = Conv2D(filters=96, kernel_size=(7, 7), strides=(4, 4), padding="valid",
                   kernel_initializer=random_normal(stddev=0.01),
                   kernel_regularizer=l2(self._weight_decay),
                   bias_initializer=zeros())(inputs)
    if enable_bn:
        conv1 = BatchNormalization(axis=self._channel_axis, momentum=0.9997)(conv1)
    pool1 = MaxPooling2D(pool_size=3, strides=2)(conv1)

    conv2 = Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding="same",
                   kernel_initializer=random_normal(stddev=0.01),
                   kernel_regularizer=l2(self._weight_decay),
                   bias_initializer=ones())(pool1)  # "One conv at the beginning (spatial size: 32x32)"
    if enable_bn:
        conv2 = BatchNormalization(axis=self._channel_axis, momentum=0.9997)(conv2)
    pool2 = MaxPooling2D(pool_size=3, strides=2)(conv2)

    conv3 = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding="same",
                   kernel_initializer=random_normal(stddev=0.01),
                   kernel_regularizer=l2(self._weight_decay),
                   bias_initializer=zeros())(pool2)  # "One conv at the beginning (spatial size: 32x32)"
    if enable_bn:
        conv3 = BatchNormalization(axis=self._channel_axis, momentum=0.9997)(conv3)
    pool3 = MaxPooling2D(pool_size=3, strides=2)(conv3)

    flatten = Flatten()(pool3)
    full1 = Dense(512, kernel_regularizer=l2(self._weight_decay), bias_initializer=ones(),
                  kernel_initializer=random_normal(stddev=0.005))(flatten)
    drop1 = Dropout(rate=pkeep)(full1)
    full2 = Dense(512, kernel_regularizer=l2(self._weight_decay), bias_initializer=ones(),
                  kernel_initializer=random_normal(stddev=0.005))(drop1)
    drop2 = Dropout(rate=pkeep)(full2)

    predictions_g = Dense(units=2, kernel_initializer=random_normal(stddev=0.01),
                          bias_initializer=zeros(), name="Gender_Prediction",
                          activation="softmax")(drop2)
    predictions_a = Dense(units=101, kernel_initializer=random_normal(stddev=0.01),
                          bias_initializer=zeros(), name="Age_Prediction",
                          activation="softmax")(drop2)

    model = Model(inputs=inputs, outputs=[predictions_g, predictions_a])
    return model

def double_thresholding(ghd_layer, inputs):
    input_shape = inputs.shape.as_list()
    shape = input_shape[1:] if ghd_layer.per_pixel else (input_shape[-1],)

    initializer = (initializers.glorot_normal(807) if ghd_layer.double_threshold
                   else initializers.zeros())
    rmin = ghd_layer.add_weight(name='rmin', shape=shape, dtype=K.floatx(),
                                initializer=initializer, regularizer=None,
                                trainable=ghd_layer.double_threshold)
    rmax = ghd_layer.add_weight(name='rmax', shape=shape, dtype=K.floatx(),
                                initializer=initializer, regularizer=None,
                                trainable=ghd_layer.double_threshold)

    if len(input_shape) == 4:
        axis = (1, 2)
    else:
        axis = (1,)

    inputs_rmin = K.min(inputs, axis=axis, keepdims=True) * K.sigmoid(rmin) * 3
    inputs_rmax = K.max(inputs, axis=axis, keepdims=True) * K.sigmoid(rmax) * 3

    alpha = ghd_layer.alpha
    hout = (inputs + 0.5) * differentiable_clip(inputs, alpha, inputs_rmin, inputs_rmax) - 0.5
    return hout

def build(self, input_shape):
    self.W_S1 = self.add_weight(name="W_S1", shape=(3 * self.hidden_dim, self.RL_dim),
                                initializer=glorot_uniform())
    self.b_S1 = self.add_weight(name="b_S1", shape=(self.RL_dim,), initializer=zeros())
    self.W_S2 = self.add_weight(name="W_S2", shape=(self.RL_dim, 2), initializer=glorot_uniform())
    self.b_S2 = self.add_weight(name="b_S2", shape=(2,), initializer=zeros())
    self.trainable_weights = [self.W_S1, self.b_S1, self.W_S2, self.b_S2]
    self.built = True

def _get_expert_auxiliary_predictions_optimized(output_dim, output_tf_activation,
                                                topmost_hidden_states):
    """
    Method for obtaining experts' auxiliary predictions in an AME optimised for faster
    compilation speed. Unlike the unoptimised variant, which creates a large number of
    operations in the TF graph and can therefore be slow to compile, this routine shares
    the per-expert fully connected layer and indexes into a single weight tensor. Prefer
    this variant for problems that require more than 100 experts in a single model.
    """
    from keras.initializers import he_normal, zeros

    num_experts = len(topmost_hidden_states)
    step_size = K.int_shape(topmost_hidden_states[0])[-1]

    w3 = tf.Variable(he_normal()((num_experts, step_size * (num_experts - 1), output_dim)))
    b3 = tf.Variable(zeros()((num_experts, output_dim)))
    extra_trainable_weights = [w3, b3]

    def apply_fully_connected(idx, x):
        return output_tf_activation(tf.matmul(x, w3[idx]) + b3[idx])

    def get_all_but_one_auxiliary_outputs(x):
        all_outputs = tf.concat(x, axis=-1)
        return [apply_fully_connected(
                    idx,
                    tf.concat((all_outputs[:, :idx * step_size],
                               all_outputs[:, (idx + 1) * step_size:]), axis=-1))
                for idx in range(num_experts)]

    all_but_one_auxiliary_outputs = Lambda(get_all_but_one_auxiliary_outputs)(topmost_hidden_states)
    return all_but_one_auxiliary_outputs, extra_trainable_weights

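# Hedged sketch, continuing the illustrative example shown after _get_expert_outputs_optimized
# above: the per-expert hidden states feed the all-but-one auxiliary predictions, and the
# extra variables from both helpers are collected for the training step.
all_but_one_outputs, more_weights = _get_expert_auxiliary_predictions_optimized(
    output_dim=1, output_tf_activation=tf.nn.sigmoid,
    topmost_hidden_states=expert_hidden_states)
extra_trainable_weights = extra_weights + more_weights
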
def fit(self, X, y, X_t, y_t):
    X = self.transfer_shape(X)
    X_t = self.transfer_shape(X_t)
    y = keras.utils.to_categorical(y, 2)
    y_t = keras.utils.to_categorical(y_t, 2)
    print(X.shape)

    if len(X_t) % 2 == 0:
        X_test, X_val = np.split(X_t, 2)
        y_test, y_val = np.split(y_t, 2)
    else:
        X_test, X_val = np.split(X_t[:-1, :], 2)
        y_test, y_val = np.split(y_t[:-1], 2)
    assert len(X_test) == len(y_test)

    """Hyperparameters"""
    num_filt_1 = self.num_filt_1  # Number of filters in first conv layer
    num_filt_2 = self.num_filt_2  # Number of filters in second conv layer
    num_fc_1 = self.num_fc_1      # Number of neurons in fully connected layer

    initializer = initializers.glorot_uniform(seed=123)
    self.classifier.add(Conv2D(filters=num_filt_1, kernel_size=[5, 1], padding='same',
                               kernel_initializer=initializer,
                               bias_initializer=initializers.zeros(),
                               input_shape=X.shape[1:]))
    self.classifier.add(Activation('relu'))
    self.classifier.add(Conv2D(filters=num_filt_2, kernel_size=[4, 1],
                               kernel_initializer=initializer,
                               bias_initializer=initializers.zeros(), padding='same'))
    # self.classifier.add(BatchNormalization())
    self.classifier.add(Activation('relu'))
    self.classifier.add(Flatten())
    self.classifier.add(Dense(num_fc_1, kernel_initializer=initializer,
                              bias_initializer=initializer))
    self.classifier.add(Activation('relu'))
    self.classifier.add(Dropout(0.2, seed=123))
    self.classifier.add(Dense(2, kernel_initializer=initializer,
                              bias_initializer=initializers.Constant(0.1)))
    self.classifier.add(Activation('softmax'))

    # self.classifier.compile(loss='binary_crossentropy', optimizer=Adam(lr=learning_rate),
    #                         metrics=['accuracy'])
    opt = Adam()
    self.classifier.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    self.classifier.fit(X, y, verbose=self.verbose, validation_data=(X_t, y_t),
                        batch_size=self.batch_size, epochs=self.nb_epoch, shuffle=False)

def build(self, input_shape):
    self.style_weight = self.add_weight("style_weight",
                                        shape=(input_shape[1][-1], input_shape[0][-1] * 2),
                                        initializer=VarianceScaling(scale=1), trainable=True)
    self.bias = self.add_weight("style_bias", shape=(input_shape[0][-1] * 2,),
                                initializer=zeros(), trainable=True)

def decoder(self, params):
    print('genetic_params:', params)
    NUM_NET_1 = 1        # for LR
    NUM_NET_2 = 8        # for the rest of the network params
    NUM_TIME = 3
    NUM_DELAY_TYPE = 3
    NUM_DELAY = 4

    # network params
    BATCH_SIZE = [16, 32, 64, 128]
    SEQ_LEN = [16, 32, 64, 128]
    STATE_SIZE = [16, 32, 64, 128]
    LR = list(np.logspace(-3, -6, 16))
    DR = [0.99, 0.98, 0.97, 0.96]
    PKEEP = [0.9, 0.8, 0.7, 0.6]
    ACTIVATION = ["relu", "tanh", "sigmoid", "softsign"]
    INIT = [zeros(), TruncatedNormal(), Orthogonal(), RandomUniform()]
    net_name = ['lr', 'batch_size', 'seq_len', 'state_size', 'dr', 'pkeep',
                'optimizer', 'activation_f', 'initializer']

    network_params = {}
    network_params['lr'] = LR[BitArray(params[0:NUM_NET_1 * 4]).uint]
    for i in range(NUM_NET_2):
        name = net_name[i + 1]
        network_params[name] = BitArray(params[4 + i * 2:4 + i * 2 + 2]).uint
    network_params['batch_size'] = BATCH_SIZE[network_params['batch_size']]
    network_params['seq_len'] = SEQ_LEN[network_params['seq_len']]
    network_params['state_size'] = STATE_SIZE[network_params['state_size']]
    network_params['dr'] = DR[network_params['dr']]
    network_params['pkeep'] = PKEEP[network_params['pkeep']]
    network_params['activation_f'] = ACTIVATION[network_params['activation_f']]
    network_params['initializer'] = INIT[network_params['initializer']]

    # timeseries params
    timeseries_params = {}
    TIME_STEP_DAYS = [7, 14, 30, 60]
    TIME_STEP_WEEKS = [4, 8, 12, 24]
    TIME_STEP_MONTHS = [2, 3, 6, 9]
    TIME_STEP = [TIME_STEP_DAYS, TIME_STEP_WEEKS, TIME_STEP_MONTHS]
    step_name = ['time_series_step_days', 'time_series_step_weeks', 'time_series_step_months']
    for index in range(NUM_TIME):
        name = step_name[index]
        step = TIME_STEP[index]
        timeseries_params[name] = step[BitArray(params[20 + index * 2:20 + index * 2 + 2]).uint]

    DELAY = [7, 14, 30, 60, 90, 120, 150, 180]
    delay_name_days = ['delay_google_days', 'delay_tweeter_days', 'delay_macro_days',
                       'delay_tweeter_re_days']
    delay_name_weeks = ['delay_google_weeks', 'delay_tweeter_weeks', 'delay_macro_weeks',
                        'delay_tweeter_re_weeks']
    delay_name_months = ['delay_google_months', 'delay_tweeter_months', 'delay_macro_months',
                         'delay_tweeter_re_months']
    delay_name = [delay_name_days, delay_name_weeks, delay_name_months]
    for type in range(NUM_DELAY_TYPE):
        name_list = delay_name[type]
        for index in range(NUM_DELAY):
            name = name_list[index]
            # note: the bit offset below depends only on `index`, so the days/weeks/months
            # variants decode the same genome bits
            timeseries_params[name] = DELAY[BitArray(params[26 + index * 3:26 + index * 3 + 3]).uint]
    return network_params, timeseries_params

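# Hedged illustration of the BitArray indexing used by the decoder above (the genome values
# are made up): a 4-bit slice selects one of 16 learning rates, a 2-bit slice one of 4 options.
from bitstring import BitArray

genome = [0, 1, 1, 0, 1, 0, 0, 1]          # 4 LR bits followed by two 2-bit genes
lr_index = BitArray(genome[0:4]).uint       # -> 6
batch_index = BitArray(genome[4:6]).uint    # -> 2
print(lr_index, batch_index)
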
def decoder(self, params):
    print('genetic_params:', params)
    NUM_NET_1 = 1   # for LR
    NUM_NET_2 = 8   # for the rest of the network params
    NUM_TIME = 1
    NUM_DELAY = 4

    # network params
    BATCH_SIZE = [16, 32, 64, 128]
    SEQ_LEN = [16, 32, 64, 128]
    STATE_SIZE = [16, 32, 64, 128]
    LR = list(np.logspace(-1, -2, 16))
    DR = [0.99, 0.98, 0.97, 0.96]
    PKEEP = [0.9, 0.8, 0.7, 0.6]
    ACTIVATION = ["relu", "tanh", "sigmoid", "softsign"]
    INIT = [zeros(), TruncatedNormal(), Orthogonal(), RandomUniform()]
    net_name = ['lr', 'batch_size', 'seq_len', 'state_size', 'dr', 'pkeep',
                'optimizer', 'activation_f', 'initializer']

    network_params = {}
    network_params['lr'] = LR[BitArray(params[0:NUM_NET_1 * 4]).uint]
    for i in range(NUM_NET_2):
        name = net_name[i + 1]
        network_params[name] = BitArray(params[4 + i * 2:4 + i * 2 + 2]).uint
    network_params['batch_size'] = BATCH_SIZE[network_params['batch_size']]
    network_params['seq_len'] = SEQ_LEN[network_params['seq_len']]
    network_params['state_size'] = STATE_SIZE[network_params['state_size']]
    network_params['dr'] = DR[network_params['dr']]
    network_params['pkeep'] = PKEEP[network_params['pkeep']]
    network_params['activation_f'] = ACTIVATION[network_params['activation_f']]
    network_params['initializer'] = INIT[network_params['initializer']]

    # timeseries params
    timeseries_params = {}
    DELAY = [7, 14, 30, 60, 90, 120, 150, 180]
    TIME_STEP = [10, 20, 30, 40]
    timeseries_params['time_series_step'] = TIME_STEP[BitArray(params[20:20 + NUM_TIME * 2]).uint]
    time_name = ['delay_google', 'delay_tweeter', 'delay_macro', 'delay_tweeter_re']
    for i in range(NUM_DELAY):
        name = time_name[i]
        timeseries_params[name] = DELAY[BitArray(params[22 + i * 3:22 + i * 3 + 3]).uint]
    return network_params, timeseries_params

def resnet_block(self, input_data, filters_num1, filters_num2, filters_size, stride,
                 block_num, ds=False):
    x_shortcut = input_data

    x = Conv2D(filters_num1, (filters_size, filters_size), padding='valid', strides=(2, 2),
               kernel_initializer=glorot_uniform(seed=27), bias_initializer=zeros())(input_data)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # x = Dropout(0.4)(x)

    x = Conv2D(filters_num2, (filters_size, filters_size), padding='same',
               kernel_initializer=glorot_uniform(seed=27), bias_initializer=zeros(),
               activation=None)(x)
    x = BatchNormalization()(x)

    if ds:
        x_shortcut = Conv2D(1, (filters_size, filters_size), padding='valid', strides=(2, 2),
                            kernel_initializer=glorot_uniform(seed=27), bias_initializer=zeros(),
                            activation=None)(x_shortcut)

    x = Add()([x, x_shortcut])
    x = Activation('relu')(x)
    x = Dropout(0.25)(x)
    return x

def reset_classification(self):
    """ Reinitializes the weights for the classification layer. """
    layer = self.model.layers[-1]
    old_weights = layer.get_weights()
    if len(old_weights) == 2:
        weights = glorot_uniform()(old_weights[0].shape).eval(session=K.get_session())
        bias = zeros()(old_weights[1].shape).eval(session=K.get_session())
        new_weights = [weights, bias]
        layer.set_weights(new_weights)
    self.model.compile(optimizer=self.optimizer, loss='mean_squared_error',
                       metrics=['accuracy'])

def build(self, input_shape):
    self.kernel = self.add_weight(name='kernel', shape=(input_shape[1][2], self.units),
                                  initializer=initializers.orthogonal(),
                                  regularizer=regularizers.l2(5e-4))
    self.bias = self.add_weight(name='bias', shape=(self.units,),
                                initializer=initializers.zeros())
    if self.learn_pqr:
        self.p = self.add_weight(name='p', shape=(1,), initializer=initializers.constant(0))
        self.q = self.add_weight(name='q', shape=(1,), initializer=initializers.constant(0))
        # self.trainable_weights = [self.p, self.q]
    super(MyGCN, self).build(input_shape)

def baseline_model():
    global num
    model = Sequential()
    model.add(Dense(input_dim=83, kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer=initializers.zeros(), activation='relu', units=30))
    # model.add(BatchNormalization())
    model.add(GaussianNoise(1))
    model.add(GaussianDropout(0.3))
    # model.add(Dense(20, kernel_initializer='uniform', bias_initializer='uniform', activation='relu'))
    # model.add(Dropout(0.3))
    model.add(Dense(10, kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer='zeros', activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(5, kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer='zeros', activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(5, kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer='zeros', activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(2, kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer='zeros', activation='softmax'))
    # Compile model
    model.compile(loss=losses.binary_crossentropy, optimizer='adam', metrics=['accuracy'])
    return model

def build_resnet(self):
    seed = 27
    inputs = Input(shape=(self.n_input, self.n_input, self.n_channel))

    # block 1
    block1output = self.resnet_block(inputs, 32, 64, 7, 3, 1, True)
    # block 2
    block2output = self.resnet_block(block1output, 64, 128, 5, 16, 2, True)
    # block 3
    block3output = self.resnet_block(block2output, 128, 256, 3, 16, 3, True)

    flatten_result = Flatten()(block3output)
    # x = Dropout(0.3)(flatten_result)
    outputs = Dense(self.n_classes, activation='softmax',
                    kernel_initializer=glorot_uniform(seed=seed),
                    bias_initializer=zeros())(flatten_result)
    resnet_model = Model(inputs, outputs)
    return resnet_model

def decoder(self, params):
    print('genetic_params:', params)
    NUM_NET_1 = 1   # for LR
    NUM_NET_2 = 8   # for the rest of the network params

    # network params
    BATCH_SIZE = [8, 16, 32, 64]
    SEQ_LEN = [16, 32, 64, 128]
    STATE_SIZE = [16, 32, 64, 128]
    LR = list(np.logspace(-3, -6, 16))
    DR = [0.99, 0.98, 0.97, 0.96]
    PKEEP = [0.9, 0.8, 0.7, 0.6]
    ACTIVATION = ["relu", "tanh", "sigmoid", "softsign"]
    # INIT = [tf.truncated_normal_initializer(stddev=0.01), tf.random_uniform_initializer(),
    #         tf.zeros_initializer(), tf.orthogonal_initializer()]
    INIT = [zeros(), TruncatedNormal(), Orthogonal(), RandomUniform()]
    net_name = ['lr', 'batch_size', 'seq_len', 'state_size', 'dr', 'pkeep',
                'optimizer', 'activation_f', 'initializer']

    network_params = {}
    network_params['lr'] = LR[BitArray(params[0:NUM_NET_1 * 4]).uint]
    for i in range(NUM_NET_2):  # decode the remaining NUM_NET_2 two-bit genes
        name = net_name[i + 1]
        network_params[name] = BitArray(params[4 + i * 2:4 + i * 2 + 2]).uint
    network_params['batch_size'] = BATCH_SIZE[network_params['batch_size']]
    network_params['seq_len'] = SEQ_LEN[network_params['seq_len']]
    network_params['state_size'] = STATE_SIZE[network_params['state_size']]
    network_params['dr'] = DR[network_params['dr']]
    network_params['pkeep'] = PKEEP[network_params['pkeep']]
    # the decoded 'optimizer' index is left as an integer here; it could map to one of
    # SGD, RMSProp, AdaDelta or Adam when the model is built
    network_params['activation_f'] = ACTIVATION[network_params['activation_f']]
    network_params['initializer'] = INIT[network_params['initializer']]
    return network_params

            ),
        ),
    ],
)
def test_parameters_by_signature(instance, signature_filter, params):
    assert parameters_by_signature(instance, signature_filter) == params


##################################################
# `keras_initializer_to_dict` Scenarios
##################################################
@pytest.mark.parametrize(
    ["initializer", "initializer_dict"],
    [
        #################### Normal Initializers ####################
        pytest.param(initializers.zeros(), dict(class_name="zeros"), id="zero_0"),
        pytest.param(initializers.Zeros(), dict(class_name="zeros"), id="zero_1"),
        pytest.param(initializers.ones(), dict(class_name="ones"), id="one_0"),
        pytest.param(initializers.Ones(), dict(class_name="ones"), id="one_1"),
        pytest.param(initializers.constant(), dict(class_name="constant", value=0), id="c_0"),
        pytest.param(initializers.Constant(5), dict(class_name="constant", value=5), id="c_1"),
        pytest.param(
            initializers.RandomNormal(0.1),
            dict(class_name="random_normal", mean=0.1, stddev=0.05, seed=None),
            id="rn_0",
        ),
        pytest.param(
            initializers.random_normal(mean=0.2, stddev=0.003, seed=42),
            dict(class_name="random_normal", mean=0.2, stddev=0.003, seed=42),
            id="rn_1",
        ),

# kernel_initializer = initializers.normal(mean=0, stddev=0.5, seed=None)
kernel_initializer = initializers.uniform(minval=-0.05, maxval=0.05, seed=None)
# kernel_initializer = initializers.truncated_normal(mean=0.0, stddev=0.05, seed=None)
# kernel_initializer = initializers.orthogonal(gain=1, seed=None)
# kernel_initializer = initializers.identity(gain=1)
# kernel_initializer = initializers.he_uniform()
# kernel_initializer = initializers.glorot_uniform()

# bias_initializer = initializers.normal(mean=0, stddev=0.5, seed=None)
# bias_initializer = initializers.uniform(minval=-0.05, maxval=0.05, seed=None)
# bias_initializer = initializers.truncated_normal(mean=0.0, stddev=0.05, seed=None)
# bias_initializer = initializers.orthogonal(gain=1, seed=None)
# bias_initializer = initializers.identity(gain=1)
# bias_initializer = initializers.he_uniform()
# bias_initializer = initializers.glorot_uniform()
bias_initializer = initializers.zeros()

# kernel_regularizer = regularizers.l2(l=0.01)
kernel_regularizer = None
# bias_regularizer = regularizers.l2(l=0.01)
bias_regularizer = None

# activation = 'elu'
# activation = 'hard_sigmoid'
# activation = 'linear'
activation = 'relu'
# activation = 'selu'
# activation = 'sigmoid'
# activation = 'softmax'
# activation = 'softplus'

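# Hedged illustration of how the configuration variables above are typically consumed when a
# layer is created; the Dense layer and its unit count are illustrative, not from the source.
from keras.layers import Dense

layer = Dense(64, activation=activation,
              kernel_initializer=kernel_initializer,
              bias_initializer=bias_initializer,
              kernel_regularizer=kernel_regularizer,
              bias_regularizer=bias_regularizer)
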
def build_target_network():
    X = Input(shape=SHAPE[1:], dtype='float32')
    Q, model = Agent.infer(X, trainable=False, init=initializers.zeros())
    return model

def build(self, input_shape):
    if isinstance(input_shape, list):
        input_shape = input_shape[0]

    batch_size = input_shape[0] if self.stateful else None
    self.input_dim = input_shape[2]
    self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim))
    self.state_spec = [InputSpec(shape=(batch_size, self.units)),
                       InputSpec(shape=(batch_size, self.units))]
    self.states = [None, None]
    if self.stateful:
        self.reset_states()

    self.series_num = 4  # we use 4 series: target, positive, negative, hs300

    ## kernels: kernel_f,
    ##          kernel_i, kernel_ip, kernel_in, kernel_ihs
    ##          kernel_c, kernel_cp, kernel_cn, kernel_chs
    ##          kernel_o
    ## recurrent kernels: recurrent_kernel_f,
    ##          recurrent_kernel_i, recurrent_kernel_ip, recurrent_kernel_in, recurrent_kernel_ihs
    ##          recurrent_kernel_c, recurrent_kernel_cp, recurrent_kernel_cn, recurrent_kernel_chs
    ##          recurrent_kernel_o
    ## total: 20 kernels

    ## these are the original LSTM kernels (total: 8 kernels)
    self.kernel = self.add_weight((int(self.input_dim / self.series_num), self.units * 4),
                                  name='kernel',
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    self.recurrent_kernel = self.add_weight((self.units, self.units * 4),
                                            name='recurrent_kernel',
                                            initializer=self.recurrent_initializer,
                                            regularizer=self.recurrent_regularizer,
                                            constraint=self.recurrent_constraint)

    ## separate variables for the driving-series kernels, since they use a different
    ## initializer: these kernels start at zero (total: 12 kernels)
    self.kernel_rel = self.add_weight((int(self.input_dim / self.series_num), self.units * 6),
                                      name='kernel_related',
                                      initializer=initializers.zeros(),
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
    self.recurrent_kernel_rel = self.add_weight((self.units, self.units * 6),
                                                name='recurrent_kernel_related',
                                                initializer=initializers.zeros(),
                                                regularizer=self.recurrent_regularizer,
                                                constraint=self.recurrent_constraint)

    ## attention kernel
    self.attention_kernel = self.add_weight((self.units, self.units),
                                            name='attention_kernel',
                                            initializer=self.kernel_initializer,
                                            regularizer=self.kernel_regularizer,
                                            constraint=self.kernel_constraint)

    ## original 4 biases and 6 related biases; all biases use the same initializer
    if self.use_bias:
        self.bias = self.add_weight((self.units * 10,),
                                    name='bias',
                                    initializer=self.bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
        if self.unit_forget_bias:
            bias_value = np.zeros((self.units * 10,))
            bias_value[self.units: self.units * 2] = 1.
            K.set_value(self.bias, bias_value)
    else:
        self.bias = None

    ## original kernels
    self.kernel_i = self.kernel[:, :self.units]
    self.kernel_f = self.kernel[:, self.units: self.units * 2]
    self.kernel_c = self.kernel[:, self.units * 2: self.units * 3]
    self.kernel_o = self.kernel[:, self.units * 3:]
    self.recurrent_kernel_i = self.recurrent_kernel[:, :self.units]
    self.recurrent_kernel_f = self.recurrent_kernel[:, self.units: self.units * 2]
    self.recurrent_kernel_c = self.recurrent_kernel[:, self.units * 2: self.units * 3]
    self.recurrent_kernel_o = self.recurrent_kernel[:, self.units * 3:]

    ## related kernels
    self.kernel_ip = self.kernel_rel[:, :self.units]
    self.kernel_in = self.kernel_rel[:, self.units: self.units * 2]
    self.kernel_ihs = self.kernel_rel[:, self.units * 2: self.units * 3]
    self.kernel_cp = self.kernel_rel[:, self.units * 3: self.units * 4]
    self.kernel_cn = self.kernel_rel[:, self.units * 4: self.units * 5]
    self.kernel_chs = self.kernel_rel[:, self.units * 5:]
    self.recurrent_kernel_ip = self.recurrent_kernel_rel[:, :self.units]
    self.recurrent_kernel_in = self.recurrent_kernel_rel[:, self.units: self.units * 2]
    self.recurrent_kernel_ihs = self.recurrent_kernel_rel[:, self.units * 2: self.units * 3]
    self.recurrent_kernel_cp = self.recurrent_kernel_rel[:, self.units * 3: self.units * 4]
    self.recurrent_kernel_cn = self.recurrent_kernel_rel[:, self.units * 4: self.units * 5]
    self.recurrent_kernel_chs = self.recurrent_kernel_rel[:, self.units * 5:]

    if self.use_bias:
        ## original biases
        self.bias_i = self.bias[:self.units]
        self.bias_f = self.bias[self.units: self.units * 2]
        self.bias_c = self.bias[self.units * 2: self.units * 3]
        self.bias_o = self.bias[self.units * 3: self.units * 4]
        ## related biases
        self.bias_ip = self.bias[self.units * 4: self.units * 5]
        self.bias_in = self.bias[self.units * 5: self.units * 6]
        self.bias_ihs = self.bias[self.units * 6: self.units * 7]
        self.bias_cp = self.bias[self.units * 7: self.units * 8]
        self.bias_cn = self.bias[self.units * 8: self.units * 9]
        self.bias_chs = self.bias[self.units * 9:]
    else:
        self.bias_i = None
        self.bias_f = None
        self.bias_c = None
        self.bias_o = None
        self.bias_ip = None
        self.bias_in = None
        self.bias_ihs = None
        self.bias_cp = None
        self.bias_cn = None
        self.bias_chs = None

    self.built = True

def __init__(self, F_, attn_heads, attn_heads_reduction, dropout_rate, decay_rate,
             activation, flag_batch_norm, flag_edge_weights, use_bias=True, **kwargs):
    '''
    Initialize the Layer object given the set of architecture hyperparameters received as input.

    :param F_: Dimensionality of the node features produced: F' in the GAT paper
    :param attn_heads: Number of attention heads on this layer: K in the GAT paper
    :param attn_heads_reduction: Reduction of attention heads via average/concatenation
    :param dropout_rate: Dropout rate of alpha coefficients and input node features
    :param decay_rate: L2 regularization coefficient
    :param activation: Activation function of the layer
    :param flag_batch_norm: Use Batch Normalization on the output of this layer
    :param flag_edge_weights: Include edge weights in the learning process
    :param use_bias: Use bias on the node features produced
    :param kwargs: Additional keyword arguments passed to the base Layer
    '''
    super(GATLayer, self).__init__(**kwargs)
    self.F_ = F_
    self.attn_heads = attn_heads
    self.attn_heads_reduction = attn_heads_reduction
    self.dropout_rate = dropout_rate
    self.activation = activation
    self.use_bias = use_bias
    self.use_batch_norm = flag_batch_norm
    self.use_ew = flag_edge_weights
    self.decay_rate = decay_rate

    # Initializers for each type of parameter used by the layer
    self.main_kernel_init = initializers.glorot_normal()
    self.bias_init = initializers.zeros()
    self.attn_kernel_init = initializers.glorot_normal()

    # Regularizers for each type of parameter used by the layer
    self.main_kernel_regular = regularizers.l2(decay_rate)
    self.bias_regular = regularizers.l2(decay_rate)
    self.attn_kernel_regular = regularizers.l2(decay_rate)

    # Layer kernels for each attention head: the 'W' weight matrix in the GAT paper
    self.kernels = []
    # Layer biases for each attention head
    self.biases = []
    # Attention kernels for each attention head: the 'a' weight vector in the GAT paper
    self.attn_kernels = []

    # Determine the output node feature dimension
    if attn_heads_reduction == 'concat':
        self.output_dim = self.F_ * self.attn_heads   # output shape: (? x N x KF')
    elif attn_heads_reduction == 'average':
        self.output_dim = self.F_                     # output shape: (? x N x F')
    else:
        raise ValueError('Possible reduction methods: {concat, average}, not %s'
                         % attn_heads_reduction)

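# Hedged usage sketch: instantiating the layer above with illustrative hyperparameters
# (8 output features per head, 4 attention heads averaged into a single output); the
# concrete values are assumptions, not taken from the source.
gat = GATLayer(F_=8, attn_heads=4, attn_heads_reduction='average', dropout_rate=0.4,
               decay_rate=5e-4, activation='elu', flag_batch_norm=False,
               flag_edge_weights=False)
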
def test_zero(tensor_shape):
    _runner(initializers.zeros(), tensor_shape, target_mean=0., target_max=0.)

def basic(type, train, test, code, epoch, batch):
    # Load MNIST train and test data
    X_train = np.loadtxt(train, delimiter=',', dtype=None)
    X_test = np.loadtxt(test, delimiter=',', dtype=None)

    # z_list: define experiment code (Z) sizes
    z_list = [code]
    autoencoder = [[] for i in range(len(z_list))]

    # E: epochs, BS: batch size
    E = epoch
    BS = batch

    # Train model and save data (code (Z), output and total loss data)
    model_index = 0
    total_summary_loss_data = ['model_type', 'z_size', 'train_loss', 'test_loss']
    for z_size in z_list:
        # Define models
        INPUT_SIZE = 784
        HIDDEN_SIZE = z_size
        if type == "digit":
            w_initializer = initializers.truncated_normal(mean=0.0, stddev=0.05, seed=None)
            b_initializer = initializers.zeros()
            dense1 = Input(shape=(INPUT_SIZE,))
            dense2 = Dense(HIDDEN_SIZE, activation='linear', kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE, activation='sigmoid', kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)
            autoencoder[model_index] = Model(dense1, dense3)
            adam = optimizers.Adam(lr=0.001)
            autoencoder[model_index].compile(loss='mean_squared_error', optimizer=adam)
            autoencoder[model_index].fit(X_train, X_train, epochs=E, batch_size=BS, verbose=0)
        else:
            w_initializer = initializers.glorot_uniform(seed=None)
            b_initializer = initializers.glorot_uniform(seed=None)
            dense1 = Input(shape=(INPUT_SIZE,))
            dense2 = Dense(HIDDEN_SIZE, activation='linear', kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE, activation='sigmoid', kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)
            autoencoder[model_index] = Model(dense1, dense3)
            adagrad = optimizers.Adagrad(lr=0.01)
            autoencoder[model_index].compile(loss='mean_squared_error', optimizer=adagrad)
            autoencoder[model_index].fit(X_train, X_train, epochs=E, batch_size=BS, verbose=0)

        # Get output and calculate loss
        get_output = K.function([autoencoder[model_index].layers[0].input],
                                [autoencoder[model_index].layers[2].output])
        train_output = get_output([X_train])[0]
        test_output = get_output([X_test])[0]
        train_loss = np.sum((X_train - train_output) ** 2) / (X_train.shape[0] * X_train.shape[1])
        test_loss = np.sum((X_test - test_output) ** 2) / (X_test.shape[0] * X_test.shape[1])
        summary_loss_data = ['BAE', z_size, train_loss, test_loss]
        total_summary_loss_data = np.vstack((total_summary_loss_data, summary_loss_data))
        np.savetxt("total_loss.csv", total_summary_loss_data, delimiter=',', fmt='%s')
        np.savetxt("test_out.csv", test_output, delimiter=',')

        # Get code (Z)
        get_z = K.function([autoencoder[model_index].layers[0].input],
                           [autoencoder[model_index].layers[1].output])
        test_z = get_z([X_test])[0]
        np.savetxt("test_code.csv", test_z, delimiter=',')

        model_index = model_index + 1

    # Print total loss
    print(total_summary_loss_data)
    print("learning basic autoencoder model finished!\n")

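# Hedged usage sketch: the file names and hyperparameter values are illustrative only.
# Trains a single-hidden-layer autoencoder with a 32-dimensional code for 50 epochs.
basic(type="digit", train="mnist_train.csv", test="mnist_test.csv",
      code=32, epoch=50, batch=128)
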
if factor_name:
    _data_sets[factor_name[:-1]] = (get_data, factor_name[:-1])

data_sets = LazyDict(_data_sets)

activations = {LINEAR: linear, TAHN: tanh, RELU: relu}

initializers = {
    LECUN_NORMAL: lecun_normal(),
    LECUN_UNIFORM: lecun_uniform(),
    HE_NORMAL: he_normal(),
    HE_UNIFORM: he_uniform(),
    GLOROT_NORMAL: glorot_normal(),
    GLOROT_UNIFORM: glorot_uniform(),
    ZEROS: zeros()
}

regularizers = {NONE: None, L1: l1(), L2: l2(), L1_L2: l1_l2()}

hidden_layers = {
    NN3_1: [70],
    NN3_2: [80],
    NN3_3: [100],
    NN3_4: [120],
    DNN5_1: [100, 50, 10],
    DNN5_2: [100, 70, 50],
    DNN5_3: [120, 70, 20],
    DNN5_4: [120, 80, 40],
    DNN8_1: [100, 100, 50, 50, 10, 10],
    DNN8_2: [100, 100, 70, 70, 50, 50],

"n10": 157 } # Model_1 inpt = Input(shape=(param["c0"], param["h0"], param["w0"])) # 1 x = Conv2D(filters=param["n1"], kernel_size=(param["k"], param["k"]), strides=param["cs"], padding="same", activation=None, use_bias=False, kernel_initializer=initializers.random_normal(0.0, 0.01))(inpt) x = BatchNormalization(axis=1, center=True, beta_initializer=initializers.zeros(), scale=True, gamma_initializer=initializers.ones(), epsilon=10**-8, momentum=0.9)(x) x = PReLU(alpha_initializer=initializers.zeros())(x) x = MaxPooling2D(pool_size=(param["k"], param["k"]), strides=param["ps"], padding="same")(x) x.shape # (96, 48, 48) # 2 d2 = d(c=param["n1"], n=param["n2"], k=param["k"], w=int(x.shape[2]), speed=param["speed"])
def build(self, input_shape):
    self.dlatent_avg = self.add_weight("dlatent_avg", shape=(self.dlatent_size,),
                                       initializer=zeros(), trainable=False)

def build(self, input_shape):
    self.noise_weight = self.add_weight('noise_weight', shape=[input_shape[-1]],
                                        initializer=zeros())