# sys.exit(0) inputs = Input(shape=(96, 2000, 1), batch_size=1, name='input') # Block_01 matmul1_1 = tf.math.multiply(inputs, np.load('weights/data_mul_28262830')) add1_1 = Add()([matmul1_1, np.load('weights/data_add_28272832')]) conv1_1 = Conv2D( filters=64, kernel_size=[3, 3], strides=[1, 1], padding="same", dilation_rate=[1, 1], kernel_initializer=Constant( np.load('weights/95_mean_Fused_Mul_30603062_const').transpose( 1, 2, 3, 0)))(add1_1) add1_2 = Add()( [conv1_1, np.load('weights/data_add_28352840').transpose(0, 2, 3, 1)]) relu1_1 = ReLU()(add1_2) # Block_02 conv2_1 = Conv2D( filters=64, kernel_size=[3, 3], strides=[1, 1], padding="same", dilation_rate=[1, 1], kernel_initializer=Constant( np.load('weights/98_mean_Fused_Mul_30643066_const').transpose(
def get_Jang_model(PARAMS, fs=16000, Tw=25, n_mels=64, t_dim=5, n_classes=2):
    '''
    Build the Mel-scale CNN architecture proposed by Jang et al. [4].

    Parameters
    ----------
    PARAMS : dict
        Configuration; must provide 'n_fft', 'input_shape' and 'Model' keys.
    fs : int, optional
        Sampling rate. The default is 16000.
    Tw : int, optional
        Short-term frame size in milliseconds. The default is 25.
    n_mels : int, optional
        Number of mel-filters. The default is 64.
    t_dim : int, optional
        Time dimension of the mel-scale kernels. The default is 5.
    n_classes : int, optional
        Number of classes. The default is 2.

    Returns
    -------
    model : tensorflow.keras.models.Model
        Mel-scale CNN model (compiled only for n_classes in {2, 3}).
    learning_rate : float
        The learning rate used to compile the model.
    '''
    n_fft = PARAMS['n_fft'][PARAMS['Model']]
    # FIX: librosa >= 0.10 made these arguments keyword-only; passing `sr`
    # by keyword is also accepted by older releases.
    M = librosa.filters.mel(sr=fs, n_fft=n_fft, n_mels=n_mels, norm='slaney')

    # First and last non-zero FFT bin of every mel filter
    # (FIX: compute the np.where() result once per row instead of twice).
    filter_bins = np.zeros((np.shape(M)[0], 2))
    for i in range(np.shape(M)[0]):
        nonzero = np.squeeze(np.where(M[i, :] > 0))
        filter_bins[i, 0] = nonzero[0]
        filter_bins[i, 1] = nonzero[-1]
    filter_bins = filter_bins.astype(int)
    n_fft = np.shape(M)[1]

    ''' melCL layer '''
    inp_img = Input(
        PARAMS['input_shape'][PARAMS['Model']])  # Input(shape=(n_fft, 101, 1))

    # One cropped Conv2D branch per mel filter. The original duplicated the
    # first branch outside the loop; the construction is identical for every
    # filter, so build all branches in one loop and concatenate once
    # (Concatenate over the full list equals the original pairwise concat).
    branches = []
    for mel_i in range(n_mels):
        top = filter_bins[mel_i, 0]
        bottom = n_fft - filter_bins[mel_i, 1] - 1
        inp_mel = Cropping2D(cropping=((top, bottom), (0, 0)))(inp_img)
        kernel_width = filter_bins[mel_i, 1] - filter_bins[mel_i, 0] + 1
        kernel_init = get_kernel_initializer(M[mel_i, :],
                                             filter_bins[mel_i, :], t_dim)
        branches.append(
            Conv2D(3,
                   kernel_size=(kernel_width, t_dim),
                   strides=(kernel_width, 1),
                   padding='same',
                   name='melCl' + str(mel_i),
                   kernel_initializer=Constant(kernel_init),
                   use_bias=False,
                   kernel_regularizer=l1_l2())(inp_mel))
    melCl = Concatenate(axis=1)(branches)
    ''' ~~~~~~~~~~~~~~~~~~~~ '''

    melCl = Activation('tanh')(melCl)

    # Three conv blocks: Conv -> BN -> ReLU -> Dropout -> MaxPool.
    x = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same')(melCl)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    x = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    x = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    x = Flatten()(x)
    # Best results obtained without the FC layers (intentionally omitted).

    output = Dense(n_classes, activation='softmax')(x)
    model = Model(inp_img, output)

    learning_rate = 0.001
    # FIX: `lr` was removed from tf.keras optimizers; use `learning_rate`.
    # NOTE(review): binary_crossentropy with a 2-unit softmax output is
    # unusual (categorical_crossentropy may be intended) — kept as-is.
    if n_classes == 2:
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.Adam(learning_rate=learning_rate),
                      metrics=['accuracy'])
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(learning_rate=learning_rate),
                      metrics=['accuracy'])
    print(model.summary())
    print('Mel-scale CNN architecture proposed by Jang et al.')
    return model, learning_rate
def build(self):
    """
    Build and compile the MTNet model.

    Returns
    -------
    tf.keras.Model
        The compiled model (also stored in ``self.model``).
    """
    # mc (Monte-Carlo dropout) keeps dropout active at inference by forcing
    # training=True on the encoders.
    training = True if self.mc else None
    # long-term time series historical data inputs
    long_input = Input(shape=(self.long_num, self.time_step,
                              self.feature_num))
    # short-term time series historical data
    short_input = Input(shape=(self.time_step, self.feature_num))

    # ------- non-linear component ----------------
    # memory and context : (batch, long_num, last_rnn_size)
    memory = self.__encoder(long_input, num=self.long_num, name='memory',
                            training=training)
    context = self.__encoder(long_input, num=self.long_num, name='context',
                             training=training)
    # query: (batch, 1, last_rnn_size)
    query_input = Reshape((1, self.time_step, self.feature_num),
                          name='reshape_query')(short_input)
    query = self.__encoder(query_input, num=1, name='query',
                           training=training)

    # attention: prob = memory * query.T, shape is (long_num, 1)
    query_t = Permute((2, 1))(query)
    prob = Lambda(lambda xy: tf.matmul(xy[0], xy[1]))([memory, query_t])
    prob = Softmax(axis=-1)(prob)

    # out is of the same shape as context: (batch, long_num, last_rnn_size)
    out = multiply([context, prob])

    # concat: (batch, long_num + 1, last_rnn_size)
    pred_x = concatenate([out, query], axis=1)
    reshaped_pred_x = Reshape((self.last_rnn_size * (self.long_num + 1), ),
                              name="reshape_pred_x")(pred_x)
    nonlinear_pred = Dense(
        units=self.output_dim,
        kernel_initializer=TruncatedNormal(stddev=0.1),
        bias_initializer=Constant(0.1),
    )(reshaped_pred_x)

    # ------------ autoregressive (linear) component ------------
    if self.ar_window > 0:
        ar_pred_x = Reshape(
            (self.ar_window * self.feature_num, ),
            name="reshape_ar")(short_input[:, -self.ar_window:])
        linear_pred = Dense(
            units=self.output_dim,
            kernel_initializer=TruncatedNormal(stddev=0.1),
            bias_initializer=Constant(0.1),
        )(ar_pred_x)
        # BUG FIX: the original always executed Add()([nonlinear_pred,
        # linear_pred]) with linear_pred = 0 when ar_window <= 0, but
        # keras.layers.Add requires a list of tensors. Only add the AR
        # term when it exists.
        y_pred = Add()([nonlinear_pred, linear_pred])
    else:
        y_pred = nonlinear_pred

    self.model = Model(inputs=[long_input, short_input], outputs=y_pred)
    # FIX: `lr` was removed from tf.keras optimizers; use `learning_rate`.
    self.model.compile(loss="mae",
                       metrics=self.metrics,
                       optimizer=tf.keras.optimizers.Adam(
                           learning_rate=self.lr))
    return self.model
def build(self, input_shape):
    """Create the layer's single scalar trainable weight ``a`` (init 0)."""
    scalar_init = Constant(0)
    # shape=() -> one scalar parameter shared across the whole layer.
    self.a = self.add_weight(
        name='a',
        shape=(),
        initializer=scalar_init,
        trainable=True,
    )
    super(PoissonLayer, self).build(input_shape)
# One extra row since word indices start at 1 (index 0 is reserved).
num_words = min(MAX_WORDS, len(word_index)) + 1
embedding_matrix = np.zeros((num_words, embedding_dim))
for word, idx in word_index.items():
    if idx > MAX_WORDS:
        continue
    vector = embeddings_index.get(word)
    # Words not found in the embedding index stay all-zeros.
    if vector is not None:
        embedding_matrix[idx] = vector

# Load pre-trained word embeddings into an Embedding layer;
# trainable=False keeps the embeddings fixed during training.
embedding_layer = Embedding(num_words,
                            embedding_dim,
                            embeddings_initializer=Constant(embedding_matrix),
                            input_length=MAX_SEQ_LENGTH,
                            trainable=False)

from avg_auroc import AvgAurocCallback
avg_auroc_callback = AvgAurocCallback(x_train, y_train, x_val, y_val)

model = tf.keras.Sequential([
    embedding_layer,
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Conv1D(64, 5, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=4),
    tf.keras.layers.CuDNNLSTM(64),
    tf.keras.layers.Dense(6, activation='sigmoid'),
])
from tensorflow.keras.datasets import cifar10
import numpy as np
import cv2 as cv
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPool2D, concatenate
from tensorflow.keras.initializers import glorot_uniform, Constant
from tensorflow.nn import relu

kernel_init, bias_init = glorot_uniform(), Constant(value=0.2)


def load_cifar10_data(img_rows, img_cols):
    """
    Load CIFAR-10, resize every image to (img_rows, img_cols), one-hot
    encode the labels and scale pixel values to [0, 1].

    Parameters
    ----------
    img_rows, img_cols : int
        Target image height and width.

    Returns
    -------
    ((X_train, y_train), (X_test, y_test)) : tuple of np.ndarray pairs
    """
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    # BUG FIX: the original resized both splits twice (the identical
    # resize block was duplicated); resizing once is sufficient.
    X_train = np.array(
        [cv.resize(img, (img_rows, img_cols)) for img in X_train])
    X_test = np.array(
        [cv.resize(img, (img_rows, img_cols)) for img in X_test])

    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    X_train = X_train / 255.0
    X_test = X_test / 255.0

    return (X_train, y_train), (X_test, y_test)
# tmp = np.load('weights/depthwise_conv2d_Kernel') # print(tmp.shape) # print(tmp) # def init_f(shape, dtype=None): # ker = np.load('weights/depthwise_conv2d_Kernel') # print(shape) # return ker # sys.exit(0) inputs = Input(shape=(128, 128, 3), name='input') # Block_01 conv1_1 = Conv2D(filters=48, kernel_size=[5, 5], strides=[1, 1], padding="same", dilation_rate=[1, 1], activation='relu', kernel_initializer=Constant(np.load('weights/conv2d_Kernel').transpose(1,2,3,0)), bias_initializer=Constant(np.load('weights/conv2d_Bias')))(inputs) depthconv1_1 = DepthwiseConv2D(kernel_size=[5, 5], strides=[1, 1], padding="same", depth_multiplier=1, dilation_rate=[1, 1], depthwise_initializer=Constant(np.load('weights/depthwise_conv2d_Kernel')), bias_initializer=Constant(np.load('weights/depthwise_conv2d_Bias')))(conv1_1) conv1_2 = Conv2D(filters=48, kernel_size=[1, 1], strides=[1, 1], padding="valid", dilation_rate=[1, 1], kernel_initializer=Constant(np.load('weights/conv2d_1_Kernel').transpose(1,2,3,0)), bias_initializer=Constant(np.load('weights/conv2d_1_Bias')))(depthconv1_1) add1_1 = Add()([conv1_1, conv1_2]) relu1_1 = ReLU()(add1_1) # Block_02 depthconv2_1 = DepthwiseConv2D(kernel_size=[5, 5], strides=[1, 1], padding="same", depth_multiplier=1, dilation_rate=[1, 1], depthwise_initializer=Constant(np.load('weights/depthwise_conv2d_1_Kernel')), bias_initializer=Constant(np.load('weights/depthwise_conv2d_1_Bias')))(relu1_1) conv2_1 = Conv2D(filters=48, kernel_size=[1, 1], strides=[1, 1], padding="valid", dilation_rate=[1, 1],
def __init__(
        self,
        units,
        order,
        theta,  # relative to dt=1
        method="zoh",
        realizer=Identity(),  # TODO: Deprecate?
        factory=LegendreDelay,  # TODO: Deprecate?
        memory_to_memory=True,
        hidden_to_memory=True,
        hidden_to_hidden=True,
        trainable_input_encoders=True,
        trainable_hidden_encoders=True,
        trainable_memory_encoders=True,
        trainable_input_kernel=True,
        trainable_hidden_kernel=True,
        trainable_memory_kernel=True,
        trainable_A=False,
        trainable_B=False,
        input_encoders_initializer="lecun_uniform",
        hidden_encoders_initializer="lecun_uniform",
        memory_encoders_initializer=Constant(0),  # 'lecun_uniform',
        input_kernel_initializer="glorot_normal",
        hidden_kernel_initializer="glorot_normal",
        memory_kernel_initializer="glorot_normal",
        hidden_activation="tanh",
        return_sequences=False,
        **kwargs):
    """
    LMU wrapper layer: stores the configuration and then builds either an
    FFT-based implementation (``LMUCellFFT``) or a recurrent
    ``RNN(LMUCell(...))`` depending on ``self.fft_check()``.
    """
    # Note: Setting memory_to_memory, hidden_to_memory, and hidden_to_hidden to
    # False doesn't actually remove the connections, but only initializes the
    # weights to be zero and non-trainable (when using the LMUCell).
    # This behaviour may change pending a future API decision.
    self.units = units
    self.order = order
    self.theta = theta
    self.method = method
    self.realizer = realizer
    self.factory = factory
    self.memory_to_memory = memory_to_memory
    self.hidden_to_memory = hidden_to_memory
    self.hidden_to_hidden = hidden_to_hidden
    self.trainable_input_encoders = trainable_input_encoders
    # Disabled connections force the corresponding weights to be
    # non-trainable (and, below, zero-initialized).
    self.trainable_hidden_encoders = (trainable_hidden_encoders
                                      if hidden_to_memory else False)
    self.trainable_memory_encoders = (trainable_memory_encoders
                                      if memory_to_memory else False)
    self.trainable_input_kernel = trainable_input_kernel
    self.trainable_hidden_kernel = (trainable_hidden_kernel
                                    if hidden_to_hidden else False)
    self.trainable_memory_kernel = trainable_memory_kernel
    self.trainable_A = trainable_A
    self.trainable_B = trainable_B
    self.input_encoders_initializer = input_encoders_initializer
    self.hidden_encoders_initializer = (hidden_encoders_initializer
                                        if hidden_to_memory else Constant(0))
    self.memory_encoders_initializer = (memory_encoders_initializer
                                        if memory_to_memory else Constant(0))
    self.input_kernel_initializer = input_kernel_initializer
    self.hidden_kernel_initializer = (hidden_kernel_initializer
                                      if hidden_to_hidden else Constant(0))
    self.memory_kernel_initializer = memory_kernel_initializer
    self.hidden_activation = hidden_activation
    self.return_sequences = return_sequences
    super().__init__(**kwargs)

    # NOTE(review): fft_check presumably tests whether the configuration is
    # compatible with the feed-forward FFT implementation — confirm against
    # its definition; it is not visible in this chunk.
    if self.fft_check():
        self.lmu_layer = LMUCellFFT(
            units=self.units,
            order=self.order,
            theta=self.theta,
            trainable_input_encoders=self.trainable_input_encoders,
            trainable_input_kernel=self.trainable_input_kernel,
            trainable_memory_kernel=self.trainable_memory_kernel,
            input_encoders_initializer=self.input_encoders_initializer,
            input_kernel_initializer=self.input_kernel_initializer,
            memory_kernel_initializer=self.memory_kernel_initializer,
            hidden_activation=self.hidden_activation,
            return_sequences=self.return_sequences,
        )
    else:
        self.lmu_layer = RNN(
            LMUCell(
                units=self.units,
                order=self.order,
                theta=self.theta,
                method=self.method,
                realizer=self.realizer,
                factory=self.factory,
                trainable_input_encoders=self.trainable_input_encoders,
                trainable_hidden_encoders=self.trainable_hidden_encoders,
                trainable_memory_encoders=self.trainable_memory_encoders,
                trainable_input_kernel=self.trainable_input_kernel,
                trainable_hidden_kernel=self.trainable_hidden_kernel,
                trainable_memory_kernel=self.trainable_memory_kernel,
                trainable_A=self.trainable_A,
                trainable_B=self.trainable_B,
                input_encoders_initializer=self.input_encoders_initializer,
                hidden_encoders_initializer=self.
                hidden_encoders_initializer,
                memory_encoders_initializer=self.
                memory_encoders_initializer,
                input_kernel_initializer=self.input_kernel_initializer,
                hidden_kernel_initializer=self.hidden_kernel_initializer,
                memory_kernel_initializer=self.memory_kernel_initializer,
                hidden_activation=self.hidden_activation,
            ),
            return_sequences=self.return_sequences,
        )
def __init__(self,
             output_classes=1000,
             fcn=False,
             upsampling=False,
             alpha=1,
             imagenet_filepath=None,
             model_filepath=None):
    """
    Build a VGG16 network, either as an FCN-style semantic-segmentation
    model (fcn=True) or as a plain image classifier.

    Parameters
    ----------
    output_classes : int
        Number of output classes / score channels.
    fcn : bool
        If True, build the fully-convolutional segmentation head with
        skip connections from pool3/pool4.
    upsampling : bool
        FCN mode only: upsample the fused 1/8-scale score map by 8x,
        back to the input resolution.
    alpha : float
        Width multiplier applied to every layer's filter count.
    imagenet_filepath : str or None
        HDF5 file with pretrained ImageNet weights (used in FCN mode).
    model_filepath : str or None
        Optional weights file loaded into the finished model.
    """
    super().__init__()
    self.name = "VGG16"
    self.output_classes = output_classes
    self.fcn = fcn
    self.upsampling = upsampling
    self.alpha = alpha
    weight_value_tuples = []

    if fcn:
        xavier_weight_filler = 'glorot_uniform'
        zeros_weight_filler = 'zeros'
        fc_bias_weight_filler = 'zeros'
    else:
        xavier_weight_filler = glorot_normal()
        zeros_weight_filler = Zeros()
        fc_bias_weight_filler = Constant(value=0.1)

    if fcn and imagenet_filepath:
        weights_of_pretrained_model = h5py.File(imagenet_filepath, mode='r')
        # Newer Keras weight files nest everything under 'model_weights'.
        if 'layer_names' not in weights_of_pretrained_model.attrs \
                and 'model_weights' in weights_of_pretrained_model:
            weights_of_pretrained_model = weights_of_pretrained_model[
                'model_weights']
        layer_names = [
            encoded_layer_name.decode('utf8') for encoded_layer_name in
            weights_of_pretrained_model.attrs['layer_names']
        ]

        # Keep only the layers that actually own weights.
        filtered_layer_names_owning_weights = []
        for layer_name in layer_names:
            weights = weights_of_pretrained_model[layer_name]
            weight_names = [
                encoded_layer_name.decode('utf8')
                for encoded_layer_name in weights.attrs['weight_names']
            ]
            if len(weight_names):
                filtered_layer_names_owning_weights.append(layer_name)
        layer_names = filtered_layer_names_owning_weights

        for i, layer_name in enumerate(layer_names):
            weights = weights_of_pretrained_model[layer_name]
            weight_names = [
                encoded_layer_name.decode('utf8')
                for encoded_layer_name in weights.attrs['weight_names']
            ]
            weight_values = [
                weights[weight_name] for weight_name in weight_names
            ]
            weight_values[0] = np.asarray(weight_values[0], dtype=np.float32)
            if len(weight_values[0].shape) == 4:
                # (removed a no-op self-assignment from the original here)
                if alpha == 1:
                    # todo just because model with alpha 1 was trained
                    # using theano backend
                    weight_values[0] = weight_values[0].transpose(3, 2, 1, 0)
            weight_value_tuples.append(weight_values)

        # Convert the first two dense layers into convolutional kernels.
        weightFC0W = np.asarray(weight_value_tuples[13][0], dtype=np.float32)
        weightFC0b = np.asarray(weight_value_tuples[13][1], dtype=np.float32)
        weightFC0W = weightFC0W.reshape(
            (7, 7, int(512 * alpha), int(4096 * alpha)))
        weight_value_tuples[13] = [weightFC0W, weightFC0b]
        weightFC1W = np.asarray(weight_value_tuples[14][0], dtype=np.float32)
        weightFC1b = np.asarray(weight_value_tuples[14][1], dtype=np.float32)
        weightFC1W = weightFC1W.reshape(
            (1, 1, int(4096 * alpha), int(4096 * alpha)))
        weight_value_tuples[14] = [weightFC1W, weightFC1b]

    rgb_input = Input(shape=(None, None, 3),
                      name="rgb_input")  # input_shape = (1024,2048)

    def _vgg_conv(tensor, filters, name, index, trainable=True):
        """3x3 same-padded ReLU conv, optionally seeded with pretrained
        weights from weight_value_tuples[index]."""
        return Conv2D(int(filters * alpha), (3, 3),
                      activation='relu',
                      name=name,
                      bias_initializer=zeros_weight_filler,
                      kernel_initializer=xavier_weight_filler,
                      weights=weight_value_tuples[index]
                      if len(weight_value_tuples) > 0 else None,
                      trainable=trainable,
                      padding='same')(tensor)

    # Block 1 (frozen).
    conv1_1 = _vgg_conv(rgb_input, 64, "conv1_1", 0, trainable=False)
    conv1_2 = _vgg_conv(conv1_1, 64, "conv1_2", 1, trainable=False)
    pool1 = MaxPooling2D((2, 2), strides=(2, 2),
                         name="pool1")(conv1_2)  # shape = (512,1024)
    # Block 2.
    conv2_1 = _vgg_conv(pool1, 128, "conv2_1", 2)
    conv2_2 = _vgg_conv(conv2_1, 128, "conv2_2", 3)
    pool2 = MaxPooling2D((2, 2), strides=(2, 2),
                         name="pool2")(conv2_2)  # shape = (256,512)
    # Block 3.
    conv3_1 = _vgg_conv(pool2, 256, "conv3_1", 4)
    conv3_2 = _vgg_conv(conv3_1, 256, "conv3_2", 5)
    conv3_3 = _vgg_conv(conv3_2, 256, "conv3_3", 6)
    pool3 = MaxPooling2D((2, 2), strides=(2, 2),
                         name="pool3")(conv3_3)  # shape = (128,256)
    # Block 4.
    conv4_1 = _vgg_conv(pool3, 512, "conv4_1", 7)
    conv4_2 = _vgg_conv(conv4_1, 512, "conv4_2", 8)
    conv4_3 = _vgg_conv(conv4_2, 512, "conv4_3", 9)
    pool4 = MaxPooling2D((2, 2), strides=(2, 2),
                         name="pool4")(conv4_3)  # shape = (64,128)
    # Block 5.
    conv5_1 = _vgg_conv(pool4, 512, "conv5_1", 10)
    conv5_2 = _vgg_conv(conv5_1, 512, "conv5_2", 11)
    conv5_3 = _vgg_conv(conv5_2, 512, "conv5_3", 12)
    pool5 = MaxPooling2D((2, 2), strides=(2, 2),
                         name="pool5")(conv5_3)  # shape = (32,64)

    if fcn:
        # Semseg Path.
        fc6 = Conv2D(int(4096 * alpha), (7, 7),
                     activation='relu',
                     weights=weight_value_tuples[13]
                     if len(weight_value_tuples) > 0 else None,
                     name="fc6",
                     padding='same')(pool5)
        fc6 = Dropout(0.5)(fc6)
        fc7 = Conv2D(int(4096 * alpha), (1, 1),
                     activation='relu',
                     weights=weight_value_tuples[14]
                     if len(weight_value_tuples) > 0 else None,
                     name="fc7")(fc6)
        fc7 = Dropout(0.5)(fc7)
        score_fr = Conv2D(output_classes, (1, 1),
                          activation='relu',
                          name="score_fr")(fc7)
        score_pool4 = Conv2D(output_classes, (1, 1),
                             activation='relu',
                             name="score_pool4")(pool4)
        score_pool3 = Conv2D(output_classes, (1, 1),
                             activation='relu',
                             name="score_pool3")(pool3)
        upsampling1 = UpSampling2D(size=(2, 2),
                                   interpolation='bilinear')(score_fr)
        fuse_pool4 = add([upsampling1, score_pool4])  # shape = (64,128)
        upsampling2 = UpSampling2D(size=(2, 2),
                                   interpolation='bilinear')(fuse_pool4)
        fuse_pool3 = add([upsampling2, score_pool3])  # shape = (128,256)
        if upsampling:
            # Three successive 2x bilinear upsamplings = 8x total.
            upsampling3 = UpSampling2D(
                size=(2, 2), interpolation='bilinear')(fuse_pool3)
            upsampling3 = UpSampling2D(
                size=(2, 2), interpolation='bilinear')(upsampling3)
            upsampling3 = UpSampling2D(
                size=(2, 2), interpolation='bilinear')(upsampling3)
            # shape = (1024,2048)
            output_layer = upsampling3
        else:
            output_layer = fuse_pool3  # shape = (128,256)
        output = Softmax4D(axis=3, name="softmax_output")(output_layer)
    else:
        # Univariate Classification Path (Imagenet Pretraining).
        pool5 = Flatten()(pool5)
        # BUG FIX: the original referenced an undefined name `num_filters`;
        # VGG16's fully-connected layers have 4096 units (scaled by alpha,
        # matching the FC weight reshapes above).
        fc_units = int(4096 * alpha)
        fc6 = Dense(fc_units,
                    activation='relu',
                    name="fc6",
                    bias_initializer=fc_bias_weight_filler,
                    kernel_initializer=xavier_weight_filler)(pool5)
        fc6 = Dropout(0.5)(fc6)
        fc7 = Dense(fc_units,
                    activation='relu',
                    name="fc7",
                    bias_initializer=fc_bias_weight_filler,
                    kernel_initializer=xavier_weight_filler)(fc6)
        fc7 = Dropout(0.5)(fc7)
        # BUG FIX: 'softmax_output' is not a valid activation identifier
        # (it was the *name* of the FCN branch's softmax layer); the
        # intended activation is 'softmax'.
        output = Dense(output_classes,
                       activation='softmax',
                       name="scoring",
                       bias_initializer=fc_bias_weight_filler,
                       kernel_initializer=xavier_weight_filler)(fc7)

    self.model = Model(inputs=rgb_input, outputs=output)
    if model_filepath:
        self.model.load_weights(model_filepath)
def __init__(
        self,
        units,
        order,
        theta,  # relative to dt=1
        method="zoh",
        realizer=Identity(),
        factory=LegendreDelay,
        trainable_input_encoders=True,
        trainable_hidden_encoders=True,
        trainable_memory_encoders=True,
        trainable_input_kernel=True,
        trainable_hidden_kernel=True,
        trainable_memory_kernel=True,
        trainable_forget_input_kernel=False,
        trainable_forget_hidden_kernel=False,
        trainable_forget_bias=False,
        trainable_A=False,
        trainable_B=False,
        input_encoders_initializer="lecun_uniform",
        hidden_encoders_initializer="lecun_uniform",
        memory_encoders_initializer=Constant(0),  # 'lecun_uniform',
        input_kernel_initializer="glorot_normal",
        hidden_kernel_initializer="glorot_normal",
        memory_kernel_initializer="glorot_normal",
        forget_input_kernel_initializer=Constant(1),
        forget_hidden_kernel_initializer=Constant(1),
        forget_bias_initializer=Constant(0),
        hidden_activation="tanh",
        input_activation="linear",
        gate_activation="linear",
        **kwargs):
    """
    Gated LMU cell configuration: stores trainability flags, initializers
    and activations, then discretizes the realized (A, B) delay system
    with dt=1.
    """
    super().__init__(**kwargs)
    self.units = units
    self.order = order
    self.theta = theta
    self.method = method
    self.realizer = realizer
    self.factory = factory
    self.trainable_input_encoders = trainable_input_encoders
    self.trainable_hidden_encoders = trainable_hidden_encoders
    self.trainable_memory_encoders = trainable_memory_encoders
    self.trainable_input_kernel = trainable_input_kernel
    self.trainable_hidden_kernel = trainable_hidden_kernel
    self.trainable_memory_kernel = trainable_memory_kernel
    # BUG FIX: the original assigned the one-element tuple
    # `(trainable_forget_input_kernel, )`, which is always truthy, so the
    # forget input kernel was effectively always trainable regardless of
    # the flag's value.
    self.trainable_forget_input_kernel = trainable_forget_input_kernel
    self.trainable_forget_hidden_kernel = trainable_forget_hidden_kernel
    self.trainable_forget_bias = trainable_forget_bias
    self.trainable_A = trainable_A
    self.trainable_B = trainable_B

    self.input_encoders_initializer = initializers.get(
        input_encoders_initializer)
    self.hidden_encoders_initializer = initializers.get(
        hidden_encoders_initializer)
    self.memory_encoders_initializer = initializers.get(
        memory_encoders_initializer)
    self.input_kernel_initializer = initializers.get(
        input_kernel_initializer)
    self.hidden_kernel_initializer = initializers.get(
        hidden_kernel_initializer)
    self.memory_kernel_initializer = initializers.get(
        memory_kernel_initializer)
    self.forget_input_kernel_initializer = initializers.get(
        forget_input_kernel_initializer)
    self.forget_hidden_kernel_initializer = initializers.get(
        forget_hidden_kernel_initializer)
    self.forget_bias_initializer = initializers.get(
        forget_bias_initializer)

    self.hidden_activation = activations.get(hidden_activation)
    self.input_activation = activations.get(input_activation)
    self.gate_activation = activations.get(gate_activation)

    self._realizer_result = realizer(factory(theta=theta, order=self.order))
    # Discretize the continuous system at dt=1 and shift A so the update
    # can be written as x += Ax.
    self._ss = cont2discrete(self._realizer_result.realization,
                             dt=1.0,
                             method=method)
    self._A = self._ss.A - np.eye(order)  # puts into form: x += Ax
    self._B = self._ss.B
    self._C = self._ss.C
    assert np.allclose(self._ss.D, 0)  # proper LTI

    # assert self._C.shape == (1, self.order)
    # C_full = np.zeros((self.units, self.order, self.units))
    # for i in range(self.units):
    #     C_full[i, :, i] = self._C[0]
    # decoder_initializer = Constant(
    #     C_full.reshape(self.units*self.order, self.units))

    # TODO: would it be better to absorb B into the encoders and then
    # initialize it appropriately? trainable encoders+B essentially
    # does this in a low-rank way

    # if the realizer is CCF then we get the following two constraints
    # that could be useful for efficiency
    # assert np.allclose(self._ss.B[1:], 0)  # CCF
    # assert np.allclose(self._ss.B[0], self.order**2)

    self.state_size = (self.units, self.order)
    self.output_size = self.units
def build(self, input_shape):
    """
    Initializes various network parameters (the cell's weights).

    Parameters
    ----------
    input_shape : tuple
        Keras-style input shape; only the last dimension (the feature
        count) is used.
    """
    input_dim = input_shape[-1]

    # TODO: add regularizers

    self.input_encoders = self.add_weight(
        name="input_encoders",
        shape=(input_dim, 1),
        initializer=self.input_encoders_initializer,
        trainable=self.trainable_input_encoders,
    )
    self.hidden_encoders = self.add_weight(
        name="hidden_encoders",
        shape=(self.units, 1),
        initializer=self.hidden_encoders_initializer,
        trainable=self.trainable_hidden_encoders,
    )
    self.memory_encoders = self.add_weight(
        name="memory_encoders",
        shape=(self.order, 1),
        initializer=self.memory_encoders_initializer,
        trainable=self.trainable_memory_encoders,
    )
    self.input_kernel = self.add_weight(
        name="input_kernel",
        shape=(input_dim, self.units),
        initializer=self.input_kernel_initializer,
        trainable=self.trainable_input_kernel,
    )
    self.hidden_kernel = self.add_weight(
        name="hidden_kernel",
        shape=(self.units, self.units),
        initializer=self.hidden_kernel_initializer,
        trainable=self.trainable_hidden_kernel,
    )
    self.memory_kernel = self.add_weight(
        name="memory_kernel",
        shape=(self.order, self.units),
        initializer=self.memory_kernel_initializer,
        trainable=self.trainable_memory_kernel,
    )
    self.forget_input_kernel = self.add_weight(
        name="forget_input_kernel",
        shape=(input_dim, self.order),
        initializer=self.forget_input_kernel_initializer,
        trainable=self.trainable_forget_input_kernel,
    )
    # BUG FIX: the original created this weight with the *input* kernel's
    # initializer and trainability flag (copy-paste error); use the
    # dedicated forget-hidden settings configured in __init__.
    self.forget_hidden_kernel = self.add_weight(
        name="forget_hidden_kernel",
        shape=(self.units, self.order),
        initializer=self.forget_hidden_kernel_initializer,
        trainable=self.trainable_forget_hidden_kernel,
    )
    self.forget_bias = self.add_weight(
        name="forget_bias",
        shape=(1, self.order),
        initializer=self.forget_bias_initializer,
        trainable=self.trainable_forget_bias,
    )
    self.AT = self.add_weight(
        name="AT",
        shape=(self.order, self.order),
        initializer=Constant(self._A.T),  # note: transposed
        trainable=self.trainable_A,
    )
    self.BT = self.add_weight(
        name="BT",
        shape=(1, self.order),  # system is SISO
        initializer=Constant(self._B.T),  # note: transposed
        trainable=self.trainable_B,
    )
    self.built = True
def __init__(
        self,
        units,
        order,
        theta=100,  # relative to dt=1
        method="euler",
        return_states=False,
        realizer=Identity(),
        factory=LegendreDelay,
        trainable_encoders=True,
        trainable_decoders=True,
        trainable_dt=False,
        trainable_A=False,
        trainable_B=False,
        encoder_initializer=InputScaled(1.0),  # TODO
        decoder_initializer=None,  # TODO
        hidden_activation="linear",  # TODO
        output_activation="tanh",  # TODO
        **kwargs):
    """
    Delay-cell configuration: realizes the continuous (A, B, C) delay
    system and selects a solver for the chosen discretization method.
    """
    super().__init__(**kwargs)
    self.units = units
    self.order = order
    self.theta = theta
    self.method = method
    self.return_states = return_states
    self.realizer = realizer
    self.factory = factory
    self.trainable_encoders = trainable_encoders
    self.trainable_decoders = trainable_decoders
    self.trainable_dt = trainable_dt
    self.trainable_A = trainable_A
    self.trainable_B = trainable_B

    self._realizer_result = realizer(factory(theta=theta, order=self.order))
    self._ss = self._realizer_result.realization
    self._A = self._ss.A
    self._B = self._ss.B
    self._C = self._ss.C
    assert np.allclose(self._ss.D, 0)  # proper LTI

    self.encoder_initializer = initializers.get(encoder_initializer)
    self.dt_initializer = initializers.get(Constant(1.0))

    if decoder_initializer is None:
        # Default decoder: a block-diagonal copy of C so each unit decodes
        # only its own order-sized slice of the flattened memory.
        assert self._C.shape == (1, self.order)
        C_full = np.zeros((self.units, self.order, self.units))
        for i in range(self.units):
            C_full[i, :, i] = self._C[0]
        decoder_initializer = Constant(
            C_full.reshape(self.units * self.order, self.units))

    self.decoder_initializer = initializers.get(decoder_initializer)
    self.hidden_activation = activations.get(hidden_activation)
    self.output_activation = activations.get(output_activation)

    # TODO: would it be better to absorb B into the encoders and then
    # initialize it appropriately? trainable encoders+B essentially
    # does this in a low-rank way

    # if the realizer is CCF then we get the following two constraints
    # that could be useful for efficiency
    # assert np.allclose(self._ss.B[1:], 0)  # CCF
    # assert np.allclose(self._ss.B[0], self.order**2)

    if not (self.trainable_dt or self.trainable_A or self.trainable_B):
        # This is a hack to speed up parts of the computational graph
        # that are static. This is not a general solution.
        # The discretized matrices are captured once in the closure.
        ss = cont2discrete(self._ss, dt=1.0, method=self.method)
        AT = K.variable(ss.A.T)
        B = K.variable(ss.B.T[None, ...])
        self._solver = lambda: (AT, B)
    elif self.method == "euler":
        self._solver = self._euler
    elif self.method == "zoh":
        self._solver = self._zoh
    else:
        raise NotImplementedError("Unknown method='%s'" % self.method)

    self.state_size = self.units * self.order  # flattened
    self.output_size = self.state_size if return_states else self.units
# return ker # sys.exit(0) height = 192 width = 640 ds = 'kitti' inputs = Input(shape=(height, width, 3), batch_size=1, name='input') # Block_01 mul1_1 = tf.math.multiply(inputs, np.load('weights/{}_{}x{}/FP32/data_mul_13016_copy_const.npy'.format(ds, height, width)).transpose(0,2,3,1)) add1_1 = Add()([mul1_1, np.load('weights/{}_{}x{}/FP32/data_add_13018_copy_const.npy'.format(ds, height, width)).flatten()]) conv1_1 = Conv2D(filters=64, kernel_size=[7, 7], strides=[2, 2], padding="same", dilation_rate=[1, 1], activation='relu', kernel_initializer=Constant(np.load('weights/{}_{}x{}/FP32/490_mean_Fused_Mul_1412714129_const.npy'.format(ds, height, width)).transpose(2,3,1,0)), bias_initializer=Constant(np.load('weights/{}_{}x{}/FP32/data_add_1302113026_copy_const.npy'.format(ds, height, width)).flatten()))(add1_1) # Block_02 maxpool2_1 = tf.nn.max_pool(conv1_1, ksize=[3, 3], strides=[2, 2], padding='SAME') conv2_1 = Conv2D(filters=64, kernel_size=[3, 3], strides=[1, 1], padding="same", dilation_rate=[1, 1], activation='relu', kernel_initializer=Constant(np.load('weights/{}_{}x{}/FP32/494_mean_Fused_Mul_1413114133_const.npy'.format(ds, height, width)).transpose(2,3,1,0)), bias_initializer=Constant(np.load('weights/{}_{}x{}/FP32/data_add_1302913034_copy_const.npy'.format(ds, height, width)).flatten()))(maxpool2_1) conv2_2 = Conv2D(filters=64, kernel_size=[3, 3], strides=[1, 1], padding="same", dilation_rate=[1, 1], kernel_initializer=Constant(np.load('weights/{}_{}x{}/FP32/497_mean_Fused_Mul_1413514137_const.npy'.format(ds, height, width)).transpose(2,3,1,0)), bias_initializer=Constant(np.load('weights/{}_{}x{}/FP32/data_add_1303713042_copy_const.npy'.format(ds, height, width)).flatten()))(conv2_1) add2_1 = Add()([conv2_2, maxpool2_1]) relu2_1 = ReLU()(add2_1) # Block_03 conv3_1 = Conv2D(filters=64, kernel_size=[3, 3], strides=[1, 1], padding="same", dilation_rate=[1, 1], activation='relu',
def __init__(self, patch_size=28, num_instances=2, num_classes=2,
             learning_rate=1e-4, num_bins=None, num_features=10,
             batch_size=None):
    """Build the coupled multi-instance model family with shared weights.

    Constructs and stores, all sharing the same encoder/decoder weights:
      * ``_patch_model``            : patch -> sigmoid feature vector.
      * ``_image_generation_model`` : feature vector -> reconstructed patch.
      * ``_autoencoder_model``      : patch -> reconstructed patch.
      * ``_classification_model``   : bag of patches -> [class softmax,
                                      stacked reconstructions] (compiled).
      * ``_ucc_model``, ``_distribution_model``, ``_features_model``:
        auxiliary read-out views over the same graph.

    NOTE(review): the patch encoder input is hard-coded to (28, 28, 1)
    while the per-instance inputs use ``patch_size`` — assumes
    patch_size == 28; confirm.
    """
    self._lr_rate = learning_rate
    self._num_features = num_features
    # Shared Conv2D/Dense kwargs: Glorot init, no weight regularisation.
    kernel_kwargs = {
        'kernel_initializer': glorot_uniform(),
        'kernel_regularizer': None
    }

    # --- Patch encoder: wide-residual CNN -> sigmoid feature vector ---
    patch_input = Input((28, 28, 1))
    x0 = Conv2D(16, (3, 3), padding='same',
                bias_initializer=Constant(value=0.1),
                **kernel_kwargs)(patch_input)
    x0 = self.wide_residual_blocks(x0, 2, 1, 16)
    x0 = self.wide_residual_blocks(x0, 4, 1, 16, True)
    x0 = self.wide_residual_blocks(x0, 8, 1, 16, True)
    x0 = Activation('relu')(x0)
    x0 = Flatten()(x0)
    print('flatten shape:{}'.format(K.int_shape(x0)))
    patch_output = Dense(self._num_features, activation='sigmoid',
                         use_bias=False, name='fc_sigmoid',
                         **kernel_kwargs)(x0)
    self._patch_model = Model(inputs=patch_input, outputs=patch_output)

    # --- Decoder: feature vector -> reconstructed patch (mirrors encoder) ---
    feature_input = Input((self._num_features,))
    x1 = Dense(7*7*128, use_bias=True,
               bias_initializer=Constant(value=0.1),
               **kernel_kwargs)(feature_input)
    x1 = Reshape((7, 7, 128))(x1)
    x1 = self.wide_residual_blocks_reverse(x1, 8, 1, 16, True)
    x1 = self.wide_residual_blocks_reverse(x1, 4, 1, 16, True)
    x1 = self.wide_residual_blocks_reverse(x1, 2, 1, 16)
    x1 = Activation('relu')(x1)
    reconstructed = Conv2D(1, (3, 3), padding='same',
                           bias_initializer=Constant(value=0.1),
                           **kernel_kwargs)(x1)
    print('reconstructed shape:{}'.format(K.int_shape(reconstructed)))
    self._image_generation_model = Model(inputs=feature_input,
                                         outputs=reconstructed)

    # Autoencoder = encoder followed by decoder (weights shared with both).
    ae_output = self._image_generation_model(patch_output)
    self._autoencoder_model = Model(inputs=patch_input, outputs=ae_output)

    # --- Bag level: apply the shared encoder/autoencoder to every instance ---
    input_list = list()
    output_list = list()
    ae_output_list = list()
    for i in range(num_instances):
        temp_input = Input(shape=(patch_size, patch_size, 1))
        temp_output = self._patch_model(temp_input)
        # Leading axis of 1 so per-instance results can be concatenated
        # into an (instances, features) stack below.
        temp_output = Reshape((1, -1))(temp_output)
        # print('temp_output shape:{}'.format(K.int_shape(temp_output)))
        temp_ae_output = self._autoencoder_model(temp_input)
        temp_ae_output = Reshape((1, patch_size, patch_size, 1))(temp_ae_output)
        input_list.append(temp_input)
        output_list.append(temp_output)
        ae_output_list.append(temp_ae_output)

    concatenated = layers.concatenate(output_list, axis=1)
    print('concatenated shape:{}'.format(K.int_shape(concatenated)))
    ae_concatenated = layers.concatenate(ae_output_list, axis=1)
    print('ae_concatenated shape:{}'.format(K.int_shape(ae_concatenated)))

    # Kernel-density estimate over instance features -> bag-level distribution.
    y = layers.Lambda(self.kde,
                      arguments={'num_nodes': num_bins, 'sigma': 0.1,
                                 'batch_size': batch_size,
                                 'num_features': self._num_features})(concatenated)
    print('y shape:{}'.format(K.int_shape(y)))

    # Classifier head on the bag distribution.
    y1 = Dense(384, activation='relu', name='fc_relu1', **kernel_kwargs)(y)
    y1 = Dense(192, activation='relu', name='fc_relu2', **kernel_kwargs)(y1)
    out = Dense(num_classes, activation='softmax', name='fc_softmax',
                **kernel_kwargs)(y1)

    # Joint model: classification + reconstruction, equally weighted losses.
    self._classification_model = Model(inputs=input_list,
                                       outputs=[out, ae_concatenated])
    self._ucc_model = Model(inputs=input_list, outputs=out)
    optimizer = Adam(lr=learning_rate)
    self._classification_model.compile(
        optimizer=optimizer,
        loss=['categorical_crossentropy', 'mse'],
        metrics=['accuracy'],
        loss_weights=[0.5, 0.5])
    self._distribution_model = Model(inputs=input_list, outputs=y)
    self._features_model = Model(inputs=input_list, outputs=concatenated)
embedding_vector = embeddings_index.get(word) if embedding_vector is not None: # words not found in embedding index will be all-zeros. embedding_matrix[i] = embedding_vector X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=VALIDATION_SPLIT) from tensorflow.keras.preprocessing import sequence from tensorflow.keras.models import Sequential from tensorflow.keras.layers import LSTM lstm_out = 200 batch_size = 64 model = Sequential() model.add(Embedding(num_words,EMBEDDING_DIM, embeddings_initializer=Constant(embedding_matrix), input_length=MAX_SEQUENCE_LENGTH, trainable=False)) model.add(LSTM(units=lstm_out, activation='relu', dropout=0.5, recurrent_dropout=0.2)) # model.add(LSTM(units=lstm_out, activation='relu', # dropout=0.5, recurrent_dropout=0.2)) model.add(Dense(NUMBER_DIFFERENT_OUTPUTS, activation='softmax')) model.compile( loss = 'categorical_crossentropy', optimizer='adam', metrics = ['accuracy']) print(model.summary()) # experiment = Experiment(api_key="PqrK4iPuQntpHwzb6SvJuXbdh", project_name="COMP 551", workspace="mattesko") # experiment.add_tag('LSTM-SST2') # experiment.log_dataset_info('SST2')
x_test = x_test.reshape(test_size, num_features) # Save Path dir_path = os.path.abspath("C:/Users/Jan/Dropbox/_Programmieren/UdemyTF/models/") if not os.path.exists(dir_path): os.mkdir(dir_path) mnist_model_path = os.path.join(dir_path, "mnist_model.h5") # Log Dir log_dir = os.path.abspath("C:/Users/Jan/Dropbox/_Programmieren/UdemyTF/logs/") if not os.path.exists(log_dir): os.mkdir(log_dir) model_log_dir = os.path.join(log_dir, str(time.time())) # Model params init_w = TruncatedNormal(mean=0.0, stddev=0.01) init_b = Constant(value=0.0) lr = 0.001 optimizer = Adam(lr=lr) epochs = 10 batch_size = 256 # Define the DNN model = Sequential() model.add( Dense( units=500, kernel_initializer=init_w, bias_initializer=init_b, input_shape=(num_features,), )
def dcnn_resnet(model_config, input_shape, metrics, n_classes=2, output_bias=None):
    '''
    Defines a deep convolutional neural network model for multiclass X-ray classification.
    :param model_config: A dictionary of parameters associated with the model architecture
    :param input_shape: The shape of the model input
    :param metrics: Metrics to track model's performance
    :param n_classes: Number of output classes (width of the softmax layer)
    :param output_bias: Optional constant initial bias for the output layer
    :return: a Keras Model object with the architecture defined in this method
    '''

    # Set hyperparameters
    nodes_dense0 = model_config['NODES_DENSE0']
    lr = model_config['LR']
    dropout = model_config['DROPOUT']
    l2_lambda = model_config['L2_LAMBDA']
    if model_config['OPTIMIZER'] == 'adam':
        optimizer = Adam(learning_rate=lr)
    elif model_config['OPTIMIZER'] == 'sgd':
        optimizer = SGD(learning_rate=lr)
    else:
        optimizer = Adam(learning_rate=lr)  # For now, Adam is default option
    init_filters = model_config['INIT_FILTERS']
    filter_exp_base = model_config['FILTER_EXP_BASE']
    conv_blocks = model_config['CONV_BLOCKS']
    # NOTE(review): eval() parses tuple strings like "(3, 3)" from config —
    # config must be trusted; ast.literal_eval would be the safer choice.
    kernel_size = eval(model_config['KERNEL_SIZE'])
    max_pool_size = eval(model_config['MAXPOOL_SIZE'])
    strides = eval(model_config['STRIDES'])

    # Set output bias (e.g. log class priors for imbalanced data)
    if output_bias is not None:
        output_bias = Constant(output_bias)
    print("MODEL CONFIG: ", model_config)

    # Input layer
    X_input = Input(input_shape)
    X = X_input

    # Add convolutional (residual) blocks. The skip connection is a channel
    # concatenation, not an additive residual.
    # NOTE(review): the concatenate only shape-checks when strides == (1, 1);
    # larger strides would shrink X relative to X_res — confirm config.
    for i in range(conv_blocks):
        X_res = X
        X = Conv2D(init_filters * (filter_exp_base**i), kernel_size,
                   strides=strides, padding='same',
                   kernel_initializer='he_uniform',
                   activity_regularizer=l2(l2_lambda))(X)
        X = BatchNormalization()(X)
        X = LeakyReLU()(X)
        X = Conv2D(init_filters * (filter_exp_base**i), kernel_size,
                   strides=strides, padding='same',
                   kernel_initializer='he_uniform',
                   activity_regularizer=l2(l2_lambda))(X)
        X = concatenate([X, X_res])
        X = BatchNormalization()(X)
        X = LeakyReLU()(X)
        X = MaxPool2D(max_pool_size, padding='same')(X)

    # Add fully connected layers
    X = Flatten()(X)
    X = Dropout(dropout)(X)
    X = Dense(nodes_dense0, kernel_initializer='he_uniform',
              activity_regularizer=l2(l2_lambda))(X)
    X = LeakyReLU()(X)
    Y = Dense(n_classes, activation='softmax',
              bias_initializer=output_bias, name='output')(X)

    # Set model loss function, optimizer, metrics.
    model = Model(inputs=X_input, outputs=Y)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=metrics)
    model.summary()
    return model
# Training hyper-parameters for one NAX calibration run.
MAX_EPOCHS = 500
STOPPATIENCE = 50     # early-stopping patience (epochs)
VERBOSE = 0
VERBOSE_EARLY = 1

# Fit one NAX iteration; all initial weights come from the previous
# iteration (or from the hyperparameters' grid on the first pass).
y_pred, history, model = one_NAX_iteration(dataset_NAX,
                                           BATCH_SIZE=BATCH_SIZE,
                                           EPOCHS=MAX_EPOCHS,
                                           REG_PARAM=REG_PARAM,
                                           ACT_FUN=ACT_FUN,
                                           LEARN_RATE=LEARN_RATE,
                                           HIDDEN_NEURONS=HIDDEN_NEURONS,
                                           STOPPATIENCE=STOPPATIENCE,
                                           VERBOSE=VERBOSE,
                                           VERBOSE_EARLY=VERBOSE_EARLY,
                                           LOSS_FUNCTION=my_loss,
                                           # weights are initialized from last
                                           # iteration (or from hyperparameters' grid)
                                           OUT_KERNEL=Constant(out_kernel),
                                           OUT_BIAS=Constant(out_bias),
                                           HID_KERNEL=Constant(hid_kernel),
                                           HID_BIAS=Constant(hid_bias),
                                           HID_REC=Constant(hid_rec))

# Weights of calibrated network are saved for following iteration.
# NOTE(review): assumes layers[0] is the recurrent hidden layer with weights
# ordered [kernel, recurrent_kernel, bias] and layers[1] is the output Dense
# — confirm against the model definition inside one_NAX_iteration.
hid_weights = model.layers[0].get_weights()
hid_kernel = hid_weights[0]
hid_bias = hid_weights[-1]
hid_rec = hid_weights[1]
out_weights = model.layers[1].get_weights()
out_kernel = out_weights[0]
out_bias = out_weights[1]
# --- PointNet-style input transform net over 9-D per-point features ---
input_points = Input(shape=(num_points, 9))
x = Convolution1D(64, 1, activation='relu',
                  input_shape=(num_points, 9))(input_points)
x = BatchNormalization()(x)
x = Convolution1D(128, 1, activation='relu')(x)
x = BatchNormalization()(x)
x = Convolution1D(1024, 1, activation='relu')(x)
x = BatchNormalization()(x)
# Global feature: max-pool over all points.
x = MaxPooling1D(pool_size=num_points)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
# Predict a 9x9 transform; zero kernel + identity bias makes the initial
# transform exactly the identity matrix.
x = Dense(9 * 9, kernel_initializer='zeros',
          bias_initializer=Constant(np.eye(9).flatten()),
          activity_regularizer=None)(x)
input_T = Reshape((9, 9))(x)

# forward net: apply the learned transform to the raw points.
# NOTE(review): Lambda is called with a tuple (input_points, input_T);
# Keras layers conventionally take a list of inputs — confirm mat_mul
# unpacks this as intended.
g = Lambda(mat_mul)((input_points, input_T))
g = Convolution1D(64, 1, input_shape=(num_points, 9), activation='relu')(g)
g = BatchNormalization()(g)
g = Convolution1D(64, 1, input_shape=(num_points, 9), activation='relu')(g)
g = BatchNormalization()(g)

# feature transformation net
f = Convolution1D(64, 1, activation='relu')(g)
f = BatchNormalization()(f)
f = Convolution1D(128, 1, activation='relu')(f)
f = BatchNormalization()(f)
def run(task, input_concat, model_type, i):
    '''
    Load a saved text+location sentiment model and evaluate it on a held-out
    test split. Returns (loss, accuracy).

    task: string
        binary: Dataset with sentiment of positive and negative.
        multiclass: Dataset with sentiment of positive, neutral, and negative.
    input_concat: boolean
        True: The input is concatenated.
        False: The input is separated (text and location as two inputs).
    model_type: string
        CNN
        BiLSTM
        Transformer
    i: int (or str)
        Index of the saved model run whose weights are loaded.
    '''
    ### Dataset selection
    if task == 'multiclass':
        # dataset for multiclass
        df = pd.read_csv('data/geo_microblog.csv')
    else:
        # dataset for binary: drop neutral (1), remap positive 2 -> 1
        df = pd.read_csv('data/geo_microblog.csv')
        df = df[df.sentiment != 1]
        df.sentiment.replace(2, 1, inplace=True)  # neutral

    ### Text processing
    # prepare tokenizer
    t = Tokenizer()
    t.fit_on_texts(df['text'])
    vocab_size = len(t.word_index) + 1
    # integer encode the documents
    encoded_docs = t.texts_to_sequences(df['text'])
    txtlen = 30
    loclen = 0
    # pad documents to a max length
    padded_docs = pad_sequences(encoded_docs, txtlen, padding='post')

    ### Location processing
    # location = df['geonames'].apply(lambda x: pd.Series([i for i in x.split(',') if i not in [""]]).value_counts())
    # location = location.reindex(sorted(location.columns), axis=1).fillna(0)
    # location = location.values
    # np.save('microblog_multiclass_location.npy', location)
    if task == 'binary':
        location = np.load('variable/microblog_binary_location.npy')
    else:
        location = np.load('variable/microblog_multiclass_location.npy')
    loclen = location.shape[1]

    ### Prepare train and test set
    # merge txt and loc so one split keeps rows aligned across both
    merge = np.concatenate((padded_docs, location), axis=1)
    # divide dataset to train and test set
    x_train, x_test, y_train, y_test = train_test_split(
        merge, df['sentiment'], test_size=0.3, random_state=100)
    if input_concat == False:
        # split train set to text and location
        x_train1 = x_train[:, :txtlen]
        x_train2 = x_train[:, -loclen:]
        # # split test set to text and location
        x_test1 = x_test[:, :txtlen]
        x_test2 = x_test[:, -loclen:]

    ### Pretrained word embedding
    # load the whole embedding into memory
    #embeddings_index = dict()
    #f = open('../glove.twitter.27B.200d.txt')
    #for line in f:
    #    values = line.split()
    #    word = values[0]
    #    coefs = asarray(values[1:], dtype='float32')
    #    embeddings_index[word] = coefs
    #f.close()
    #print('Loaded %s word vectors.' % len(embeddings_index))
    # create a weight matrix for words in training docs
    vector_dimension = 200
    #embedding_matrix = zeros((vocab_size, vector_dimension))
    #for word, i in t.word_index.items():
    #    embedding_vector = embeddings_index.get(word)
    #    if embedding_vector is not None:
    #        embedding_matrix[i] = embedding_vector
    if task == 'binary':
        embedding_matrix = np.load('variable/microblog_binary_embedding_matrix.npy')
    else:
        embedding_matrix = np.load('variable/microblog_multiclass_embedding_matrix.npy')

    ### Deep Learning model
    if input_concat == True:
        # single input: text tokens and location features concatenated
        input_dimension = txtlen + loclen
        inputs = Input(shape=(input_dimension,))
        embedding_layer = Embedding(vocab_size, vector_dimension,
                                    embeddings_initializer=Constant(embedding_matrix),
                                    input_length=input_dimension)(inputs)
    else:
        # two inputs: pretrained embedding for text, random for location
        inputText = Input(shape=(txtlen,))
        x = Embedding(vocab_size, vector_dimension,
                      embeddings_initializer=Constant(embedding_matrix),
                      input_length=txtlen)(inputText)
        inputLocation = Input(shape=(loclen,))
        y = Embedding(vocab_size, vector_dimension,
                      embeddings_initializer=RandomNormal(),
                      input_length=loclen)(inputLocation)
        embedding_layer = concatenate([x, y], axis=1)

    if model_type == "CNN":
        # CNN: stacked 1-D convolutions + max-pool + dense head
        convolution_first = Convolution1D(filters=100, kernel_size=5,
                                          activation='relu')(embedding_layer)
        convolution_second = Convolution1D(filters=100, kernel_size=4,
                                           activation='relu')(convolution_first)
        convolution_third = Convolution1D(filters=100, kernel_size=3,
                                          activation='relu')(convolution_second)
        pooling_max = MaxPooling1D(pool_size=2)(convolution_third)
        flatten_layer = Flatten()(pooling_max)
        dense = Dense(20, activation="relu")(flatten_layer)
        if task == 'binary':
            outputs = Dense(units=1, activation='sigmoid')(dense)
        else:
            outputs = Dense(units=3, activation='softmax')(dense)

    if model_type == "BiLSTM":
        ### BiLSTM
        lstm_first = Bidirectional(LSTM(units=100))(embedding_layer)
        dense = Dense(20, activation="relu")(lstm_first)
        if task == 'binary':
            outputs = Dense(1, activation='sigmoid')(dense)
        else:
            outputs = Dense(3, activation='softmax')(dense)

    if model_type == "Transformer":
        ### Transformer
        num_heads = 2  # Number of attention heads
        ff_dim = 32    # Hidden layer size in feed forward network inside transformer
        if input_concat == True:
            embedding_layer_weighted = TokenAndPositionEmbedding(
                input_dimension, vocab_size, vector_dimension,
                Constant(embedding_matrix))
            x = embedding_layer_weighted(inputs)
        else:
            embedding_layer_weighted = TokenAndPositionEmbedding(
                txtlen, vocab_size, vector_dimension,
                Constant(embedding_matrix))
            x = embedding_layer_weighted(inputText)
            embedding_layer = TokenAndPositionEmbedding(
                loclen, vocab_size, vector_dimension, RandomNormal())
            y = embedding_layer(inputLocation)
            x = concatenate([x, y], axis=1)
        transformer_block = TransformerBlock(vector_dimension, num_heads, ff_dim)
        x = transformer_block(x)
        x = GlobalAveragePooling1D()(x)
        x = Dense(20, activation="relu")(x)
        if task == 'binary':
            outputs = Dense(1, activation='sigmoid')(x)
        else:
            outputs = Dense(3, activation='softmax')(x)

    # build model
    if input_concat == True:
        model = Model(inputs, outputs)
    else:
        model = Model(inputs=[inputText, inputLocation], outputs=outputs)

    # compile model
    if task == 'binary':
        model.compile(optimizer='adam', loss='binary_crossentropy',
                      metrics=['accuracy'])
    else:
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                      metrics=['sparse_categorical_accuracy'])

    if input_concat == True:
        # load saved weights for run i
        model.load_weights(f"saved/{task}/2_loc/concat/{model_type}/{task}_location_True_concat_{model_type}_{i}.h5")
        # evaluate model
        loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
    else:
        # load saved weights for run i
        model.load_weights(f"saved/{task}/2_loc/both/{model_type}/{task}_location_True_both_{model_type}_{i}.h5")
        # evaluate model
        loss, accuracy = model.evaluate([x_test1, x_test2], y_test, verbose=1)
    return loss, accuracy
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, LeakyReLU, BatchNormalization
from tensorflow.keras.layers import Activation, add, multiply, Lambda
from tensorflow.keras.layers import AveragePooling2D, average, UpSampling2D, Dropout
from tensorflow.keras.initializers import VarianceScaling, Constant
from losses import tversky_loss

# initializers shared by the conv layers in this file
kinit = VarianceScaling(scale=1.0, mode='fan_in', distribution='normal', seed=None)
bias_init = Constant(value=0.1)


def expend_as(tensor, rep, name):
    # Replicate `tensor` `rep` times along the channel axis via repeated
    # concatenation (same content as repeat_elements when the input has a
    # single channel, which is how attention coefficients are expanded).
    # NOTE(review): the `name` parameter is unused by this implementation;
    # it only fed the commented-out Lambda variant below.
    my_repeat = tensor
    for i in range(rep-1):
        my_repeat = concatenate([my_repeat, tensor])
    # my_repeat = Lambda(lambda x, repnum: repeat_elements(x, repnum, axis=3), arguments={'repnum': rep},
    #                    name='psi_up' + name)(tensor)
    return my_repeat


def AttnGatingBlock(x, g, inter_shape, name):
    ''' take g which is the spatially smaller signal, do a conv to get the same
    number of feature channels as x (bigger spatially)
    do a conv on x to also get same feature channels (theta_x)
    then, upsample g to be same size as x
    add x and g (concat_xg)
    relu, 1x1 conv, then sigmoid then upsample the final - this gives us attn coefficients'''
def __init__(self, noutput, use_snapshot=True):
    """Build a VGG-16-style CNN that outputs a normalised distribution
    over `noutput` bins (softplus + sum-normalisation), and resume
    training from the newest snapshot in ./snapshots when available.

    noutput      : number of output bins.
    use_snapshot : when True and snapshots exist, reload the saved model
                   and continue from the epoch after the snapshot's.
    """
    k_init = he_normal()
    b_init = Constant(0.5)
    k_reg = l2(1e-4)
    self.layers = [
        ## conv block 1
        Conv2D(64, 3, strides=1, input_shape=(224, 224, 1),
               kernel_initializer=k_init, bias_initializer=b_init,
               kernel_regularizer=k_reg, activation='relu'),
        MaxPooling2D(2),
        ## conv block 2
        Conv2D(128, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        MaxPooling2D(2),
        ## conv block 3
        Conv2D(256, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        Conv2D(256, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        MaxPooling2D(2),
        ## conv block 4
        Conv2D(512, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        Conv2D(512, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        MaxPooling2D(2),
        ## conv block 5
        Conv2D(512, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        Conv2D(512, 3, strides=1, kernel_initializer=k_init,
               bias_initializer=b_init, kernel_regularizer=k_reg,
               activation='relu'),
        MaxPooling2D(2),
        ## FC layers
        Flatten(),
        # Dropout(0.5),
        Dense(2048, kernel_regularizer=k_reg, activation='relu'),
        # Dropout(0.5),
        Dense(2048, kernel_regularizer=k_reg, activation='relu'),
        # Dropout(0.5),
        ## output: softplus keeps values positive, Lambda normalises
        ## them so the output sums to 1 along the last axis.
        Dense(noutput),
        Activation('softplus'),
        Lambda(lambda x: x / K.sum(x, axis=-1, keepdims=True)),
    ]
    self.callbacks = [
        ModelCheckpoint("snapshots/model.{epoch:04d}.hdf5",
                        monitor='val_loss'),
        TensorBoard(log_dir="./logs")
    ]

    # Snapshot bookkeeping: filenames are model.{epoch:04d}.hdf5, so a
    # lexicographic sort orders them by epoch.
    snapshots_dir = join(getcwd(), 'snapshots')
    if not isdir(snapshots_dir):
        mkdir(snapshots_dir)
    self.snapshots = sorted(glob(snapshots_dir + "/model.*.hdf5"))
    self.use_snapshot = use_snapshot
    self.initial_epoch = 0
    if self.use_snapshot and len(self.snapshots) > 0:
        print("Loading model from snapshot '%s'" % self.snapshots[-1])
        self.model = tensorflow.keras.models.load_model(self.snapshots[-1])
        # Epoch number is the second-to-last dot-separated filename field.
        self.initial_epoch = int(self.snapshots[-1].split(".")[-2]) + 1
        print("Last epoch was %d" % self.initial_epoch)
    else:
        print("No snapshot found, creating model")
        self.model = tfkeras.models.Sequential(self.layers)

    # NOTE(review): mixes the `tensorflow.keras` and `tfkeras` aliases —
    # assumes both names are imported at module level; confirm.
    self.optimizer = tfkeras.optimizers.Adam(lr=0.0001)
    self.model.compile(optimizer=self.optimizer,
                       loss='kullback_leibler_divergence')

    print("Layer output shapes:")
    for layer in self.model.layers:
        print(layer.output_shape)
    print("%0.2E parameters" % self.model.count_params())
def contextual_lstm_model_50(embed_mat, embed_size=30000, embed_dim=50, max_length=40, optimizer="Adam"):
    """Build the contextual LSTM (50-dimensional word vectors) model.

    Tweet tokens pass through a frozen, mask-enabled pretrained embedding
    and an LSTM. The LSTM state is read out twice: directly as an
    auxiliary sigmoid output (to inspect what the LSTM alone predicts),
    and concatenated with 5 metadata features, then through two ReLU
    layers, for the final sigmoid output. Both outputs use binary
    cross-entropy, weighted 0.8 (final) / 0.2 (auxiliary).
    """
    tweet_input = Input(shape=(max_length,), dtype="int64", name="tweet_input")

    # Frozen pretrained embedding; mask_zero propagates the padding mask.
    embedding = Embedding(
        input_dim=embed_size,
        output_dim=embed_dim,
        embeddings_initializer=Constant(embed_mat),
        input_length=max_length,
        trainable=False,
        mask_zero=True,
    )
    encoded = LSTM(embed_dim)(embedding(tweet_input))

    # Side output — does not feed the contextual branch.
    auxiliary_output = Dense(1, activation="sigmoid", name="auxiliary_output")(encoded)

    metadata_input = Input(shape=(5,), name="metadata_input")  # 5 metadata fields

    # Fuse LSTM state with metadata on a common float32 dtype.
    fused = Concatenate(axis=-1)(
        [cast(encoded, "float32"), cast(metadata_input, "float32")]
    )
    hidden = Dense(128, activation="relu")(fused)
    hidden = Dense(128, activation="relu")(hidden)
    final_output = Dense(1, activation="sigmoid", name="final_output")(hidden)

    # compiling the model:
    model = Model(
        inputs=[tweet_input, metadata_input],
        outputs=[final_output, auxiliary_output],
    )
    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["acc"],
        loss_weights=[0.8, 0.2],
    )
    model.summary()
    return model
def main():
    """Train and evaluate the embedding + dense classifier, then persist results.

    Loads the TSE dataset and a precomputed embedding matrix, balances the
    training split with SMOTE, trains a binary classifier, and writes the
    per-epoch history to CSV and the fitted model to HDF5.
    """
    # load dataset and split labels for validation and classification
    tse = load_tse()
    labels_cv, labels_pr = split_labels_tse(tse)

    # load features for validation and classification
    features_cv, features_pr = load_features()

    # # split random
    # split = random.sample(range(0, len(labels_cv)), 1000)
    # features_cv, labels_cv = features_cv[split], labels_cv.iloc[split,]

    # split up features and labels so that we have two train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        features_cv, labels_cv, test_size=0.20, random_state=42)

    # load word embeddings for validation and classification
    embedding_matrix = load_embedding_layer()

    # input shape is the vocabulary (term) count
    vocab_size = len(embedding_matrix)

    # define parameters for the frozen pretrained embedding layer
    kwargs = {
        'input_dim': vocab_size,
        'output_dim': 300,
        'trainable': False,
        'embeddings_initializer': Constant(embedding_matrix)
    }

    # oversample train data so that classes are balanced training models
    sm = SMOTE()
    X_train, y_train = sm.fit_sample(X_train, y_train)

    # check shape
    print('rows, features: {}'.format(X_train.shape))

    # create layers and hidden units
    model = keras.Sequential([
        keras.layers.Embedding(**kwargs),
        keras.layers.GlobalMaxPooling1D(),
        keras.layers.Dense(100, activation=tf.nn.relu),
        keras.layers.Dense(50, activation=tf.nn.relu),
        keras.layers.Dense(1, activation=tf.nn.sigmoid)
    ])

    # check model specifications
    model.summary()

    # compile metadata: definition of loss function, optimizer, and
    # metrics to evaluate results
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # fit model
    history = model.fit(x=X_train, y=y_train, batch_size=256, epochs=150,
                        validation_data=(X_test, y_test))

    # create graph
    history_dict = history.history

    # total epochs. The accuracy key is 'accuracy' on TF >= 2.0 and 'acc'
    # on older versions; catch only the missing-key case instead of a
    # bare except that would also hide unrelated errors.
    try:
        epochs = range(1, len(history_dict['accuracy']) + 1)
    except KeyError:
        epochs = range(1, len(history_dict['acc']) + 1)

    # save results to disk
    history_dict['epochs'] = epochs
    history_dict = pd.DataFrame(history_dict)
    history_dict.to_csv('data/validation_performance_dnn.csv', index=False)

    # save model to disk
    model.save('data/dnn_model.h5')
def get_Papakostas_model(PARAMS, n_classes=2):
    '''
    CNN architecture proposed by Papakostas et al. [2]

    Parameters
    ----------
    PARAMS : dict
        Contains various parameters; 'input_shape' and 'Model' are read here.
    n_classes : int, optional
        Number of classes. Default is 2.

    Returns
    -------
    model : tensorflow.keras.models.Model
        Compiled CNN model.
    learning_rate : float
        Initial learning rate (the optimizer itself follows an
        exponential-decay schedule).
    '''
    input_img = Input(PARAMS['input_shape'][PARAMS['Model']])

    # Conv block 1: 5x5/2 conv -> local response norm -> ReLU -> 3x3/2 pool
    x = Conv2D(96, input_shape=PARAMS['input_shape'][PARAMS['Model']],
               kernel_size=(5, 5), strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(input_img)
    x = Lambda(lambda norm_lyr: LRN(norm_lyr, depth_radius=5,
                                    alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    # Conv block 2: 3x3/2 conv -> LRN -> ReLU -> pool
    x = Conv2D(384, kernel_size=(3, 3), strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(x)
    x = Lambda(lambda norm_lyr: LRN(norm_lyr, depth_radius=5,
                                    alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    # Conv block 3: 3x3/1 conv -> ReLU -> pool
    x = Conv2D(512, kernel_size=(3, 3), strides=(1, 1),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1), padding='same')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    # Classifier head: two 4096-unit FC layers with batch-norm and dropout
    x = Flatten()(x)
    x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_classes, activation='softmax',
                   kernel_initializer=RandomNormal(stddev=0.01),
                   bias_initializer=Constant(value=0.1))(x)

    model = Model(input_img, output)

    # SGD with exponentially decaying learning rate.
    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=700,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule)
    # NOTE(review): binary_crossentropy is paired with a 2-unit softmax for
    # the binary case — works with one-hot labels, but
    # categorical_crossentropy would be the conventional pairing; confirm.
    if n_classes == 2:
        model.compile(loss='binary_crossentropy', optimizer=optimizer,
                      metrics=['accuracy'])
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['accuracy'])

    print(model.summary())
    print(
        'Architecture proposed by Papakostas et al. Expert Systems with Applications 2018\n'
    )

    return model, initial_learning_rate
def create_fc_crashing_model(Ns, weights, biases, p_fail, KLips=1, func='sigmoid', reg_spec={}, do_print=True, loss=keras.losses.mean_squared_error, optimizer=None, do_compile=True): """ Create a simple network with given dropout prob, weights and Lipschitz coefficient for sigmoid Ns: array of shapes: [input, hidden1, hidden2, ..., output] weights: array with matrices. The shape must be [hidden1 x input, hidden2 x hidden1, ..., output x hiddenLast] biases: array with vectors. The shape must be [hidden1, hidden2, ..., output] p_fail: array with p_fail for [input, hidden1, ..., output]. Must be the same size as Ns. Both for inference and training KLips: the Lipschitz coefficient func: The acivation function. Currently 'relu' and 'sigmoid' are supported. Note that the last layer is linear to agree with the When Neurons Fail article reg_spec: dictionary of regularizers """ # default optimizer if not optimizer: optimizer = keras.optimizers.Adam() # input sanity check assert isinstance(Ns, list), "Ns must be a list" assert_equal(len(Ns), len(p_fail), "Shape array length", "p_fail array length") assert_equal(len(Ns), len(weights) + 1, "Shape array length", "Weights array length + 1") assert_equal(len(biases), len(weights), "Biases array length", "Weights array length") assert func in [ 'relu', 'sigmoid' ], "Activation %s must be either relu or sigmoid" % str(func) # assert reg_type in [None, 'l1', 'l2', 'balanced', 'continuous'], "Regularization %s must be either l1, l2 or None" % str(reg_type) assert isinstance(reg_spec, dict), "Please supply a dictionary for regularizers" assert isinstance(KLips, Number), "KLips %s must be a number" % str(KLips) # assert isinstance(reg_coeff, Number) or isinstance(reg_coeff, list), "reg_coeff %s must be a number" % str(reg_coeff) # creating model model = Sequential() # loop over shapes for i in range(len(Ns)): # is the first layer (with input)? is_input = (i == 0) # is the last layer (with output)? 
is_output = (i == len(Ns) - 1) # probability of failure for this shape p = p_fail[i] # current shape N_current = Ns[i] # previous shape or None N_prev = Ns[i - 1] if i > 0 else None # adding a dense layer if have previous shape (otherwise it's input) if not is_input: # deciding the type of regularizer regularizer_w, regularizer_b = get_regularizer_wb( reg_spec, is_input=is_input, is_output=is_output, layer=i) # deciding the activation function activation = 'linear' if is_output else get_custom_activation( KLips, func) # extracting weights and biases w = weights[i - 1] b = biases[i - 1] assert_equal(w.shape, (N_current, N_prev), "Weight matrix %d/%d shape" % (i, len(Ns) - 1), "Ns array entries") assert_equal(b.shape, (N_current, ), "Biases vector %d/%d shape" % (i, len(Ns) - 1), "Ns array entry") # adding a Dense layer model.add( Dense(N_current, input_shape=(N_prev, ), kernel_initializer=Constant(w.T), activation=activation, bias_initializer=Constant(b), kernel_regularizer=regularizer_w, bias_regularizer=regularizer_b)) # adding dropout if needed if p > 0: model.add(IndependentCrashes(p, input_shape=(N_current, ))) # parameters for compilation, layer = 1 only (where the crashes are # obtaining total regularizers and adding it reg_loss = get_regularizer_total(reg_spec, model, layer=1) loss_reg_add = lambda y_true, y_pred: loss(y_true, y_pred) + reg_loss parameters = { 'loss': loss_reg_add, 'optimizer': optimizer, 'metrics': [ keras.metrics.categorical_accuracy, 'mean_squared_error', 'mean_absolute_error' ] } # if compilation requested, doing it if do_compile: # compiling the model model.compile(**parameters) # printing the summary if do_print: model.summary() # returning Keras model return model # otherwise returning the parameters for compilation else: return model, parameters
def run(task, model_type, i):
    '''
    Load a saved text-only sentiment model and evaluate it on a held-out
    test split. Returns (loss, accuracy).

    task: string
        binary: Dataset with sentiment of positive and negative.
        multiclass: Dataset with sentiment of positive, neutral, and negative.
    model_type: string
        CNN
        RNN
        BiLSTM
        Transformer
    i: int (or str)
        Index of the saved model run whose weights are loaded.
    '''
    ### Dataset selection
    if task == 'multiclass':
        # dataset for multiclass
        df = pd.read_csv('data/geo_microblog.csv')
    else:
        # dataset for binary: drop neutral (1), remap positive 2 -> 1
        df = pd.read_csv('data/geo_microblog.csv')
        df = df[df.sentiment != 1]
        df.sentiment.replace(2, 1, inplace=True)  # neutral

    ### Text processing
    # prepare tokenizer
    t = Tokenizer()
    t.fit_on_texts(df['text'])
    vocab_size = len(t.word_index) + 1
    # integer encode the documents
    encoded_docs = t.texts_to_sequences(df['text'])
    txtlen = 30
    loclen = 0
    # pad documents to a max length
    padded_docs = pad_sequences(encoded_docs, txtlen, padding='post')

    ### Prepare train and test set
    x_train, x_test, y_train, y_test = train_test_split(
        padded_docs, df['sentiment'], test_size=0.3, random_state=100)

    ### Pretrained word embedding
    # load the whole embedding into memory
    #embeddings_index = dict()
    #f = open('../glove.twitter.27B.200d.txt')
    #for line in f:
    #    values = line.split()
    #    word = values[0]
    #    coefs = asarray(values[1:], dtype='float32')
    #    embeddings_index[word] = coefs
    #f.close()
    #print('Loaded %s word vectors.' % len(embeddings_index))
    # create a weight matrix for words in training docs
    vector_dimension = 200
    #embedding_matrix = zeros((vocab_size, vector_dimension))
    #for word, i in t.word_index.items():
    #    embedding_vector = embeddings_index.get(word)
    #    if embedding_vector is not None:
    #        embedding_matrix[i] = embedding_vector
    if task == 'binary':
        embedding_matrix = np.load('variable/microblog_binary_embedding_matrix.npy')
    else:
        embedding_matrix = np.load('variable/microblog_multiclass_embedding_matrix.npy')

    ### Deep Learning model
    input_dimension = txtlen + loclen
    inputs = Input(shape=(input_dimension,))
    embedding_layer = Embedding(vocab_size, vector_dimension,
                                embeddings_initializer=Constant(embedding_matrix),
                                input_length=input_dimension)(inputs)

    if model_type == "CNN":
        # CNN: stacked 1-D convolutions + max-pool + dense head
        convolution_first = Convolution1D(filters=100, kernel_size=5,
                                          activation='relu')(embedding_layer)
        convolution_second = Convolution1D(filters=100, kernel_size=4,
                                           activation='relu')(convolution_first)
        convolution_third = Convolution1D(filters=100, kernel_size=3,
                                          activation='relu')(convolution_second)
        pooling_max = MaxPooling1D(pool_size=2)(convolution_third)
        flatten_layer = Flatten()(pooling_max)
        dense = Dense(20, activation="relu")(flatten_layer)
        if task == 'binary':
            outputs = Dense(units=1, activation='sigmoid')(dense)
        else:
            outputs = Dense(units=3, activation='softmax')(dense)

    if model_type == "RNN":
        ### Bidirectional simple RNN
        rnn_first = Bidirectional(SimpleRNN(units=100))(embedding_layer)
        dense = Dense(20, activation="relu")(rnn_first)
        if task == 'binary':
            outputs = Dense(1, activation='sigmoid')(dense)
        else:
            outputs = Dense(3, activation='softmax')(dense)

    if model_type == "BiLSTM":
        ### BiLSTM
        lstm_first = Bidirectional(LSTM(units=100))(embedding_layer)
        dense = Dense(20, activation="relu")(lstm_first)
        if task == 'binary':
            outputs = Dense(1, activation='sigmoid')(dense)
        else:
            outputs = Dense(3, activation='softmax')(dense)

    if model_type == "Transformer":
        ### Transformer
        num_heads = 2  # Number of attention heads
        ff_dim = 32    # Hidden layer size in feed forward network inside transformer
        embedding_layer_weighted = TokenAndPositionEmbedding(
            input_dimension, vocab_size, vector_dimension,
            Constant(embedding_matrix))
        x = embedding_layer_weighted(inputs)
        transformer_block = TransformerBlock(vector_dimension, num_heads, ff_dim)
        x = transformer_block(x)
        x = GlobalAveragePooling1D()(x)
        x = Dense(20, activation="relu")(x)
        if task == 'binary':
            outputs = Dense(1, activation='sigmoid')(x)
        else:
            outputs = Dense(3, activation='softmax')(x)

    # build model
    model = Model(inputs, outputs)

    # compile model
    if task == 'binary':
        model.compile(optimizer='adam', loss='binary_crossentropy',
                      metrics=['accuracy'])
    else:
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                      metrics=['sparse_categorical_accuracy'])

    # early stopping
    # NOTE(review): this callback is created but never passed to fit/evaluate
    # in this function — it has no effect here.
    early_stopping_monitor = EarlyStopping(patience=3)  # patience: epochs the model can go without improving before we stop training

    # load saved weights for run i
    model.load_weights(f"saved/{task}/1_text_only/{model_type}/{task}_location_False_{model_type}_{i}.h5")

    # evaluate model
    loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
    return loss, accuracy
def create_optpresso_model(input_shape: List) -> Sequential:
    """Build the optpresso image-regression CNN.

    The network normalizes and augments the input image, runs it through
    a stack of progressively wider stride-2 convolutions, and finishes
    with a dropout-regularized dense head producing a single scalar
    whose bias is initialized to the mean pull time.

    Parameters
    ----------
    input_shape : List
        Shape of the input images (H, W, C) fed to the InputLayer.

    Returns
    -------
    Sequential
        The assembled (uncompiled) Keras model.
    """
    layer_stack = [
        InputLayer(input_shape=input_shape),
        # Preprocessing: mean subtraction, scaling to [0, 1], then
        # random flip/rotation augmentation.
        SubtractMeanLayer(mean=MEAN_IMG_VALUES),
        Rescaling(1.0 / 255),
        RandomFlip(),
        RandomRotation(1),
        # Stem: stride-1 5x5 conv with batch-norm before the activation.
        Convolution2D(32, (5, 5), padding="same"),
        BatchNormalization(),
        Activation("relu"),
        # Downsampling stack: each stride-2 conv halves spatial extent
        # while the channel count grows 48 -> 64 -> 128.
        Convolution2D(48, (5, 5), strides=(2, 2), padding="same"),
        Activation("relu"),
        Convolution2D(48, (5, 5), strides=(2, 2), padding="same"),
        Activation("relu"),
        Convolution2D(64, (3, 3), strides=(2, 2), padding="same"),
        Activation("relu"),
        Convolution2D(64, (3, 3), strides=(2, 2), padding="same"),
        Activation("relu"),
        Convolution2D(128, (3, 3), strides=(2, 2), padding="same"),
        Activation("relu"),
        Convolution2D(128, (3, 3), strides=(2, 2), padding="same"),
        # Flatten sits between the last conv and its activation, exactly
        # as in the original layer ordering.
        Flatten(),
        Activation("relu"),
        # Dense head with 0.5 dropout after every hidden layer.
        Dense(128),
        Dropout(0.5),
        Activation("relu"),
        Dense(96),
        Dropout(0.5),
        Activation("relu"),
        Dense(64),
        Dropout(0.5),
        Activation("relu"),
        # Scalar regression output; bias seeded at the dataset mean so
        # early training starts near a sensible prediction.
        Dense(1, bias_initializer=Constant(MEAN_PULL_TIME)),
    ]
    net = Sequential()
    for layer in layer_stack:
        net.add(layer)
    return net
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator from tensorflow.python.keras.models import Sequential, Model, Input from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense, concatenate from tensorflow.python.keras import backend as K from tensorflow.python.keras.applications.inception_v3 import InceptionV3 import matplotlib.pyplot as plt import numpy as np import tensorflow_model_optimization.sparsity.keras as sparsity import numpy as np from tensorflow.keras.initializers import Constant class_bias = Constant( np.array([-3.353, -2.917, -2.108, -4.502, -2.07, 0.703, -4.196])) def fire_module(x, fire_id, squeeze=16, expand=64): sq1x1 = "squeeze1x1" exp1x1 = "expand1x1" exp3x3 = "expand3x3" relu = "relu_" s_id = 'fire' + str(fire_id) + '/' if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 x = Conv2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x) x = Activation('relu', name=s_id + relu + sq1x1)(x)
def defineModel(self):
    """Build and compile the action variational auto-encoder (VAE).

    Constructs three Keras models from the action space: an encoder
    mapping raw action inputs to a sampled latent vector, a decoder
    mapping a latent vector back to action outputs, and the end-to-end
    VAE (decoder applied to the encoder output). The decoder and the
    VAE are compiled with SGD, and the VAE is stored on ``self.model``.

    Reads ``self.latentDim`` and ``self.actionSpace``; calls
    ``self.lossWrapper`` to fold the KL term into each reconstruction
    loss identifier.
    """
    verbose = False  # flip to True for layer summaries / graph plot
    latentDim = self.latentDim
    #define encoder model
    # One Keras input layer per action-space component.
    encoder_inputLayer_s = ActorCritic_general.generateActionInputLayer(
        self.actionSpace)
    # Merge multiple action inputs into one tensor; a lone input is
    # used as-is.
    if len(encoder_inputLayer_s) > 1:
        encoder_inputLayer = concatenate(encoder_inputLayer_s,
                                         name='concattenated_input')
    else:
        encoder_inputLayer = encoder_inputLayer_s[0]
    #encoder_intLayer = Dense(latentDim*4,activation='relu')(encoder_inputLayers)
    encoder_intLayer_last = Dense(latentDim * 2,
                                  activation='relu',
                                  name='encoding')(encoder_inputLayer)
    # Two parallel heads: latent means and latent log-variances.
    encoder_meanLayer = Dense(latentDim,
                              activation='relu',
                              name='latent_means')(encoder_intLayer_last)
    encoder_logVarianceLayer = Dense(
        latentDim,
        activation='relu',
        bias_initializer=Constant(value=0),
        name='latent_log_variance')(encoder_intLayer_last)
    #encoder_outputLayer = Dense(latentDim)(concatenate([encoder_meanLayer,encoder_logVarianceLayer]))
    # Reparameterization trick: sample z from (mean, log-variance).
    encoder_outputLayer = Lambda(VAE_sampling,
                                 output_shape=(latentDim, ),
                                 name='sampling_latent_action')([
                                     encoder_meanLayer,
                                     encoder_logVarianceLayer
                                 ])
    #encoder_outputLayers = [encoder_meanLayer,encoder_logVarianceLayer,encoder_outputLayer]
    encoder_Model = Model(encoder_inputLayer_s,
                          encoder_outputLayer,
                          name='encoder')
    if verbose:
        encoder_Model.summary()
    #plot_model(encoder_Model, to_file='vae_encoder.png', show_shapes=True)

    #define decoder model
    decoder_inputLayerLatentAction = Input(shape=(latentDim, ),
                                           name='latentLayer')
    #decoder_intLayer = Dense(latentDim*2,activation='relu')(decoder_inputLayerLatentAction)
    decoder_intLayer_last = Dense(
        latentDim * 2, activation='relu',
        name='decoding')(decoder_inputLayerLatentAction)
    # Output layers mirror the action space; the helper also returns one
    # reconstruction-loss identifier per output head.
    decoder_outputLayer, losses_reconstruction = ActorCritic_general.generateActionOutputLayer(
        self.actionSpace, decoder_intLayer_last)
    decoder_Model = Model(decoder_inputLayerLatentAction,
                          decoder_outputLayer,
                          name='decoder')
    if verbose:
        decoder_Model.summary()
    # NOTE(review): lr=1 is an unusually large SGD learning rate —
    # confirm this is intentional.
    sgd = optimizers.SGD(lr=1)
    decoder_Model.compile(optimizer=sgd,
                          loss='mean_squared_error',
                          metrics=['accuracy'])
    #plot_model(decoder_Model, to_file='vae_decoder.png', show_shapes=True)

    #define VAE model
    # End-to-end model: decoder applied to the encoder's sampled latent.
    outputs = decoder_Model(encoder_Model(encoder_inputLayer_s))
    vae_model = Model(encoder_inputLayer_s, outputs, name='vae')
    if verbose:
        vae_model.summary()
        plot_model(vae_model,
                   to_file='vae_model.png',
                   show_shapes=True,
                   expand_nested=True)

    #add KL-divergence to losses
    # Standard VAE KL term: -0.5 * sum(1 + log(var) - mean^2 - var).
    kl_loss = 1 + encoder_logVarianceLayer - K.square(
        encoder_meanLayer) - K.exp(encoder_logVarianceLayer)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    losses = []
    for i, loss_recon_str in enumerate(losses_reconstruction):
        loss = self.lossWrapper(kl_loss, loss_recon_str)
        losses.append(loss)
    #vae_model.add_loss(losses)

    #define model
    sgd = optimizers.SGD(lr=1)
    # NOTE(review): the KL-augmented `losses` list built above is never
    # used — the VAE is compiled with the plain reconstruction losses,
    # so the KL term has no effect on training. Confirm whether
    # `loss=losses` was intended here.
    vae_model.compile(optimizer=sgd,
                      loss=losses_reconstruction,
                      metrics=['accuracy'])

    #save models
    self.model = vae_model