def __init__(self, moments, model=1, data_path=None, batch_size=None,
             input_dims=6, trainset=100, loadModel=None,
             reporducability=False):
    """Load the data, then build (or load) and compile the network.

    Args:
        moments: Number of time steps per sample; also drives the TCN
            filter count (models 1 and 2) and the dilation schedule.
        model: Architecture selector (1, 2 or 3). Ignored when
            ``loadModel`` is given.
        data_path: Forwarded to ``self.get_data``.
        batch_size: Batch size used in the input ``batch_shape``.
        input_dims: Not used in this method; kept for compatibility.
        trainset: Not used in this method; kept for compatibility.
        loadModel: Path of a saved model to load instead of building one.
        reporducability: When true, seed NumPy's RNG with 2020.

    Raises:
        ValueError: If a model has to be built and ``model`` is not
            1, 2 or 3.
    """
    # Fail fast: previously an unknown selector only surfaced later as a
    # NameError on the undefined output tensor.
    if loadModel is None and model not in (1, 2, 3):
        raise ValueError(
            'Unknown model selector %r; expected 1, 2 or 3' % (model,))

    if reporducability:
        np.random.seed(2020)

    self.batch_size = batch_size
    save = f"model_{model}+moments_{moments}+batch_size{batch_size}.h5"
    self.save = ModelCheckpoint(save, save_best_only=True,
                                monitor='val_loss', mode='min')
    self.moments = moments
    self.stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                              patience=5)
    self.x, self.y = self.get_data(data_path)

    def dilations():
        # 1, 2, 4, ... with int(log2(moments)) entries; a fresh list is
        # returned for every layer so no two layers share the object.
        return [2 ** k for k in range(int(np.log2(moments)))]

    if loadModel is None:
        i = Input(batch_shape=(self.batch_size, self.moments, 4))

        # NOTE(review): the "reversed" branches below flip the LAST axis
        # (the 4 input features / the channels), not the time axis. That
        # looks unintended for a bidirectional design but is left as is
        # because changing it alters what existing checkpoints compute.
        if model == 1:
            x1 = TCN(return_sequences=False, nb_filters=(self.moments) * 2,
                     dilations=dilations(), nb_stacks=2, dropout_rate=.3,
                     kernel_size=2)(i)
            x2 = Lambda(lambda z: backend.reverse(z, axes=-1))(i)
            x2 = TCN(return_sequences=False, nb_filters=(self.moments) * 2,
                     dilations=dilations(), nb_stacks=2, dropout_rate=.1,
                     kernel_size=2)(x2)
            x = add([x1, x2])
            o = Dense(1, activation='linear')(x)
        elif model == 2:
            x1 = TCN(return_sequences=True, nb_filters=(self.moments) * 2,
                     dilations=dilations(), nb_stacks=2, dropout_rate=.3,
                     kernel_size=2)(i)
            x2 = Lambda(lambda z: backend.reverse(z, axes=-1))(i)
            x2 = TCN(return_sequences=True, nb_filters=(self.moments) * 2,
                     dilations=dilations(), nb_stacks=2, dropout_rate=.1,
                     kernel_size=2)(x2)
            x = add([x1, x2])
            x1 = LSTM(5, return_sequences=False, dropout=.3)(x)
            x2 = Lambda(lambda z: backend.reverse(z, axes=-1))(x)
            x2 = LSTM(5, return_sequences=False, dropout=.3)(x2)
            x = add([x1, x2])
            o = Dense(1, activation='linear')(x)
        else:  # model == 3, guaranteed by the check at the top
            x = TCN(return_sequences=True, nb_filters=32,
                    dilations=dilations(), nb_stacks=2, dropout_rate=.3,
                    kernel_size=4)(i)
            x1 = TCN(return_sequences=True, nb_filters=16,
                     dilations=dilations(), nb_stacks=2, dropout_rate=.3,
                     kernel_size=4)(x)
            x2 = LSTM(32, return_sequences=True, dropout=.3)(i)
            x2 = LSTM(16, return_sequences=True, dropout=.3)(x2)
            x = add([x1, x2])
            x = Dense(8, activation='linear')(x)
            x = TCN(return_sequences=True, nb_filters=4,
                    dilations=[1, 2, 4], nb_stacks=1, dropout_rate=.3,
                    kernel_size=2, activation=wave_net_activation)(x)
            x = concatenate([GlobalMaxPooling1D()(x),
                             GlobalAveragePooling1D()(x)])
            o = Dense(1, activation='linear')(x)
        self.m = Model(inputs=i, outputs=o)
    else:
        self.m = load_model(
            loadModel,
            custom_objects={'TCN': TCN,
                            'wave_net_activation': wave_net_activation})

    self.m.summary()
    self.m.compile(optimizer='adam', loss='mse')
def call(self, x):
    """Mirror-pad a 4-D tensor along axis 1 and then along axis 2.

    The padding is taken from the tensor itself in reversed order and
    skips the border element. ``pad_left``/``pad_right`` entries are added
    before/after axis 1, ``pad_top``/``pad_bottom`` before/after axis 2.
    """
    # Axis 1: slices of the flipped tensor supply both paddings.
    mirrored = K.reverse(x, 1)
    head = mirrored[:, (-1 - self.pad_left):-1, :, :]
    tail = mirrored[:, 1:(1 + self.pad_right), :, :]
    x = K.concatenate([head, x, tail], axis=1)

    # Axis 2: same construction on the already padded tensor.
    mirrored = K.reverse(x, 2)
    head = mirrored[:, :, (-1 - self.pad_top):-1, :]
    tail = mirrored[:, :, 1:(1 + self.pad_bottom), :]
    return K.concatenate([head, x, tail], axis=2)
def get_quadrants(input_):
    """Cut the axis-2/axis-3 plane of a 5-D tensor into four quadrants.

    q1 is the low/low corner as is. q2 is flipped along axis 3, q3 along
    axis 2 and q4 along both. For odd sizes the halves are rounded up, so
    neighbouring quadrants share the middle row/column.

    Returns:
        Tuple ``(q1, q2, q3, q4)`` of ``Lambda`` layer outputs.
    """
    _, _, y_dim, x_dim, _ = input_.shape
    y_half_dim = int(y_dim) // 2 + y_dim % 2
    x_half_dim = int(x_dim) // 2 + x_dim % 2

    y_low = slice(None, y_half_dim)
    y_high = slice(y_half_dim - y_dim % 2, None)
    x_low = slice(None, x_half_dim)
    x_high = slice(x_half_dim - x_dim % 2, None)

    def quadrant(y_sel, x_sel, flip_axes=None):
        # Crop one quadrant and optionally flip it.
        def crop(x):
            part = x[:, :, y_sel, x_sel, :]
            if flip_axes is None:
                return part
            return K.reverse(part, axes=flip_axes)
        return Lambda(crop)(input_)

    q1 = quadrant(y_low, x_low)
    q2 = quadrant(y_low, x_high, 3)
    q3 = quadrant(y_high, x_low, 2)
    q4 = quadrant(y_high, x_high, (3, 2))
    return q1, q2, q3, q4
def recursion(self, input_energy, mask=None, go_backwards=False,
              return_sequences=True, return_logZ=True, input_length=None):
    r"""Forward (alpha) or backward (beta) recursion

    If `return_logZ = True`, compute the logZ, the normalization constant:

    \[ Z = \sum_{y1, y2, y3} exp(-E) # energy
      = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
      = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3))
      sum_{y1} exp(-(u1' y1' + y1' W y2))) \]

    Denote:
        \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
        \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
        \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1' + y1' W y2)) \]
    Note that:
          yi's are one-hot vectors
          u1, u3: boundary energies have been merged

    If `return_logZ = False`, compute the Viterbi's best path lookup table.

    Args:
        input_energy: Per-step energies; indexed as ``[:, t, :]``.
        mask: Optional mask, reversed together with the energies when
            ``go_backwards`` is set.
        go_backwards: Run the recursion over the reversed sequence.
        return_sequences: Return the per-step values instead of only the
            last one.
        return_logZ: Forwarded to ``self.step`` (see above).
        input_length: Forwarded to ``K.rnn``.

    Returns:
        The per-step sequence (flipped back to the original order when
        ``go_backwards``) or the last value, depending on
        ``return_sequences``.
    """
    # The docstring is a raw string because it contains backslashes
    # (`\[`, `\sum`) that are invalid escape sequences otherwise.
    chain_energy = self.chain_kernel
    # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
    chain_energy = K.expand_dims(chain_energy, 0)
    # shape=(B, F), dtype=float32
    prev_target_val = K.zeros_like(input_energy[:, 0, :])

    if go_backwards:
        input_energy = K.reverse(input_energy, 1)
        if mask is not None:
            mask = K.reverse(mask, 1)

    initial_states = [prev_target_val, K.zeros_like(prev_target_val[:, :1])]
    constants = [chain_energy]

    if mask is not None:
        # Append one zero column to the mask before passing it as a constant.
        mask2 = K.cast(
            K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
            K.floatx())
        constants.append(mask2)

    def _step(input_energy_i, states):
        return self.step(input_energy_i, states, return_logZ)

    target_val_last, target_val_seq, _ = K.rnn(
        _step, input_energy, initial_states, constants=constants,
        input_length=input_length, unroll=self.unroll)

    if not return_sequences:
        return target_val_last
    if go_backwards:
        target_val_seq = K.reverse(target_val_seq, 1)
    return target_val_seq
def viterbi_decoding(self, X, mask=None):
    """Decode the best label path and return it one-hot encoded.

    The energies are computed from ``X``, the argmin lookup tables come
    from ``self.recursion(..., return_logZ=False)``, and the path is then
    read back from the last step to the first.
    """
    is_theano = K.backend() == 'theano'

    energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        energy = self.add_boundary_energy(
            energy, mask, self.left_boundary, self.right_boundary)

    tables = K.cast(self.recursion(energy, mask, return_logZ=False), 'int32')

    # backward to find best path, the start index can be any,
    # as all elements in the last argmin_table are the same
    tables = K.reverse(tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    start_idx = [K.expand_dims(tables[:, 0, 0])]
    if is_theano:
        from theano import tensor as T
        start_idx = [T.unbroadcast(start_idx[0], 1)]

    def gather_each_row(params, indices):
        # Pick params[r, indices[r]] for every row r.
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            from theano import tensor as T
            return params[T.arange(n), indices]
        if K.backend() == 'tensorflow':
            import tensorflow as tf
            pairs = K.transpose(K.stack([tf.range(n), indices]))
            return tf.gather_nd(params, pairs)
        raise NotImplementedError

    def find_path(argmin_table, best_idx):
        step_idx = K.expand_dims(
            gather_each_row(argmin_table, best_idx[0][:, 0]))
        if K.backend() == 'theano':
            from theano import tensor as T
            step_idx = T.unbroadcast(step_idx, 1)
        return step_idx, [step_idx]

    rnn_out = K.rnn(find_path, tables, start_idx,
                    input_length=K.int_shape(X)[1], unroll=self.unroll)
    _, paths, _ = rnn_out
    paths = K.squeeze(K.reverse(paths, 1), 2)
    return K.one_hot(paths, self.units)
def call(self, inputs, mask=None, **kwargs):
    """Run the stacked forward/backward LSTMs and merge both directions.

    Every layer feeds its forward and backward LSTM; the backward output
    is flipped along axis 1. The last ``self.res_layers`` layers add their
    input to their output (residual connection).

    Args:
        inputs: Input tensor shared by both directions.
        mask: Accepted for the layer interface; not used here.

    Returns:
        The merged tensor, or ``[output_fw, output_bw]`` when
        ``self.merge_mode`` is ``None``.

    Raises:
        ValueError: If ``self.merge_mode`` is not one of ``"fw"``,
            ``"bw"``, ``"concat"``, ``"sum"``, ``"ave"``, ``"mul"`` or
            ``None``.
    """
    output_fw = inputs
    output_bw = inputs
    first_residual = self.layers - self.res_layers
    for i in range(self.layers):
        input_fw, input_bw = output_fw, output_bw
        output_fw = self.fw_lstm[i](input_fw)
        output_bw = self.bw_lstm[i](input_bw)
        # Flip the backward output along axis 1; the mask passes through.
        output_bw = Lambda(lambda x: K.reverse(x, 1),
                           mask=lambda inputs, mask: mask)(output_bw)
        if i >= first_residual:
            output_fw = output_fw + input_fw
            output_bw = output_bw + input_bw

    merge_mode = self.merge_mode
    if merge_mode == "fw":
        return output_fw
    if merge_mode == "bw":
        return output_bw
    if merge_mode == 'concat':
        return K.concatenate([output_fw, output_bw])
    if merge_mode == 'sum':
        return output_fw + output_bw
    if merge_mode == 'ave':
        return (output_fw + output_bw) / 2
    if merge_mode == 'mul':
        return output_fw * output_bw
    if merge_mode is None:
        return [output_fw, output_bw]
    # Previously an unknown mode crashed with an unbound local variable.
    raise ValueError('Unrecognized value for argument '
                     'merge_mode: %s' % (merge_mode))
def paired_angle_between_batches(tensors):
    """Angle to the paired vector minus angle to a vector from another row.

    ``tensors[0]`` and ``tensors[1]`` are compared along the last axis.
    The "other" vectors are ``tensors[1]`` flipped along axis 0.
    """
    first = tensors[0]
    second = tensors[1]

    def length(t):
        # Euclidean norm over the last axis, stabilised with epsilon.
        return K.sqrt(K.sum(K.square(t), axis=-1, keepdims=True) + K.epsilon())

    def angle(a, b, len_a, len_b):
        dot = K.sum(a * b, axis=-1, keepdims=True)
        return tf.acos(dot / (len_a * len_b))

    l0 = length(first)
    angle_w_self = angle(first, second, l0, length(second))

    # This is very hacky! we assume batch sizes are odd and reverse the
    # batch to compare to others.
    flipped = K.reverse(second, 0)
    angle_w_other = angle(first, flipped, l0, length(flipped))

    return angle_w_self - angle_w_other
def pred_rc_recursive(input):
    """Apply one recursion step to ``input[0]`` using ``input[1]``.

    ``ki`` is ``input[1][:, :, 0]`` repeated along axis 2 to the width of
    ``input[0]``. The result ``(a - ki * flip(a)) / (1 - ki**2)`` is
    concatenated with ``input[1]`` along axis 2.
    NOTE(review): this looks like a step-down recursion on reflection
    coefficients; confirm against the caller.
    """
    coeffs = input[0]
    extra = input[1]
    ki = K.repeat_elements(
        K.expand_dims(extra[:, :, 0], axis=-1), coeffs.shape[2], 2)
    updated = (coeffs - ki * K.reverse(coeffs, axes=2)) / (1 - ki * ki)
    return Concatenate(axis=2)([updated, extra])
def call(self, input, **kwargs):
    """Convolve the input with the flipped, tiled kernel via ``K.conv2d``.

    The kernel is flipped along its last axis and reshaped to a
    (n_antennas_BS x n_antennas_MS) grid. It is duplicated along both grid
    axes and used as a conv2d filter with "same" padding.
    NOTE(review): the duplication presumably emulates a 2-D circular
    convolution; confirm. An earlier 1-D variant used to live here as
    commented-out code.
    """
    # ---- kernel: flip, reshape to the antenna grid, swap the grid axes
    flipped = K.reverse(self.kernel, axes=-1)
    kernel_grid = K.reshape(
        flipped,
        shape=(-1, flipped.shape[1], self.n_antennas_MS, self.n_antennas_BS))
    kernel_grid = K.permute_dimensions(kernel_grid, pattern=(0, 1, 3, 2))
    n_filter_length = kernel_grid.shape[-2]
    n_filter_width = kernel_grid.shape[-1]
    n_filter_mult = n_filter_length * n_filter_width

    # ---- input: arrange as 4D array for conv2d
    signal = K.reshape(input, (-1, n_filter_mult, 1, 1))
    signal = K.reshape(
        signal, shape=(-1, self.n_antennas_MS, self.n_antennas_BS, 1))
    signal = K.permute_dimensions(signal, pattern=(0, 2, 1, 3))

    # ---- repeat the kernel along both axes, arrange as 4D array for conv2d
    tiled = K.reshape(K.stack((kernel_grid, kernel_grid), axis=2),
                      (1, 1, 2 * n_filter_length, -1))
    tiled = K.reshape(K.stack((tiled, tiled), axis=3),
                      (1, 1, -1, 2 * n_filter_width))
    conv_kernel = K.permute_dimensions(tiled, pattern=(2, 3, 0, 1))

    # ---- convolution, then back to the flat per-sample layout
    result = K.conv2d(signal, conv_kernel, padding="same")
    result = K.permute_dimensions(result, pattern=(0, 2, 1, 3))
    result = K.reshape(result, shape=(-1, n_filter_mult, 1))
    outputs = K.permute_dimensions(result, pattern=(0, 2, 1))

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias)
    if self.activation is not None:
        outputs = self.activation(outputs)
    if self.output_type is not None:
        outputs = K.cast(outputs, self.output_type)
    return outputs
def get_model(self):
    """Build the three-input recurrent-convolutional classifier.

    The current, left-context and right-context token sequences share one
    embedding. The left context goes through a forward LSTM, the right
    context through a backward LSTM whose output is flipped back along
    axis 1. Both are concatenated with the current embedding, projected by
    a kernel-size-1 ``Conv1D``, max-pooled and classified.
    """
    seq_shape = (self.maxlen, )
    input_current = Input(seq_shape)
    input_left = Input(seq_shape)
    input_right = Input(seq_shape)

    embedder = Embedding(self.max_features, self.embedding_dims,
                         input_length=self.maxlen)
    emb_current = embedder(input_current)
    emb_left = embedder(input_left)
    emb_right = embedder(input_right)

    left_ctx = LSTM(128, return_sequences=True)(emb_left)
    right_ctx = LSTM(128, return_sequences=True, go_backwards=True)(emb_right)
    # go_backwards emits the steps in reverse order; flip them back.
    right_ctx = Lambda(lambda x: K.reverse(x, axes=1))(right_ctx)

    features = Concatenate(axis=2)([left_ctx, emb_current, right_ctx])
    features = Conv1D(64, kernel_size=1, activation='tanh')(features)
    features = GlobalMaxPooling1D()(features)
    output = Dense(self.class_num, activation=self.last_activation)(features)

    return Model(inputs=[input_current, input_left, input_right],
                 outputs=output)
def generation(self, z_k):
    """Map ``z_k`` to ``x_hat`` through ``self.num_flow`` flow steps.

    This is the generation stage P(X|Z), also called sampling: from the
    base distribution to the target distribution (the forward stage).

    Returns:
        Tuple ``(x_hat, sigmas)`` where ``sigmas`` holds the gate of every
        flow step in order.
    """
    context = self.denseh(z_k)
    _z_sigmas = []
    for step in range(self.num_flow):
        previous = z_k
        # Every step after the first uses the reversed dependency order.
        if step > 0:
            previous = Lambda(lambda x: K.reverse(x, axes=-1))(previous)
        flow_in = [previous, context]
        shift = self.maskdense_m[step](flow_in)
        raw_gate = self.maskdense_s[step](flow_in)
        # Equation 12 of the IAF paper (per the original note): the gate
        # sigma is s after the activation, described there as a sigmoid.
        sigma = self.act(raw_gate)
        z_k = previous * sigma + shift * (1. - sigma)
        _z_sigmas.append(sigma)
    x_hat = self.densex2(self.densex1(z_k))
    return x_hat, _z_sigmas
def call(self, inputs):
    """Convolve ``inputs`` with the kernel and with its axis-0 flip.

    Both results get the same bias (when ``use_bias``) and the same
    activation (when set).

    Returns:
        ``[outputs_plus, outputs_minus]``: the response to the kernel and
        to the kernel reversed along axis 0.
    """
    # The leftover debugging `print(self.kernel)` was removed: it wrote to
    # stdout every time the layer was called.

    def respond(kernel):
        # One conv -> bias -> activation pass for the given kernel.
        out = K.conv2d(
            inputs,
            kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)
        if self.use_bias:
            out = K.bias_add(out, self.bias, data_format=self.data_format)
        if self.activation is not None:
            out = self.activation(out)
        return out

    outputs_plus = respond(self.kernel)
    outputs_minus = respond(K.reverse(self.kernel, 0))
    return [outputs_plus, outputs_minus]
def sinc(band, t_right):
    """Build a symmetric sinc curve from its right half.

    Evaluates ``sin(2*pi*band*t) / (2*pi*band*t)`` on ``t_right``, mirrors
    it along axis 0 for the left half and puts a single 1 in the middle.
    """
    phase = 2 * math.pi * band * t_right
    y_right = K.sin(phase) / phase
    # y_left = flip(y_right, 0) TODO remove if useless
    y_left = K.reverse(y_right, 0)
    centre = K.variable(K.ones(1))
    return K.concatenate([y_left, centre, y_right])
def call(self, inputs, training=None):
    """Flip the channel order and subtract fixed per-channel constants.

    Args:
        inputs: Tensor whose ``self.axis`` holds the colour channels.
        training: Accepted for the layer interface; not used.

    Returns:
        ``reverse(inputs, self.axis) - [103.939, 116.779, 123.68]``.
    """
    # The unused `input_shape = K.int_shape(inputs)` local was dropped.
    # Legacy from keras using caffe models, see imagenet_utils.py in
    # keras-applications.
    bgr = K.reverse(inputs, axes=self.axis)  # RGB to BGR
    return bgr - K.constant([103.939, 116.779, 123.68])
def get_octants(input_):
    """Cut the three middle axes of a 5-D tensor into eight octants.

    Octant 1 is the low/low/low corner as is. Every other octant is
    flipped along each axis on which it takes the high half (axis 1, 2
    and/or 3). For odd sizes the halves are rounded up, so neighbouring
    octants share the middle slice.

    Returns:
        Tuple ``(oct1, ..., oct8)`` of ``Lambda`` layer outputs.
    """
    _, z_dim, y_dim, x_dim, _ = input_.shape
    z_half_dim = int(z_dim) // 2 + z_dim % 2
    y_half_dim = int(y_dim) // 2 + y_dim % 2
    x_half_dim = int(x_dim) // 2 + x_dim % 2

    z_low, z_high = slice(None, z_half_dim), slice(z_half_dim - z_dim % 2, None)
    y_low, y_high = slice(None, y_half_dim), slice(y_half_dim - y_dim % 2, None)
    x_low, x_high = slice(None, x_half_dim), slice(x_half_dim - x_dim % 2, None)

    def octant(z_sel, y_sel, x_sel, flip_axes=None):
        # Crop one octant and optionally flip it.
        def crop(x):
            part = x[:, z_sel, y_sel, x_sel, :]
            if flip_axes is None:
                return part
            return K.reverse(part, axes=flip_axes)
        return Lambda(crop)(input_)

    oct1 = octant(z_low, y_low, x_low)
    oct2 = octant(z_low, y_low, x_high, 3)
    oct3 = octant(z_low, y_high, x_low, 2)
    oct4 = octant(z_low, y_high, x_high, (3, 2))
    oct5 = octant(z_high, y_low, x_low, 1)
    oct6 = octant(z_high, y_low, x_high, (3, 1))
    oct7 = octant(z_high, y_high, x_low, (2, 1))
    oct8 = octant(z_high, y_high, x_high, (3, 2, 1))
    return oct1, oct2, oct3, oct4, oct5, oct6, oct7, oct8
def model(self):
    """Build and compile the dual-TCN + LSTM network into ``self.m``.

    The input has ``self.moments - 1`` steps of ``self.input_dim``
    features. One TCN reads the sequence as is, a second reads it reversed
    in time. Their outputs are summed and fed to an ``LSTM(4)``. The model
    is compiled with Adam and ``custom_loss``.
    """
    # Build model around TCN.
    # As a general rule (keeping kernel_size fixed at 2 and dilations
    # increasing by a factor of 2):
    #   receptive field = nb_stacks * kernel_size * last_dilation
    # Each layer adds linearly to the receptive field.
    #
    # This is "Model 3" of the experiments, built for self.moments between
    # 40 and 80. The other variants were kept here only as commented-out
    # code and have been removed:
    #   Model 1: single TCN -> Dense(4) -> LSTM(4)
    #   Model 2: LSTM(50) -> Dense(4, softmax)
    #   Model 4: two stacked TCNs -> Dense(4), added to LSTM(4), then
    #            global max/average pooling -> Dense(4)
    #   Model 5: two dual-TCN stages with kernel_size=1, then global
    #            max/average pooling -> Dense(4)
    self.built = True
    i = Input(batch_shape=(self.batch_size, self.moments - 1, self.input_dim))

    x1 = TCN(return_sequences=True, nb_filters=64,
             dilations=[1, 2, 4, 8, 16, 32], nb_stacks=1, dropout_rate=.1,
             kernel_size=2)(i)
    # Reverse the TIME axis (1). The previous code reversed axis 0, the
    # batch axis, which paired each sample with a different sample of the
    # batch when the two branches were added.
    # NOTE(review): the backward branch output is summed without being
    # flipped back in time; confirm whether that is intended.
    x2 = Lambda(lambda z: backend.reverse(z, axes=1))(i)
    x2 = TCN(return_sequences=True, nb_filters=64,
             dilations=[1, 2, 4, 8, 16, 32], nb_stacks=1, dropout_rate=.1,
             kernel_size=2)(x2)
    x = add([x1, x2])
    o = LSTM(4, dropout=.1)(x)

    self.m = Model(inputs=i, outputs=o)
    # Optimizer and loss can be changed to whatever is needed.
    self.m.compile(optimizer='adam', loss=custom_loss)
def build_model(embedding_matrix, num_aux_targets, loss_weight):
    """Build and compile the two-stage dual-TCN text classifier.

    Frozen embeddings pass through spatial dropout and two dual-TCN
    stages, then global max + average pooling and two residual dense
    blocks. The model has a main sigmoid output and an auxiliary sigmoid
    output with ``num_aux_targets`` units.

    Args:
        embedding_matrix: Pre-trained embedding weights; its shape sets
            the vocabulary size and embedding width.
        num_aux_targets: Width of the auxiliary output.
        loss_weight: Weight of the main loss (the auxiliary loss uses 1.0).
    """
    def dual_tcn(seq, forward_name, backward_name):
        # Sum of a TCN over `seq` and a TCN over `seq` flipped on its last
        # axis.
        # NOTE(review): axes=-1 flips the feature axis, not the time axis.
        fwd = TCN(TCN_UNITS, return_sequences=True,
                  dilations=[1, 2, 4, 8, 16], name=forward_name)(seq)
        bwd = Lambda(lambda z: K.reverse(z, axes=-1))(seq)
        bwd = TCN(TCN_UNITS, return_sequences=True,
                  dilations=[1, 2, 4, 8, 16], name=backward_name)(bwd)
        return add([fwd, bwd])

    words = Input(shape=(MAX_LEN, ))
    x = Embedding(*embedding_matrix.shape, weights=[embedding_matrix],
                  trainable=False)(words)
    x = SpatialDropout1D(0.3)(x)

    stage_names = (('tnc1_forward', 'tnc1_backward'),
                   ('tnc2_forward', 'tnc2_backward'))
    for forward_name, backward_name in stage_names:
        x = dual_tcn(x, forward_name, backward_name)

    hidden = concatenate(
        [GlobalMaxPooling1D()(x), GlobalAveragePooling1D()(x)])
    # Two residual dense blocks.
    for _ in range(2):
        hidden = add(
            [hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])

    result = Dense(1, activation='sigmoid')(hidden)
    aux_result = Dense(num_aux_targets, activation='sigmoid')(hidden)

    model = Model(inputs=words, outputs=[result, aux_result])
    model.compile(loss=[custom_loss, 'binary_crossentropy'],
                  loss_weights=[loss_weight, 1.0],
                  optimizer='adam')
    return model
def sinc(band, t_right):
    """Build a symmetric float32 sinc curve from its right half.

    Evaluates ``sin(2*pi*band*t) / (2*pi*band*t)`` on ``t_right``, mirrors
    it along axis 0 for the left half and puts a single 1 in the middle.
    """
    two_pi = 2 * tf.constant(np.pi, dtype=tf.float32)
    phase = two_pi * band * t_right
    y_right = tf.sin(phase) / phase
    # y_left = flip(y_right, 0) TODO remove if useless
    y_left = K.reverse(y_right, 0)
    centre = tf.ones(1, dtype=tf.float32)
    return K.cast(K.concatenate([y_left, centre, y_right]), 'float32')
def Creat_LSTM_model():
    """Build the two-input siamese BiLSTM classifier (3-way softmax).

    Both inputs share one frozen, zero-masked embedding and one
    bidirectional LSTM. The sentence vectors are combined with their
    difference and product (also against a flipped copy of the second
    vector) and classified by a dense stack.
    """
    # Two sets of input.
    inputA = Input(shape=(None,))
    inputB = Input(shape=(None,))

    embedding_layer = Embedding(input_dim=(len(word_indx) + 1),
                                output_dim=300,
                                weights=[embedding_matrix],
                                mask_zero=True,
                                trainable=False)
    lstm = Bidirectional(LSTM(300, return_sequences=False, dropout=0.2,
                              recurrent_dropout=0.2))

    # A disabled soft-attention block used to follow each branch as a
    # no-op string: TimeDistributed(Dense(1, tanh)) -> Flatten -> softmax
    # -> RepeatVector(600) -> Permute([2, 1]) -> Multiply with the LSTM
    # output -> sum over axis 1.

    # first branch
    x = lstm(embedding_layer(inputA))
    # second branch
    y = lstm(embedding_layer(inputB))

    def difference(pair):
        return Lambda(lambda a: tf.subtract(a[0], a[1]))(pair)

    def product(pair):
        return Lambda(lambda a: tf.multiply(a[0], a[1]))(pair)

    c1 = difference([x, y])
    c2 = product([x, y])
    # NOTE(review): axes=0 flips the batch axis, so row r of x is compared
    # with row (batch - 1 - r) of y. Confirm this is intended.
    ry = Lambda(lambda a: k.reverse(a, axes=0))(y)
    c3 = difference([x, ry])
    c4 = product([x, ry])

    z = Concatenate()([x, c1, c2, c3, c4, y])
    z = Dense(1800, activation="relu")(z)
    z = Dropout(0.5)(z)
    z = Dense(512, activation="relu")(z)
    z = Dropout(0.5)(z)
    z = Dense(128, activation="relu")(z)
    z = Dense(3, activation="softmax")(z)

    return Model(inputs=[inputA, inputB], outputs=z)
def define_NN(self):
    """Assemble the multi-branch network graph.

    Five branches are computed from the same input (LSTM, depthwise conv,
    SimpleRNN, SimpleRNN on a flipped input, Dense). They are concatenated
    on axis 2, flattened and passed through three dense layers to a
    128 x 128 output. Intermediate tensors are printed as they are built.

    Returns:
        ``([input_tensor], [output_tensor])``.
    """
    tf.keras.backend.clear_session()  # For easy reset of notebook state.

    height, width = ImgGenerator.H, ImgGenerator.W
    in1 = keras.Input(shape=(height, width // 2, 2), name='inp1')
    flat_in = layers.Reshape((height, width), name="reshaped_input")(in1)

    # Branch 1: LSTM
    lstm_branch = layers.LSTM(units=256 * 4, name="lstm")(flat_in)
    lstm_branch = layers.Reshape((128, 8), name="reshaped_lstm")(lstm_branch)
    print(lstm_branch)

    # Branch 2: depthwise convolution on the raw input
    conv_branch = layers.DepthwiseConv2D(
        (128, 1), padding="same", data_format='channels_last',
        name="depth-conv")(in1)
    print(conv_branch)
    conv_branch = layers.Reshape((128, 128), name="reshape_conv")(conv_branch)

    # Branch 3: simple RNN
    rnn_branch = layers.SimpleRNN(256, name="rnn")(flat_in)
    rnn_branch = layers.Reshape((128, 2), name="reshaped_rnn")(rnn_branch)
    print(rnn_branch)

    # Branch 4: "rotated" input
    # NOTE(review): axes=0 flips the batch axis, not the image; confirm.
    flipped = layers.Lambda(lambda x: kbck.reverse(x, axes=0),
                            output_shape=(64, 128, 2))(in1)
    flipped = layers.Reshape((height, width), name="reshaped_input2")(flipped)
    flipped_branch = layers.SimpleRNN(128, name="rnn2")(flipped)
    print(flipped_branch)
    flipped_branch = layers.Reshape(
        (128, 1), name="reshape_conv2")(flipped_branch)

    # Branch 5: dense projection of the reshaped input
    merged = layers.Dense(1024, activation="tanh", name="dense-inp")(flat_in)
    merged = layers.Concatenate(axis=2)(
        [merged, lstm_branch, conv_branch, rnn_branch, flipped_branch])
    print(merged.shape)

    flat = layers.Flatten()(merged)
    print(flat)

    dense_1 = layers.Dense(2048, activation="relu")(flat)
    print(dense_1)

    dense_2 = layers.Dense(4096, activation="relu")(dense_1)
    print(dense_2)

    output = layers.Dense(128 * 128)(dense_2)
    print(f"Last dense:{output}")
    output = layers.Reshape((128, 128))(output)
    print(f"Out layer:{output}")
    return [in1], [output]
def call(self, inputs):
    """Run the shared RNN cell in both directions and merge the states.

    The backward pass sees ``inputs`` flipped along axis 1 and its states
    are flipped back before merging.

    Raises:
        ValueError: If ``self._merge_mode`` is not ``'concat'``, ``'sum'``,
            ``'ave'``, ``'mul'`` or ``None``.
    """
    flipped_inputs = K.reverse(inputs, 1)
    forward, _ = self._rnn_cell(inputs)
    backward, _ = self._rnn_cell(flipped_inputs)
    backward = K.reverse(backward, 1)

    mode = self._merge_mode
    if mode == 'concat':
        return K.concatenate([forward, backward], axis=-1)
    if mode == 'sum':
        return forward + backward
    if mode == 'ave':
        return (forward + backward) / 2
    if mode == 'mul':
        return forward * backward
    if mode is None:
        return [forward, backward]
    raise ValueError('Unrecognized value for argument '
                     'merge_mode: %s' % (self._merge_mode))
def call(self, inputs):
    """Convolve with the kernel and its axis-1 flip; stack on axis 3.

    When ``use_bias`` is set the bias is duplicated so that both halves of
    the output receive the same values.
    """
    flipped_kernel = K.reverse(self.kernel, axes=1)
    responses = [
        self._convolution_op(inputs, self.kernel),
        self._convolution_op(inputs, flipped_kernel),
    ]
    outputs = K.concatenate(responses, axis=3)
    if not self.use_bias:
        return outputs
    doubled_bias = K.concatenate([self.bias, self.bias], axis=0)
    return nn.bias_add(outputs, doubled_bias,
                       data_format=self._tf_data_format)
def bilateral_cumsum_error(p, p_hat):
    """Mean squared gap between cumulative sums, taken in both directions.

    Both inputs are flattened and shifted by their joint minimum. The
    cumulative sums are then compared in the original and in the reversed
    order.
    """
    flat_true = flatten(p)
    flat_pred = flatten(p_hat)

    offset = tf.keras.backend.min(concatenate((flat_true, flat_pred)))
    flat_true = flat_true - offset
    flat_pred = flat_pred - offset

    forward_gap = square(cumsum(flat_true) - cumsum(flat_pred))
    backward_gap = square(cumsum(reverse(flat_true, axes=0))
                          - cumsum(reverse(flat_pred, axes=0)))
    return mean(concatenate((forward_gap, backward_gap)))
def transformation_feature_space(x):
    """Apply the feature-space transformation to a 4-D tensor.

    1) Swap the first and second half of the last (channel) axis.
    2) Flip the result along axis 2 (columns).
    """
    half = x.shape[3] // 2
    leading = x[:, :, :, :half]
    trailing = x[:, :, :, half:]
    swapped = K.concatenate([trailing, leading], axis=3)
    return K.reverse(swapped, axes=2)
def call(self, inputs):
    """Convolve each channel half with the kernel or its axis-1 flip.

    The first half of axis 3 uses ``self.kernel``, the second half the
    kernel reversed along axis 1. Both results are concatenated on axis 3
    and, when ``use_bias`` is set, receive the same (duplicated) bias.
    """
    half = inputs.shape[3] // 2
    first_half = inputs[:, :, :, :half]
    second_half = inputs[:, :, :, half:]

    responses = [
        self._convolution_op(first_half, self.kernel),
        self._convolution_op(second_half, K.reverse(self.kernel, axes=1)),
    ]
    outputs = K.concatenate(responses, axis=3)
    if not self.use_bias:
        return outputs
    doubled_bias = K.concatenate([self.bias, self.bias], axis=0)
    return nn.bias_add(outputs, doubled_bias,
                       data_format=self._tf_data_format)
def _backward(gamma):
    """Backward recurrence of the linear chain crf.

    Walks ``gamma`` (cast to int32) from the last step to the first,
    looking up each step's entry for the label chosen at the step before.
    Returns the label sequence in forward order.
    """
    tables = K.cast(gamma, "int32")

    def _backward_step(table_t, states):
        # states[0] carries the previous label with an extra leading axis.
        previous = K.squeeze(states[0], 0)
        current = batch_gather(table_t, previous)
        return current, [K.expand_dims(current, 0)]

    start = K.expand_dims(K.zeros_like(tables[:, 0, 0]), 0)
    rnn_out = K.rnn(_backward_step, tables, [start], go_backwards=True)
    reversed_labels = rnn_out[1]
    return K.reverse(reversed_labels, 1)
def attention_model(type='train', flag=True):
    """Build and compile the encoder / attention / decoder sequence model.

    Args:
        type: ``'test'`` makes the model also output the attention
            weights; any other value builds the single-output model.
        flag: Whether to apply the attention mechanism between encoder
            and decoder.

    Returns:
        The compiled Keras model (its summary is printed).

    Raises:
        ValueError: If ``type == 'test'`` while ``flag`` is false, because
            there are no attention weights to output in that case.
    """
    # Previously this combination failed late with an undefined
    # `attention_x`; reject it before building anything.
    if type == 'test' and not flag:
        raise ValueError(
            "type='test' outputs the attention weights and therefore "
            "requires flag=True")

    inputs = Input(shape=(TIME_STEPS,), name='input_data')
    # x = Masking()(inputs)
    x = Embedding(input_dim=41, output_dim=8)(inputs)
    gru_out = LSTM(128, return_sequences=True, name='encode_gru')(x)

    if flag:
        attention_x = attention_mechanism(gru_out)
        attention_mul = Permute((2, 1))(
            K.batch_dot(Permute((2, 1))(gru_out), attention_x))
    else:
        attention_mul = gru_out

    # The decoder reads the sequence reversed along axis 1.
    x = LSTM(128, return_sequences=True, name='decode_gru')(
        K.reverse(attention_mul, axes=1))
    output = TimeDistributed(
        Dense(len(string_index), activation='softmax'))(x)

    if type == 'test':
        model = Model(inputs=inputs, outputs=[output, attention_x])
    else:
        model = Model(inputs=inputs, outputs=output)

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss=masking_loss, metrics=[masking_acc])
    model.summary()
    return model
def encoder(self, text_embed, return_sequence):
    """Encode embedded text with left/right context LSTMs and a Conv1D.

    Args:
        text_embed: Embedded text, indexed as (batch, step, feature).
        return_sequence: When true, return the per-step semantic vectors;
            otherwise return their maximum over axis 1.

    Returns:
        The per-step representation or the max-pooled sentence vector.
    """
    def zero_step(x):
        # One all-zero time step with the batch size and width of `x`.
        return K.zeros(shape=(K.shape(x)[0], 1, K.shape(x)[-1]))

    # We shift the document to the right to obtain the left-side contexts:
    # step t sees the embedding of step t - 1, step 0 sees zeros.
    l_embedding = Lambda(
        lambda x: K.concatenate([zero_step(x), x[:, :-1]], axis=1)
    )(text_embed)
    # We shift the document to the left to obtain the right-side contexts:
    # step t sees the embedding of step t + 1, the last step sees zeros.
    # The padding used to be prepended here, which dropped step 0 and
    # shifted nothing.
    r_embedding = Lambda(
        lambda x: K.concatenate([x[:, 1:], zero_step(x)], axis=1)
    )(text_embed)

    # use LSTM RNNs instead of vanilla RNNs as described in the paper.
    forward = LSTM(300, return_sequences=True)(l_embedding)  # See equation (1)
    backward = LSTM(300, return_sequences=True,
                    go_backwards=True)(r_embedding)  # See equation (2)
    # Keras returns the output sequences in reverse order.
    backward = Lambda(lambda x: K.reverse(x, axes=1))(backward)
    together = concatenate([forward, text_embed, backward],
                           axis=2)  # See equation (3).

    # use conv1D instead of TimeDistributed and Dense
    semantic = Conv1D(300, kernel_size=1,
                      activation="tanh")(together)  # See equation (4).
    if return_sequence:
        return semantic

    sentence_embed = Lambda(
        lambda x: K.max(x, axis=1))(semantic)  # See equation (5).
    return sentence_embed
def attention_model(input_shape):
    """Build and compile the GRU encoder / attention / decoder model.

    The run mode is read from ``sys.argv[1]``: ``'train'`` builds the
    single-output model, ``'test'`` also outputs the attention weights.

    Args:
        input_shape: Feature width of the input and of the decoder output.

    Returns:
        The compiled Keras model (its summary is printed).

    Raises:
        ValueError: If ``sys.argv[1]`` is missing or is neither
            ``'train'`` nor ``'test'``.
    """
    # Validate the mode first. Before, a missing argument raised an
    # IndexError and an unknown one left `model` undefined, both only
    # after the whole graph had been built.
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode not in ('train', 'test'):
        raise ValueError(
            "expected 'train' or 'test' as first command-line argument, "
            "got %r" % (mode,))

    input_ = Input(shape=(
        TIME_STEPS,
        input_shape,
    ), name='input_data')
    gru_out = GRU(256, return_sequences=True, name='encode_gru')(input_)
    print('gru', gru_out.shape)

    attention_x = attention_mechanism(gru_out)
    gru_out = Permute((2, 1))(gru_out)
    attention_mul = K.batch_dot(gru_out, attention_x)
    attention_mul = Permute((2, 1))(attention_mul)

    # The decoder reads the sequence reversed along axis 1.
    output = GRU(input_shape, return_sequences=True,
                 name='decode_gru')(K.reverse(attention_mul, axes=1))

    if mode == 'train':
        model = Model(inputs=input_, outputs=output)
    else:
        model = Model(inputs=input_, outputs=[output, attention_x])

    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.

    Walks ``gamma`` (cast to int32) from the last step to the first and
    returns the label sequence in forward order. When ``mask`` is given,
    masked positions are set to -1.
    '''
    tables = K.cast(gamma, 'int32')

    def _backward_step(table_t, states):
        # states[0] carries the previous label with an extra leading axis.
        previous = K.squeeze(states[0], 0)
        current = batch_gather(table_t, previous)
        return current, [K.expand_dims(current, 0)]

    start = K.expand_dims(K.zeros_like(tables[:, 0, 0]), 0)
    rnn_out = K.rnn(_backward_step, tables, [start], go_backwards=True)
    y = K.reverse(rnn_out[1], 1)

    if mask is None:
        return y

    int_mask = K.cast(mask, dtype='int32')
    # Zero the masked labels, then move them to -1.
    y = y * int_mask
    y = y + (-(1 - int_mask))
    return y