def rnn_autoencoder(window_size, n_features):
    """Composite LSTM autoencoder with reconstruction and prediction decoders.

    Args:
        window_size: number of timesteps per input window.
        n_features: number of features per timestep.

    Returns:
        Compiled Keras Model mapping the input window to two sequence
        outputs: a reconstruction and a prediction, both trained with MSE.
    """
    n_in = window_size
    n_out = window_size
    # define encoder; masking lets zero-padded timesteps be skipped
    visible = layers.Input(shape=(n_in, n_features))
    masked = layers.Masking(mask_value=0.)(visible)
    encoder = layers.LSTM(128, activation='relu')(masked)
    # define reconstruction decoder
    decoder1 = layers.RepeatVector(n_in)(encoder)
    decoder1 = layers.LSTM(128, activation='relu',
                           return_sequences=True)(decoder1)
    decoder1 = layers.TimeDistributed(Dense(n_features))(decoder1)
    # define prediction decoder
    decoder2 = layers.RepeatVector(n_out)(encoder)
    decoder2 = layers.LSTM(128, activation='relu',
                           return_sequences=True)(decoder2)
    decoder2 = layers.TimeDistributed(Dense(n_features))(decoder2)
    # tie it together
    model = models.Model(inputs=visible, outputs=[decoder1, decoder2])
    model.summary()
    model.compile(optimizer='adam', loss='mse')
    try:
        keras.utils.plot_model(model,
                               show_shapes=True,
                               to_file='composite_lstm_autoencoder.png')
    # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # plotting only needs pydot/graphviz and is best-effort.
    except Exception:
        print('>>>> plot not working!')
    return model
def get_model():
    """Build the TH123AI action classifier: gated dense inputs feeding a
    WaveNet-style stack of dilated conv blocks with skip connections,
    ending in a 45-way softmax."""
    # broadcast the low-dimensional inputs along a 30-step time axis
    char_action = layers.Input(shape=[2])
    char_action_r = layers.RepeatVector(30)(char_action)
    position = layers.Input(shape=[6])
    position_r = layers.RepeatVector(30)(position)
    # enemy_key = layers.Input(shape=[30, 45])
    # enemy_key_a = attention_3d_block(enemy_key)
    # my_key = layers.Input(shape=[30, 45])
    # my_key_a = attention_3d_block(my_key)
    concat = layers.Concatenate()([char_action_r, position_r])
    # sigmoid gate over the 8 concatenated features (2 action + 6 position)
    gate = layers.Dense(8, activation="sigmoid")(concat)
    concat = layers.Multiply()([gate, concat])
    '''flatten = layers.Flatten()(concat) dense = layers.Dense(128, activation="tanh")(flatten) c = layers.Dense(128, activation="tanh")(dense) c = layers.Dense(128, activation="tanh")(c)'''
    first = conv1d_block(32, 5, padding='causal', activation='tanh')(concat)
    # first wavenet block (kernel 2, dilation 1); each block yields the
    # residual stream A and a skip connection B
    A, B = wavenet_block(32, 2, 1)(first)
    skip_connections = [B]
    for i in range(1, 12):
        # dilation cycles 2, 4, 1 because 2**(i % 3) for i = 1..11
        A, B = wavenet_block(32, 3, 2**(i % 3))(A)
        skip_connections.append(B)
    # sum all skip connections, then a small readout head
    net = layers.Add()(skip_connections)
    net = layers.LeakyReLU()(net)
    net = conv1d_block(16, 1)(net)
    net = layers.LeakyReLU()(net)
    c = layers.Flatten()(net)
    dense_category = layers.Dense(45, activation='softmax')(c)
    return keras.models.Model(inputs=[char_action, position],
                              outputs=[dense_category],
                              name="TH123AI")
def buildAttention(self, seq, controller):
    """Soft attention over `seq` conditioned on `controller`.

    Args:
        seq: 3-D tensor (batch, context_window_size, conceptEmbeddingsSz).
        controller: 2-D tensor used to score each timestep of `seq`.

    Returns:
        (summed, attention): the attention-weighted sum of `seq` and the
        attention distribution itself.
    """
    controller_repeated = layers.RepeatVector(
        self._config['context_window_size'])(controller)
    # FIX: layers.merge / TimeDistributedDense were removed in Keras 2;
    # use the equivalents the in-line comments already pointed to.
    attention = layers.concatenate([controller_repeated, seq], axis=-1)
    attention = layers.TimeDistributed(
        layers.Dense(1, activation='sigmoid'))(attention)
    attention = layers.Flatten()(attention)
    # normalize scores into a probability distribution over timesteps
    attention = layers.Lambda(
        to_prob,
        output_shape=(self._config['context_window_size'], ))(attention)
    # tile the per-timestep weights across the embedding axis
    attention_repeated = layers.RepeatVector(
        self._w2v.conceptEmbeddingsSz)(attention)
    attention_repeated = layers.Permute((2, 1))(attention_repeated)
    weighted = layers.multiply([attention_repeated, seq])
    summed = layers.Lambda(
        sum_seq, output_shape=(self._w2v.conceptEmbeddingsSz, ))(weighted)
    return summed, attention
def make_model(self):
    """Hierarchical RNN autoencoder: encode unit_t-step chunks, merge the
    chunk codes into a single latent vector, then decode by summing a
    coarse angle stream with a fine RNN residual stream.

    Sets self.encoder, self.decoder and self.model.
    """
    inputs = K_layer.Input(shape=(self.timesteps, self.input_dim))
    reshaped = K_layer.Reshape(
        (self.unit_n, self.unit_t, self.input_dim))(inputs)
    encode_reshape = K_layer.Reshape(
        (self.unit_n, self.partial_latent_dim))
    encode_1 = abs_model.RNN_UNIT(self.partial_latent_dim)
    encode_2 = abs_model.RNN_UNIT(self.latent_dim)

    def encode_partials(seq):
        encoded = [None] * self.unit_n
        for i in range(self.unit_n):
            # FIX: bind the loop variable as a default argument; a plain
            # closure late-binds `i`, so any deferred invocation of the
            # Lambda (e.g. on model reload) would slice the last chunk
            # for every position.
            rs = K_layer.Lambda(lambda x, i=i: x[:, i],
                                output_shape=(self.unit_t,
                                              self.input_dim))(seq)
            encoded[i] = encode_1(rs)
        return encode_reshape(K_layer.concatenate(encoded, axis=1))

    encoded = encode_partials(reshaped)
    encoded = encode_2(encoded)

    z = K_layer.Input(shape=(self.latent_dim, ))
    decode_repeat_units = K_layer.RepeatVector(self.unit_n)
    decode_units = abs_model.RNN_UNIT(self.partial_latent_dim,
                                      return_sequences=True,
                                      activation=self.activation)
    decode_euler_1 = K_layer.Dense(self.output_dim * 4,
                                   activation=self.activation)
    decode_euler_2 = K_layer.Dense(self.output_dim,
                                   activation=self.activation)
    # upsample the per-unit angle stream to full timestep resolution
    decode_repete_angles = K_layer.Lambda(
        lambda x: K_backend.repeat_elements(x, self.unit_t, 1),
        output_shape=(self.timesteps, self.output_dim))
    decode_repete = K_layer.RepeatVector(self.timesteps)
    decode_residual_1 = abs_model.RNN_UNIT(self.output_dim * 4,
                                           return_sequences=True,
                                           activation=self.activation)
    decode_residual_2 = abs_model.RNN_UNIT(self.output_dim,
                                           return_sequences=True,
                                           activation=self.activation)

    def decode_angle(e):
        # coarse angle path: per-unit RNN + dense stack, then upsample
        angle = decode_units(decode_repeat_units(e))
        angle = K_layer.TimeDistributed(decode_euler_1)(angle)
        angle = K_layer.TimeDistributed(decode_euler_2)(angle)
        angle = decode_repete_angles(angle)
        # fine residual path at full timestep resolution
        residual = decode_repete(e)
        residual = decode_residual_2(decode_residual_1(residual))
        angle = K_layer.Activation(self.activation)(K_layer.add(
            [angle, residual]))
        return angle

    decoded = decode_angle(encoded)
    decoded_ = decode_angle(z)

    self.encoder = Model(inputs, encoded)
    self.decoder = Model(z, decoded_)
    self.model = Model(inputs, decoded)
def build_network(self, label_size, input_shape_Q, input_shape_A,
                  vocab_size, embed_hidden_size=50, rnn_size=100,
                  dropout=0.3):
    """Assemble the question / two-answer matching graph.

    Returns:
        (preds, question, answer_1, answer_2): the softmax prediction
        tensor plus the three Input tensors for the caller to wrap in a
        Model.
    """
    print('Build model...')
    print('input_shape_Q: {}'.format(input_shape_Q))
    print('input_shape_A: {}'.format(input_shape_A))

    # Question branch: embed + dropout, kept as a full sequence.
    question = layers.Input(shape=(input_shape_Q,), dtype='int32')
    q_encoded = layers.Dropout(dropout)(
        layers.Embedding(vocab_size, embed_hidden_size)(question))
    # encoded_question = RNN(RNN_SIZE)(encoded_question)
    # encoded_question = layers.RepeatVector(RNN_SIZE)(encoded_question)
    print(q_encoded)

    def answer_branch():
        # Each answer: embed -> dropout -> RNN summary -> repeat to the
        # question's timestep count so the branches can be added.
        inp = layers.Input(shape=(input_shape_A,), dtype='int32')
        enc = layers.Embedding(vocab_size, embed_hidden_size)(inp)
        enc = layers.Dropout(dropout)(enc)
        enc = RNN(embed_hidden_size)(enc)
        enc = layers.RepeatVector(input_shape_Q)(enc)
        print(enc)
        return inp, enc

    answer_1, a1_encoded = answer_branch()
    answer_2, a2_encoded = answer_branch()

    merged = layers.add([q_encoded, a1_encoded, a2_encoded])
    merged = RNN(rnn_size)(merged)
    print(merged)
    merged = layers.Dropout(dropout)(merged)
    print(merged)
    preds = layers.Dense(label_size, activation='softmax')(merged)
    return preds, question, answer_1, answer_2
def build_model(width, cgru_size_1, cgru_size_2, embed_size=256, **params):
    """Image + caption -> RGB map model built around SpatialCGRU layers.

    Args:
        width: input image height/width in pixels.
        cgru_size_1, cgru_size_2: channel counts of the two SpatialCGRU
            stages.
        embed_size: word-embedding / language-vector width.
        **params: must contain 'batch_size'.

    Returns:
        Compiled Keras Model over (image, word-id sequence) inputs.
    """
    batch_size = params['batch_size']
    input_img = layers.Input(
        batch_shape=(batch_size, width, width, IMG_CHANNELS))
    input_words = layers.Input(batch_shape=(batch_size, MAX_WORDS),
                               dtype='int32')
    # FIX: Embedding's signature is (input_dim=vocab_size, output_dim=
    # embed_size); the original had the two arguments swapped.
    language = layers.Embedding(words.VOCABULARY_SIZE,
                                embed_size)(input_words)
    language = layers.GRU(embed_size)(language)
    language_output = layers.Dense(embed_size)(language)

    # Apply the convolutional layers of VGG16
    from keras.applications.vgg16 import VGG16
    vgg = VGG16(include_top=False)
    for layer in vgg.layers:
        layer.trainable = False
    # Run a pretrained network
    x = vgg(input_img)

    def broadcast_language(tensor):
        # Tile the language vector over every spatial cell of `tensor`.
        # FIX: list(...) because map() is not subscriptable on Python 3.
        shape = list(map(int, tensor.shape))
        lang = layers.RepeatVector(shape[1] * shape[2])(language_output)
        lang = layers.Reshape((shape[1], shape[2], embed_size))(lang)
        return layers.Concatenate()([tensor, lang])

    # Broadcast language into every convolutional output
    x = broadcast_language(x)
    # Statefully scan the image in each of four directions
    x = SpatialCGRU(x, cgru_size_1)
    # Stack another one on there
    x = SpatialCGRU(x, cgru_size_2)
    # Add language output again!
    x = broadcast_language(x)

    # Upsample and convolve back to input resolution
    for filters in (64, 32, 16, 8):
        x = layers.UpSampling2D((2, 2))(x)
        x = layers.Conv2D(filters, (3, 3), activation='relu',
                          padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    # Output an RGB image
    x = layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    model = models.Model(inputs=[input_img, input_words], outputs=x)
    # FIX: `lr` is not a compile() argument; set it on the optimizer,
    # which is what the original lr=.0001 intended.
    model.compile(optimizer=keras.optimizers.Adam(lr=0.0001),
                  loss='binary_crossentropy')
    model.summary()
    return model
def build_conv_model(det_shape=default_det_shape, n_tracks=1):
    """Build current iteration of convolutional tracking model.

    NOTE(review): written against the Keras 1 API (Convolution2D(n, 3, 3),
    border_mode=, Model(input=..., output=...), layers.merge) -- confirm the
    pinned Keras version before modernizing.

    Returns tuple: (full_model, track_pred_model, conv_model, pretrain_layers)
    where:
    -full_model is the entire model
    -track_pred_model is the part that predicts track parameters (excluding
     covariances)
    -conv_model is the convolutional part only
    -pretrain_layers is a list of layers for which trainable=False should be
     set after training the track-finding portion of the model (if training
     that part separately)"""
    pretrain_layers = []
    # single-channel detector image, channels-first layout
    input_layer = layers.Input(shape=(1, det_shape[0], det_shape[1]))
    # NOTE(review): these appends collect the *output tensors* of the layer
    # calls, not the layer objects themselves -- verify against how callers
    # flip trainable=False.
    layer = layers.Convolution2D(8, 3, 3, border_mode='same')(input_layer)
    pretrain_layers.append(layer)
    layer = layers.Activation('relu')(layer)
    layer = layers.Convolution2D(8, 3, 3, border_mode='same')(layer)
    pretrain_layers.append(layer)
    layer = layers.Activation('relu')(layer)
    layer = layers.MaxPooling2D(pool_size=(2, 2))(layer)
    layer = layers.Convolution2D(32, 3, 3, border_mode='same')(layer)
    pretrain_layers.append(layer)
    layer = layers.Activation('relu')(layer)
    layer = layers.Convolution2D(32, 3, 3, border_mode='same')(layer)
    pretrain_layers.append(layer)
    layer = layers.Activation('relu')(layer)
    conv_model = models.Model(input=input_layer, output=layer)
    layer = layers.Flatten()(layer)
    # track-parameter head: Dense -> repeat per track -> LSTM -> 2 params/track
    layer_tracks = layers.Dense(400)(layer)
    pretrain_layers.append(layer_tracks)
    layer_tracks = layers.RepeatVector(n_tracks)(layer_tracks)
    layer_tracks = layers.LSTM(400, return_sequences=True)(layer_tracks)
    pretrain_layers.append(layer_tracks)
    output_layer_tracks = layers.TimeDistributed(layers.Dense(2))(
        layer_tracks)  # track parameters
    pretrain_layers.append(output_layer_tracks)
    track_pred_model = models.Model(input=input_layer,
                                    output=output_layer_tracks)
    # covariance head: 3 raw values per track mapped to covariance entries
    layer_cov = layers.Dense(400)(layer)
    layer_cov = layers.RepeatVector(n_tracks)(layer_cov)
    layer_cov = layers.LSTM(400, return_sequences=True)(layer_cov)
    layer_cov = layers.TimeDistributed(layers.Dense(3))(
        layer_cov)  # track covariance matrix parameters
    output_layer_cov = layers.Lambda(
        gauss_likelihood_loss.covariance_from_network_outputs)(layer_cov)
    # concatenate parameters and covariances along the per-track axis
    output_layer = layers.merge([output_layer_tracks, output_layer_cov],
                                mode='concat',
                                concat_axis=2)
    full_model = models.Model(input=input_layer, output=output_layer)
    return full_model, track_pred_model, conv_model, pretrain_layers
def multitask_rnn_2(window_size, n_features): n_in = window_size # define encoder visible = layers.Input(shape=(n_in, n_features)) masked = layers.Masking(mask_value=0.)(visible) encoder = layers.LSTM(128, activation='relu', return_sequences=True)(masked) encoder = layers.LSTM(128, activation='relu')(encoder) # define reconstruction decoder decoder1 = layers.RepeatVector(n_in)(encoder) decoder1 = layers.LSTM(128, activation='relu', return_sequences=True)(decoder1) decoder1 = layers.LSTM(128, activation='relu', return_sequences=True)(decoder1) decoder1 = layers.TimeDistributed(Dense(n_features), name='decoder1_output')(decoder1) # define forecasting decoder pred_hidden = layers.RepeatVector(n_in)(encoder) pred_hidden = layers.LSTM(128, activation='relu', return_sequences=True)(pred_hidden) decoder2 = layers.LSTM(64, activation='relu', return_sequences=True)(pred_hidden) decoder2 = layers.TimeDistributed(Dense(1), name='decoder2_output')(decoder2) # define outcome predictor predictor = layers.LSTM(64, activation='relu')(pred_hidden) predictor = layers.Dense(64, activation='relu')(predictor) predictor = layers.Dense(2, activation='softmax', name='predictor_output')(predictor) # tie it together model = models.Model(inputs=visible, outputs=[decoder1, decoder2, predictor]) model.summary() keras.utils.plot_model(model, show_shapes=True, to_file='multitask_rnn_v3.png') model.compile(optimizer='adam', loss={ 'decoder1_output': 'mse', 'decoder2_output': 'mse', 'predictor_output': 'categorical_crossentropy' }, loss_weights={ 'decoder1_output': args.weight, 'decoder2_output': 1 - args.weight, 'predictor_output': 1 - args.weight }) # model.compile(optimizer='adam', loss='mse') model_predictor = models.Model(inputs=model.inputs, outputs=predictor) return model, model_predictor
def build_model(GRU_SIZE=1024, WORDVEC_SIZE=300, ACTIVATION='relu'):
    """Caption model conditioned on global image features, local (bounding
    box) image features, and a normalized box-context vector, decoded by a
    GRU over word embeddings into a next-word softmax.
    """
    resnet = build_resnet()

    def image_branch(input_img):
        # convnet features -> BN -> project to half the wordvec size -> BN,
        # then repeat so the vector is present at every word timestep
        feats = resnet(input_img)
        feats = layers.BatchNormalization()(feats)
        # FIX: integer division -- Dense units must be an int; on Python 3
        # `WORDVEC_SIZE / 2` is a float and Dense rejects it.
        feats = layers.Dense(WORDVEC_SIZE // 2,
                             activation=ACTIVATION)(feats)
        feats = layers.BatchNormalization()(feats)
        return layers.RepeatVector(MAX_WORDS)(feats)

    # Global Image features (convnet output for the whole image)
    input_img_global = layers.Input(shape=IMG_SHAPE)
    image_global = image_branch(input_img_global)

    # Local Image features (convnet output inside the bounding box)
    input_img_local = layers.Input(shape=IMG_SHAPE)
    image_local = image_branch(input_img_local)

    # Context Vector input
    # normalized to [0,1] the values:
    # left, top, right, bottom, (box area / image area)
    input_ctx = layers.Input(shape=(5, ))
    ctx = layers.BatchNormalization()(input_ctx)
    ctx = layers.RepeatVector(MAX_WORDS)(ctx)

    # (removed an unused `language_model = models.Sequential()` local)
    input_words = layers.Input(shape=(MAX_WORDS, ), dtype='int32')
    language = layers.Embedding(words.VOCABULARY_SIZE,
                                WORDVEC_SIZE,
                                input_length=MAX_WORDS)(input_words)
    language = layers.BatchNormalization()(language)
    language = layers.GRU(GRU_SIZE, return_sequences=True)(language)
    language = layers.BatchNormalization()(language)
    language = layers.TimeDistributed(
        layers.Dense(WORDVEC_SIZE, activation=ACTIVATION))(language)
    language = layers.BatchNormalization()(language)

    # Problem with Keras 2:
    # TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op have
    # types [uint8, uint8, bool, uint8] that don't all match.
    # Masking doesn't work along with concatenation.
    # How do I get mask_zero=True working in the embed layer?
    x = layers.concatenate([image_global, image_local, ctx, language])
    x = layers.GRU(GRU_SIZE)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(words.VOCABULARY_SIZE, activation='softmax')(x)
    return models.Model(
        inputs=[input_img_global, input_img_local, input_words, input_ctx],
        outputs=x)
def make_model(self):
    """Hierarchical RNN sequence predictor (similar to HH_RNN): encode
    unit_t-step chunks, merge chunk codes into one latent vector, then
    decode timesteps_out steps with a 2-layer RNN.

    Sets self.encoder, self.decoder and self.model.
    """
    self.partial_latent_dim = 256  # self.latent_dim/2
    inputs = K_layer.Input(shape=(self.timesteps_in, self.input_dim))
    reshaped = K_layer.Reshape((self.unit_n, self.unit_t,
                                self.input_dim))(inputs)
    encode_reshape = K_layer.Reshape((self.unit_n,
                                      self.partial_latent_dim))
    encode_1 = abs_model.RNN_UNIT(self.partial_latent_dim)
    encode_2 = abs_model.RNN_UNIT(self.latent_dim)

    def encode_partials(seq):
        encoded = [None] * self.unit_n
        for i in range(self.unit_n):
            # FIX: bind the loop variable as a default argument; a plain
            # closure late-binds `i`, so a deferred call (e.g. on model
            # reload) would slice the last chunk for every position.
            rs = K_layer.Lambda(lambda x, i=i: x[:, i],
                                output_shape=(self.unit_t,
                                              self.input_dim))(seq)
            encoded[i] = encode_1(rs)
        return encode_reshape(K_layer.concatenate(encoded, axis=1))

    encoded = encode_partials(reshaped)
    encoded = encode_2(encoded)

    z = K_layer.Input(shape=(self.latent_dim,))
    # (removed unused decode_euler_1 / decode_euler_2 Dense locals that
    # were never wired into the graph)
    decode_repete = K_layer.RepeatVector(self.timesteps_out)
    decode_residual_1 = abs_model.RNN_UNIT(self.partial_latent_dim,
                                           return_sequences=True,
                                           activation=self.activation)
    decode_residual_2 = abs_model.RNN_UNIT(self.output_dim,
                                           return_sequences=True,
                                           activation=self.activation)

    decoded = decode_residual_2(decode_residual_1(decode_repete(encoded)))
    decoded_ = decode_residual_2(decode_residual_1(decode_repete(z)))

    self.encoder = Model(inputs, encoded)
    self.decoder = Model(z, decoded_)
    self.model = Model(inputs, decoded)
def buildLSTMModel(input_size, max_output_seq_len, hidden_size):
    """Sequence-to-sequence LSTM regressor: masked input sequence ->
    single hidden encoding -> repeated vector -> one linear value per
    output timestep. Compiled with MSE / Adam."""
    stack = [
        # skip zero-padded timesteps
        kl.Masking(mask_value=0,
                   input_shape=(max_output_seq_len, input_size)),
        # encoder: collapse the sequence to one hidden vector
        kl.LSTM(input_dim=input_size, output_dim=hidden_size,
                return_sequences=False),
        kl.Dense(hidden_size, activation='relu'),
        # hand the encoding to the decoder at every output timestep
        kl.RepeatVector(max_output_seq_len),
        kl.LSTM(hidden_size, return_sequences=True),
        # scalar linear readout per timestep
        kl.TimeDistributed(kl.Dense(output_dim=1, activation="linear")),
    ]
    model = km.Sequential()
    for piece in stack:
        model.add(piece)
    model.compile(loss='mse', optimizer='adam')
    return model
def make_model(self):
    """Name classifier over hierarchical chunk encodings: the non-name
    features are encoded chunk-wise, merged, and mapped to a softmax over
    the name block, repeated across all timesteps.

    Sets self.model (encoder/decoder alias the same model).
    """
    # the name block is predicted from the remaining input features
    self.input_dim = self.input_dim - self.name_dim
    self.output_dim = self.name_dim
    inputs = K_layer.Input(shape=(self.timesteps, self.input_dim))
    reshaped = K_layer.Reshape(
        (self.unit_n, self.unit_t, self.input_dim))(inputs)
    # FIX: integer division -- Reshape dims and RNN sizes must be ints;
    # on Python 3 `self.latent_dim / 2` is a float and both reject it.
    half_latent = self.latent_dim // 2
    encode_reshape = K_layer.Reshape((self.unit_n, half_latent))
    encode_1 = abs_model.RNN_UNIT(half_latent)
    encode_2 = abs_model.RNN_UNIT(self.latent_dim)

    def encode_partials(seq):
        encoded = [None] * self.unit_n
        for i in range(self.unit_n):
            # FIX: bind the loop variable as a default argument to avoid
            # the late-binding-closure hazard on deferred Lambda calls.
            rs = K_layer.Lambda(lambda x, i=i: x[:, i],
                                output_shape=(self.unit_t,
                                              self.input_dim))(seq)
            encoded[i] = encode_1(rs)
        return encode_reshape(K_layer.concatenate(encoded, axis=1))

    encoded = encode_partials(reshaped)
    encoded = encode_2(encoded)
    decoded = K_layer.Dense(self.output_dim,
                            activation='sigmoid')(encoded)
    # renormalize the sigmoid outputs into a distribution
    decoded = K_layer.Lambda(lambda x: K.tf.nn.softmax(x))(decoded)
    output = K_layer.RepeatVector(self.timesteps)(decoded)
    self.model = Model(inputs, output)
    self.encoder = self.model
    self.decoder = self.model
def get_model(vocab_size, story_maxlen, query_maxlen):
    """bAbI-style QA model: story and question are embedded, the
    RNN-encoded question is repeated over the story timesteps and added
    to it, then decoded to a single answer word.

    Returns:
        (model, BATCH_SIZE, EPOCHS)
    """
    RNN = recurrent.LSTM
    EMBED_HIDDEN_SIZE = 50
    BATCH_SIZE = 16
    EPOCHS = 400

    def embedded_input(maxlen):
        # int32 token-id input -> embedding -> dropout
        inp = layers.Input(shape=(maxlen, ), dtype='int32')
        emb = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(inp)
        return inp, layers.Dropout(0.3)(emb)

    sentence, encoded_sentence = embedded_input(story_maxlen)
    question, encoded_question = embedded_input(query_maxlen)
    # summarize the question and broadcast it over the story timesteps
    encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
    encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

    merged = layers.add([encoded_sentence, encoded_question])
    merged = RNN(EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds = layers.Dense(vocab_size, activation='softmax')(merged)

    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, BATCH_SIZE, EPOCHS
def decoder2_module(n_in, dim_encoder, n_features, x):
    """Forecasting decoder: project the encoding, unroll it over n_in
    timesteps with an LSTM, and emit one scalar per timestep.

    Note: n_features is currently unused but kept for signature
    compatibility with the sibling decoder modules.
    """
    projected = layers.Dense(dim_encoder, activation='relu')(x)
    repeated = layers.RepeatVector(n_in)(projected)
    sequence = layers.LSTM(dim_encoder, activation='relu',
                           return_sequences=True)(repeated)
    return layers.TimeDistributed(Dense(1))(sequence)
def _build(self, model):
    """ Create the backend-specific placeholder.

    Yields either a Keras layer or, for the pytorch backend, a `connect`
    callable that wires the repeat operation into the graph.
    """
    backend = model.get_backend()
    if backend.get_name() == 'keras':
        import keras.layers as L  # pylint: disable=import-error
        # Keras ships a native RepeatVector layer
        yield L.RepeatVector(
            self.count,
            name=self.name
        )
    elif backend.get_name() == 'pytorch':
        def connect(inputs):
            """ Connects the layer. """
            assert len(inputs) == 1
            ndim = len(inputs[0]['shape'])
            # repeat factors: keep batch, make `count` copies along the
            # new axis 1, leave all remaining dims untouched
            sizes = (1, self.count) + (1, )*ndim
            return {
                'shape' : self.shape([inputs[0]['shape']]),
                'layer' : model.data.add_operation(
                    lambda x: x.unsqueeze(1).repeat(*sizes)
                )(inputs[0]['layer'])
            }
        yield connect
    else:
        raise ValueError(
            'Unknown or unsupported backend: {}'.format(backend))
def addPreAttentionLayer(self, merged_input):
    """Add attention mechanisms to the tensor merged_input.

    Args:
        merged_input: 3-dimensional Tensor, where the first dimension
            corresponds to the batch size, the second to the sequence
            timesteps and the last one to the concatenation of features.

    Returns:
        3-dimensional Tensor of the same dimension as merged_input
    """
    activation = self.params.get('attentionActivation', None)
    # config files may spell out the literal string 'None'
    if activation == 'None':
        activation = None
    feature_vector_size = K.int_shape(merged_input)[-1]
    # per-timestep score vector over the feature dimension
    att_layer = layers.TimeDistributed(
        layers.Dense(feature_vector_size, activation=activation),
        name='attention_matrix_score')(merged_input)
    # Calculate a single score for each timestep
    att_layer = layers.Lambda(lambda x: K.mean(x, axis=2),
                              name='attention_vector_score')(att_layer)
    # Reshape to obtain the same shape as input: tile the per-timestep
    # score across the feature axis, then swap back to (time, features)
    att_layer = layers.Permute(
        (2, 1))(layers.RepeatVector(feature_vector_size)(att_layer))
    # element-wise scaling of the input by its attention scores
    merged_input = layers.multiply([att_layer, merged_input])
    return merged_input
def decoder(encoder_layer):
    """Decode the merged latent vector into per-chain sequence outputs.

    Returns:
        [VL_output, VH_output]: one decoded sequence tensor per antibody
        chain, each of length VL_LENGTH / VH_LENGTH respectively.
    """
    # shared trunk
    trunk = L.Dense(32, activation='relu',
                    name='merged_decoder_dense1')(encoder_layer)
    trunk = L.Dense(64, activation='relu',
                    name='merged_decoder_dense2')(trunk)

    outputs = []
    for name, length in zip(['VL', 'VH'], [VL_LENGTH, VH_LENGTH]):
        # per-chain head: bottleneck -> repeat to chain length -> RNN ->
        # linear per-position readout
        head = L.Dense(16, activation='relu',
                       name='{}_decoder_dense1'.format(name))(trunk)
        head = L.RepeatVector(
            length, name='{}_decoder_repeatvector1'.format(name))(head)
        head = RNN(16, return_sequences=True,
                   name='{}_decoder_rnn1'.format(name))(head)
        outputs.append(
            L.Dense(input_dims, name='{}_output'.format(name))(head))
    return outputs
def fn_setup_model(inputs, labels):
    """Char-level seq2seq model: LSTM-encode the input window, repeat the
    encoding per output step, decode with a second LSTM and a per-step
    softmax over the character set."""
    input_size = len(inputs[0])
    label_size = len(labels[0])
    HIDDEN_SIZE = 128
    # BATCH_SIZE = 128
    NUM_OF_HIDDEN_LAYERS = 1
    model = Sequential()
    for piece in (
            layers.LSTM(HIDDEN_SIZE,
                        input_shape=(input_size, len(char_array))),
            layers.RepeatVector(label_size),
            layers.LSTM(HIDDEN_SIZE, return_sequences=True),
            layers.TimeDistributed(layers.Dense(len(char_array))),
            layers.Activation('softmax'),
    ):
        model.add(piece)
    fn_compile_model(model)
    model.summary()
    return model
def build_attn_with_layer(seq, controller, layer, cell_size=300):
    """Build attention mechanism in the computation graph.

    Args:
        seq: 3-D tensor (batch, 20, cell_size) to attend over.
        controller: 2-D tensor used to score each timestep of `seq`.
        layer: transformation applied to the repeated controller.
        cell_size: feature width of `seq`.

    Returns:
        (summed, attention): attention-weighted sum of `seq` and the
        attention distribution.
    """
    controller_repeated = layers.RepeatVector(20)(controller)
    controller_repeated = layer(controller_repeated)
    attention = layers.Lambda(my_dot, output_shape=(20,))(
        [controller_repeated, seq])
    attention_s = layers.Flatten()(attention)
    # normalize scores into a probability distribution over timesteps
    attention = layers.Lambda(to_prob, output_shape=(20,))(attention_s)
    attention_repeated = layers.RepeatVector(cell_size)(attention)
    attention_repeated = layers.Permute((2, 1))(attention_repeated)
    # FIX: layers.merge(mode='mul') was removed in Keras 2;
    # layers.multiply is the equivalent element-wise product.
    weighted = layers.multiply([attention_repeated, seq])
    summed = layers.Lambda(sum_seq, output_shape=(cell_size,))(weighted)
    return summed, attention
def cnn_2x_encdec_siamese(voc_size, max_len, dropout=0.5):
    """Two siamese branches, each embedding a statement. Binary classifier
    on top.

    Args:
        voc_size: size of the vocabulary for the input statements.
        max_len: maximum length for the input statements.

    Returns:
        A Keras model instance.
    """
    pivot_input = layers.Input(shape=(max_len, ), dtype='int32')
    statement_input = layers.Input(shape=(max_len, ), dtype='int32')

    x = layers.Embedding(output_dim=256, input_dim=voc_size,
                         input_length=max_len)(pivot_input)
    x = layers.Convolution1D(256, 7, activation='relu')(x)
    x = layers.MaxPooling1D(3)(x)
    x = layers.Convolution1D(256, 7, activation='relu')(x)
    x = layers.MaxPooling1D(5)(x)
    x = layers.GRU(256)(x)  # this acts as the encoder
    x = layers.RepeatVector(256)(
        x)  # take the last output of GRU and feed it to the decoder
    embedded_pivot = layers.GRU(256)(x)

    # the same encoder weights embed both statements (siamese)
    encoder_model = Model(pivot_input, embedded_pivot)
    embedded_statement = encoder_model(statement_input)

    # FIX: layers.merge(mode='concat') was removed in Keras 2; use the
    # functional concatenate helper (same behavior, last axis).
    concat = layers.concatenate([embedded_pivot, embedded_statement])
    x = layers.Dense(256, activation='tanh')(concat)
    x = layers.Dropout(dropout)(x)
    prediction = layers.Dense(1, activation='sigmoid')(x)

    model = Model([pivot_input, statement_input], prediction)
    return model
def addPreAttentionLayer(self, merged_input):
    """Add attention mechanisms to the tensor merged_input.

    Args:
        merged_input: 3-dimensional Tensor, where the first dimension
            corresponds to the batch size, the second to the sequence
            timesteps and the last one to the concatenation of features.

    Returns:
        3-dimensional Tensor of the same dimension as merged_input
    """
    activation = self.params.get('attentionActivation', None)
    # config files may spell out the literal string 'None'
    if activation == 'None':
        activation = None
    feature_vector_size = K.int_shape(merged_input)[-1]
    # work in (features, timesteps) layout so the Dense scores timesteps
    merged_input = layers.Permute((2, 1))(merged_input)
    att_layer = layers.TimeDistributed(
        layers.Dense(self.max_sentece_length, activation=activation),
        name='attention_matrix_score')(merged_input)
    # Calculate a single score for each timestep
    att_layer = layers.Lambda(lambda x: K.mean(x, axis=1),
                              name='attention_vector_score')(att_layer)
    # Reshape to obtain the same shape as input
    att_layer = layers.RepeatVector(feature_vector_size)(att_layer)
    merged_input = layers.multiply([att_layer, merged_input])
    # swap back to the original (timesteps, features) layout
    merged_input = layers.Permute((2, 1))(merged_input)
    # We re add the mask layer after the attention is applied.
    # Of course we have the risk of masking elements that were zeroed
    # after the application of the attention scores.
    merged_input = layers.Masking(mask_value=0.0)(merged_input)
    return merged_input
def build_decoder(self):
    """ Build a decoder that transforms latent representations into decoded outputs. """
    self.latent_inputs = Input(shape=(self.latent_dim, ),
                               name='Vhod_v_dekodirnik')
    x = layers.Dense(self.latent_dim, activation='tanh',
                     name="Polno_povezan_sloj_2")(self.latent_inputs)
    x = layers.Dropout(self.dropout_rate_mid, name="Izpustveni_sloj_2")(x)
    x = layers.BatchNormalization(axis=-1,
                                  name="Paketna_normalizacija_5")(x)
    # repeat the latent vector once per output position
    x = layers.RepeatVector(self.compound_length, name="Ponovi_vektor")(x)
    # three stacked tanh GRU layers returning full sequences
    for idx in (1, 2, 3):
        x = layers.GRU(self.gru_size, activation='tanh',
                       return_sequences=True,
                       name="GRU_sloj_{}".format(idx))(x)
    # final GRU emits a per-position distribution over the charset
    x = layers.GRU(self.charset_length, return_sequences=True,
                   activation='softmax')(x)
    self.outputs = x
def GCAE(param):
    """Gated Convolutional network with Aspect Embedding (GCAE) classifier.

    Args:
        param: dict with keys 'sentence_len', 'vocab_size', 'embed_size',
            'num_subject', 'num_class'.

    Returns:
        Compiled Keras Model over (sentence token ids, aspect id) inputs.
    """
    filter_sizes = [2, 3, 4]
    num_filters = 128
    inp1 = layers.Input(shape=(param['sentence_len'], ))
    x1 = layers.Embedding(input_dim=param['vocab_size'],
                          output_dim=param['embed_size'])(inp1)
    # x1 = layers.SpatialDropout1D(rate = 0.2)(x1)
    # aspect/subject input: a single id embedded to the same width
    inp2 = layers.Input(shape=(1, ))
    x2 = layers.Embedding(input_dim=param['num_subject'],
                          output_dim=param['embed_size'])(inp2)
    x_fla = layers.Flatten()(x2)
    maxpool_pool = []
    for filter_size in filter_sizes:
        # two parallel conv streams over the sentence (tanh and linear)
        conv1 = layers.Conv1D(num_filters,
                              kernel_size=filter_size,
                              activation='tanh')(x1)
        conv2 = layers.Conv1D(num_filters,
                              kernel_size=filter_size,
                              activation=None)(x1)
        # tile the aspect vector to the conv output length
        # (valid convolution shortens the sequence by filter_size - 1)
        x2 = layers.RepeatVector(param['sentence_len'] - filter_size +
                                 1)(x_fla)
        # NOTE(review): Aspect_conv is a project-local layer that combines
        # the two conv streams with the aspect signal -- see its definition
        # for the exact gating.
        conv = Aspect_conv()([conv1, conv2, x2])
        maxpool_pool.append(layers.GlobalMaxPooling1D()(conv))
    z = layers.Concatenate(axis=1)(maxpool_pool)
    z = layers.Dropout(0.1)(z)
    outp = layers.Dense(param['num_class'], activation='softmax')(z)
    model = Model(inputs=[inp1, inp2], outputs=outp)
    optimizer = optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model
def build_model(self, vocab_users: WordTable, vocab_comps: WordTable):
    """Component-recommendation model: a user-context sequence and a
    component sequence are merged (bAbI add-and-decode pattern) into a
    softmax over the component vocabulary.

    Args:
        vocab_users: vocabulary table for user-context tokens.
        vocab_comps: vocabulary table for component tokens.

    Returns:
        Compiled Keras Model over (user, comp) inputs.
    """
    RNN = recurrent.LSTM
    USER_MAXLEN = self.user_maxlen
    COMP_MAXLEN = self.comp_maxlen
    EMBED_HIDDEN_SIZE = self.embed_hidden_size
    user = layers.Input(shape=(USER_MAXLEN, ), dtype=np.float32)
    encoded_user = layers.Embedding(vocab_users.vocab_size(),
                                    EMBED_HIDDEN_SIZE)(user)
    # Heavy dropout deliberately weakens the user context's influence so
    # the results are more likely to recommend components other users use.
    encoded_user = layers.Dropout(0.85)(encoded_user)
    comp = layers.Input(shape=(COMP_MAXLEN, ), dtype=np.float32)
    encoded_comp = layers.Embedding(vocab_comps.vocab_size(),
                                    EMBED_HIDDEN_SIZE)(comp)
    # summarize the component sequence and broadcast it over the user steps
    encoded_comp = RNN(EMBED_HIDDEN_SIZE)(encoded_comp)
    encoded_comp = layers.RepeatVector(USER_MAXLEN)(encoded_comp)
    merged = layers.add([encoded_user, encoded_comp])
    merged = RNN(EMBED_HIDDEN_SIZE)(merged)
    preds = layers.Dense(vocab_comps.vocab_size(),
                         activation='softmax')(merged)
    model = Model([user, comp], preds)
    model.compile(optimizer=keras.optimizers.Adam(lr=self.lr),
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])
    return model
def build_bidirectional_model(num_tokens):
    """Bidirectional LSTM encoder plus a stack of bidirectional LSTM
    decoder layers, with a per-timestep softmax over the token set."""
    # Try replacing GRU, or SimpleRNN.
    # RNN = layers.LSTM
    HIDDEN_SIZE = 128
    LAYERS = 3
    print('Build model...')

    # encoder + bridge
    stack = [
        Bidirectional(LSTM(HIDDEN_SIZE),
                      input_shape=(MAXLEN, num_tokens)),
        layers.RepeatVector(MAXLEN),
    ]
    # stacked bidirectional decoder layers returning full sequences
    stack.extend(
        Bidirectional(layers.LSTM(HIDDEN_SIZE, return_sequences=True))
        for _ in range(LAYERS))
    # per-timestep readout: choose a character at each output position
    stack.append(layers.TimeDistributed(layers.Dense(num_tokens)))
    stack.append(layers.Activation('softmax'))

    model = Sequential()
    for piece in stack:
        model.add(piece)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
def build_model(self):
    """Span-prediction QA model: predicts answer begin and end positions
    from a (story, question) pair via two parallel RNN heads.

    FIXES over the original:
    - later uses of `RNN(EMBED_HIDDEN_SIZE)` and `Dense(story_maxlen+3)`
      referenced bare names that are only defined on `self` (NameError at
      runtime); they now use the instance attributes, matching the earlier
      `self.`-qualified uses in this same method.
    - removed a duplicated `model = Model(...)` line.
    """
    # NOTE(review): Keras' Embedding expects weights=[matrix] (a list);
    # confirm self.embedding_matrix is already wrapped accordingly.
    embedding_layer = Embedding(prep.vocab_size,
                                prep.EMBEDDING_DIM,
                                weights=self.embedding_matrix)
    sentence = layers.Input(shape=(self.story_maxlen,), dtype="int32")
    encoded_sentence = embedding_layer(sentence)
    encoded_sentence = layers.Dropout(0.3)(encoded_sentence)

    question = layers.Input(shape=(self.query_maxlen,), dtype="int32")
    encoded_question = embedding_layer(question)
    encoded_question = layers.Dropout(0.3)(encoded_question)
    # summarize the question and broadcast it over the story timesteps
    encoded_question = RNN(self.EMBED_HIDDEN_SIZE)(encoded_question)
    encoded_question = layers.RepeatVector(
        self.story_maxlen)(encoded_question)

    # begin-position head
    merged = layers.add([encoded_sentence, encoded_question])
    merged = RNN(self.EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds_beg = layers.Dense(self.story_maxlen + 3,
                             activation='softmax')(merged)

    # end-position head (independent RNN over the same merged inputs)
    merged = layers.add([encoded_sentence, encoded_question])
    merged = RNN(self.EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds_end = layers.Dense(self.story_maxlen + 3,
                             activation='softmax')(merged)

    model = Model([sentence, question], [preds_beg, preds_end])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def model_build(DIGITS, MAXLEN, chars, checkpoint=''):
    """Sequence-to-sequence addition model: LSTM encoder, RepeatVector
    bridge, LSTM decoder, per-timestep softmax over the character set.

    Optionally restores weights from `checkpoint` when one is given.
    """
    RNN = layers.LSTM
    HIDDEN_SIZE = 128
    LAYERS = 1
    model = Sequential()
    # Encoder: collapse the (MAXLEN, charset) input to one hidden vector.
    # With variable-length inputs, input_shape=(None, num_feature) works.
    model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
    # Decoder input: the encoding repeated once per output timestep.
    # DIGITS + 1 covers the longest possible sum (999+999=1998).
    model.add(layers.RepeatVector(DIGITS + 1))
    for _ in range(LAYERS):
        # return_sequences=True keeps the (samples, timesteps, dim) shape
        # that TimeDistributed below requires
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))
    # per-timestep character choice
    model.add(
        layers.TimeDistributed(
            layers.Dense(len(chars), activation='softmax')))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    if checkpoint:
        model.load_weights(checkpoint)
    return model
def test_sequential_model_pickling():
    """Pickle round-trip: a restored Sequential model must predict the
    same outputs and keep training in lockstep with the original."""
    model = keras.Sequential()
    model.add(layers.Dense(2, input_shape=(3,)))
    model.add(layers.RepeatVector(3))
    model.add(layers.TimeDistributed(layers.Dense(3)))
    model.compile(loss=losses.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')

    def random_batch():
        # one sample: (1, 3) input, (1, 3, 3) target
        return np.random.random((1, 3)), np.random.random((1, 3, 3))

    x, y = random_batch()
    model.train_on_batch(x, y)
    out = model.predict(x)

    restored = pickle.loads(pickle.dumps(model))
    assert_allclose(out, restored.predict(x), atol=1e-05)

    # test that new updates are the same with both models
    x, y = random_batch()
    model.train_on_batch(x, y)
    restored.train_on_batch(x, y)
    assert_allclose(model.predict(x), restored.predict(x), atol=1e-05)
def create_LSTM(self, x_train, y_train, x_val, y_val):
    """Build and train a stacked-LSTM sequence model.

    FIX: the model was built and fitted but never returned, so callers
    could not evaluate or save it; it is now returned (backward
    compatible -- the previous return value was None).

    Args:
        x_train, y_train: training data/labels.
        x_val, y_val: validation data/labels.

    Returns:
        The fitted Sequential model.
    """
    model = Sequential()
    model.add(
        RNN(HIDDEN_SIZE,
            input_shape=(self.MAX_SEQUENCE_LENGTH,
                         len(self.labels_index))))
    model.add(layers.RepeatVector(len(self.labels_index)))
    # 5 stacked recurrent decoder layers returning full sequences
    for _ in range(5):
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))
    model.add(layers.TimeDistributed(layers.Dense(128)))
    model.add(layers.Activation('softmax'))
    #model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.compile(loss='categorical_crossentropy',
                  optimizer=md.OPTIMIZER_PROP,
                  metrics=['acc'])
    model.fit(x_train,
              y_train,
              validation_data=(x_val, y_val),
              epochs=self.EPOCHS,
              batch_size=self.BATCH_SIZE)
    return model
def create_model(MAXLEN, LAYERS, ENCODE_LENGTH, HIDDEN_SIZE):
    """GRU encoder-decoder over masked fixed-length sequences with a
    per-timestep sigmoid readout; compiled with binary crossentropy."""
    RNN = layers.GRU
    print('Build model...')
    # Encoder: mask zero-padding, then squeeze the sequence into one
    # hidden vector. With variable-length inputs,
    # input_shape=(None, num_feature) would work as well.
    input_tensor = layers.Input(shape=(MAXLEN, ENCODE_LENGTH))
    net = layers.Masking(mask_value=0.0)(input_tensor)
    net = RNN(HIDDEN_SIZE, activation='relu')(net)
    net = layers.Dense(HIDDEN_SIZE)(net)
    # Bridge: hand the encoding to the decoder at every output timestep.
    net = layers.RepeatVector(MAXLEN)(net)
    # Decoder: LAYERS stacked GRUs returning full sequences.
    for _ in range(LAYERS):
        net = RNN(HIDDEN_SIZE, activation='tanh',
                  return_sequences=True)(net)
    # per-timestep projection and independent sigmoid outputs
    net = layers.Dense(HIDDEN_SIZE)(net)
    pred = layers.Activation('sigmoid')(layers.Dense(ENCODE_LENGTH)(net))
    model = Model(inputs=input_tensor, outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model