def inception_resnet_v2_A(x):
    """Inception-ResNet-v2 block A with a residual (sum) connection.

    Filter counts are divided by the module-level
    `nb_filters_reduction_factor` global.
    """
    def conv(tensor, n, rows, cols, act='relu'):
        # All convs in this block share stride 1, 'same' padding,
        # he_normal init and 'tf' dim ordering.
        return Convolution2D(n // nb_filters_reduction_factor, rows, cols,
                             subsample=(1, 1), activation=act,
                             init='he_normal', border_mode='same',
                             dim_ordering='tf')(tensor)

    shortcut = x
    branch_1x1 = conv(x, 32, 1, 1)
    branch_3x3 = conv(conv(x, 32, 1, 1), 32, 3, 3)
    branch_dbl = conv(conv(conv(x, 32, 1, 1), 48, 3, 3), 64, 3, 3)
    mixed = merge([branch_1x1, branch_3x3, branch_dbl],
                  mode='concat', concat_axis=-1)
    # Linear 1x1 projection back to the shortcut's channel count.
    residual = conv(mixed, 384, 1, 1, act='linear')
    out = merge([shortcut, residual], mode='sum')
    return Activation('relu')(out)
def unet_model_3d():
    """Build and compile a 3D U-Net for volumetric segmentation.

    Reads `config` for "input_shape", "pool_size", "n_labels" and
    "initial_learning_rate"; compiles with Adam and a Dice-coefficient
    loss. Returns the compiled Keras Model.
    """
    inputs = Input(config["input_shape"])
    # Contracting path: two 3x3x3 convs per level, then max-pool.
    conv1 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(inputs)
    conv1 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(conv1)
    pool1 = MaxPooling3D(pool_size=config["pool_size"])(conv1)
    conv2 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(pool1)
    conv2 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(conv2)
    pool2 = MaxPooling3D(pool_size=config["pool_size"])(conv2)
    conv3 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(pool2)
    conv3 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(conv3)
    pool3 = MaxPooling3D(pool_size=config["pool_size"])(conv3)
    conv4 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(pool3)
    conv4 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(conv4)
    pool4 = MaxPooling3D(pool_size=config["pool_size"])(conv4)
    # Bottleneck.
    conv5 = Conv3D(512, 3, 3, 3, activation='relu', border_mode='same')(pool4)
    conv5 = Conv3D(512, 3, 3, 3, activation='relu', border_mode='same')(conv5)
    # Expanding path: upsample, concatenate the skip connection on the
    # channel axis (axis 1 -> channels-first layout), then two convs.
    up6 = merge([UpSampling3D(size=config["pool_size"])(conv5), conv4],
                mode='concat', concat_axis=1)
    conv6 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(up6)
    conv6 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(conv6)
    up7 = merge([UpSampling3D(size=config["pool_size"])(conv6), conv3],
                mode='concat', concat_axis=1)
    conv7 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(up7)
    conv7 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(conv7)
    up8 = merge([UpSampling3D(size=config["pool_size"])(conv7), conv2],
                mode='concat', concat_axis=1)
    conv8 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(up8)
    conv8 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(conv8)
    up9 = merge([UpSampling3D(size=config["pool_size"])(conv8), conv1],
                mode='concat', concat_axis=1)
    conv9 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(up9)
    conv9 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(conv9)
    # 1x1x1 conv down to n_labels channels; per-voxel sigmoid output.
    conv10 = Conv3D(config["n_labels"], 1, 1, 1)(conv9)
    act = Activation('sigmoid')(conv10)
    model = Model(input=inputs, output=act)
    model.compile(optimizer=Adam(lr=config["initial_learning_rate"]),
                  loss=dice_coef_loss, metrics=[dice_coef])
    return model
def _build_trending(self, phase):
    """Build the prior ("trending") network: an RNN over the previous
    observation and latent, mapped to concatenated [mu, sigma] outputs.

    Stateful recurrence is enabled only in the predict phase.
    """
    joined_history = merge([self.x_tm1, self.z_tm1], mode="concat")
    prior_states = RecurrentLayer(self.n_hidden_recurrent,
                                  return_sequences=True,
                                  stateful=(phase == Phases.predict),
                                  consume_less='gpu')(joined_history)
    mu_out = TimeDistributed(
        Dense(self.latent_dim, activation='linear'))(prior_states)
    # softplus keeps the scale parameter positive
    sigma_out = TimeDistributed(
        Dense(self.latent_dim, activation="softplus"))(prior_states)
    return merge([mu_out, sigma_out], mode="concat")
def inception_resnet_v2_B(x):
    """Inception-ResNet-v2 block B with a residual (sum) connection.

    Filter counts are divided by the module-level
    `nb_filters_reduction_factor` global.
    """
    def conv(tensor, n, rows, cols, act='relu'):
        # Shared conv settings for this block: stride 1, 'same' padding.
        return Convolution2D(n // nb_filters_reduction_factor, rows, cols,
                             subsample=(1, 1), activation=act,
                             init='he_normal', border_mode='same',
                             dim_ordering='tf')(tensor)

    shortcut = x
    branch_a = conv(x, 192, 1, 1)
    # Factorized 7x7: 1x7 followed by 7x1.
    branch_b = conv(x, 128, 1, 1)
    branch_b = conv(branch_b, 160, 1, 7)
    branch_b = conv(branch_b, 192, 7, 1)
    mixed = merge([branch_a, branch_b], mode='concat', concat_axis=-1)
    # Linear 1x1 projection back to the shortcut's channel count.
    residual = conv(mixed, 1154, 1, 1, act='linear')
    out = merge([shortcut, residual], mode='sum')
    return Activation('relu')(out)
def constraint_lstm(timesteps, num_features, num_pitches, num_units_lstm,
                    dropout_prob=0.2):
    """LSTM predictor conditioned on a constraint sequence.

    The constraint sequence is summarised by a two-layer LSTM into one
    vector, which is injected only at the first timestep of the main
    sequence (remaining steps get a zero vector plus a "no info" flag).

    Fix: the tiling Lambda previously hard-coded the sequence length 16
    (the author's "todo timesteps en dur" comment); it now uses
    `timesteps`, so the model works for any sequence length.

    NOTE(review): `dropout_prob` is accepted but never applied here —
    confirm whether Dropout layers were intended (cf.
    countdown_constraint_lstm).

    Returns the compiled Keras Model with inputs [input_seq, constraint]
    and a softmax output over `num_pitches`.
    """
    input_seq = Input((timesteps, num_features), name='input_seq')
    constraint = Input((timesteps, num_features + 1), name='constraint')
    repr_input = input_seq
    repr_constraint = constraint
    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=True)(repr_constraint)
    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=False)(repr_constraint)
    tiled_constraint = Reshape((1, num_units_lstm))(repr_constraint)
    # Step 1 carries [constraint_summary, 0]; steps 2..timesteps carry
    # [zeros, 1], where the extra column flags "no constraint info here".
    tiled_constraint = Lambda(lambda x: K.concatenate(
        (K.concatenate([x, K.zeros_like(x)[:, :, 0:1]], axis=2),
         K.tile(
             K.concatenate(
                 [K.zeros_like(x), K.ones_like(x)[:, :, 0:1]], axis=2),
             (1, timesteps - 1, 1))),
        axis=1))(tiled_constraint)
    repr_input = merge([repr_input, tiled_constraint], mode='concat',
                       concat_axis=2)
    repr_input = LSTM(num_units_lstm, return_sequences=True)(repr_input)
    repr_input = LSTM(num_units_lstm, return_sequences=False)(repr_input)
    hidden_repr = merge([repr_input, repr_constraint], mode='concat')
    # NN head
    hidden_repr = Dense(num_units_lstm, activation='relu')(hidden_repr)
    hidden_repr = Dense(num_pitches)(hidden_repr)
    preds = Activation('softmax', name='label')(hidden_repr)
    model = Model(input=[input_seq, constraint], output=preds)
    model.compile(optimizer='adam',
                  loss={'label': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
def __init__(self, hidden_LSTM, hidden_MLP1, hidden_MLP2):
    """Build and compile a 3-input MLP over parent/head/tail encodings.

    Each input is a 2*hidden_LSTM vector (a BiLSTM encoding); the three
    are concatenated and passed through two dense layers to a 49-way
    softmax. The compiled model is stored on the instance.
    """
    p_in = Input(shape=(hidden_LSTM * 2, ), name='parent')
    h_in = Input(shape=(hidden_LSTM * 2, ), name='head')
    t_in = Input(shape=(hidden_LSTM * 2, ), name='tail')
    merged = merge([p_in, h_in, t_in], mode='concat', name='input_MLP')
    # 3 inputs of 2*hidden_LSTM each -> 6*hidden_LSTM features.
    layer0 = Dense(input_dim=hidden_LSTM * 6, output_dim=hidden_MLP1,
                   activation='linear', name='h0')(merged)
    layer1 = Dense(input_dim=hidden_MLP1, output_dim=hidden_MLP2,
                   activation='tanh', name='h1')(layer0)
    out = Dense(input_dim=hidden_MLP2, output_dim=49,
                activation='softmax', name='output')(layer1)
    self.__model = Model(input=[p_in, h_in, t_in], output=out)
    self.__model.compile(loss='categorical_crossentropy',
                         optimizer='adam', metrics=['accuracy'])
def __init__(self, hidden_LSTM, hidden_MLP):
    """Build and compile a BiLSTM + masked-selection MLP (3-way softmax).

    Runs a bidirectional LSTM over 300-d word vectors, then uses three
    boolean masks to select rows of the BiLSTM output (two stack
    elements and one buffer element), concatenates them and classifies.

    Args:
        hidden_LSTM: hidden size of each LSTM direction.
        hidden_MLP: hidden size of the MLP layer.
    """
    s2v = Input(shape=(None, 300), name='sentence2vec')
    # Boolean masks selecting timesteps of the BiLSTM output.
    ms1 = Input(shape=(None, ), dtype=tf.bool, name='mask_stack_1')
    ms2 = Input(shape=(None, ), dtype=tf.bool, name='mask_stack_2')
    mb = Input(shape=(None, ), dtype=tf.bool, name='mask_buffer')
    lstm = Bidirectional(LSTM(input_dim=300, output_dim=hidden_LSTM,
                              return_sequences=True, name='lstm'),
                         merge_mode='concat', name='bi')(s2v)
    # NOTE(review): the Lambdas close over the mask *tensors* rather than
    # taking them as layer inputs — works with the TF backend but bypasses
    # Keras's graph tracking; confirm intended.
    stack1 = Lambda(lambda x: tf.boolean_mask(x, ms1), name='stack1')(lstm)
    stack2 = Lambda(lambda x: tf.boolean_mask(x, ms2), name='stack2')(lstm)
    buffer = Lambda(lambda x: tf.boolean_mask(x, mb), name='buffer')(lstm)
    input_MLP = merge([stack1, stack2, buffer], mode='concat',
                      name='input_MLP')
    # 3 selections of 2*hidden_LSTM each -> 6*hidden_LSTM features.
    h0 = Dense(input_dim=hidden_LSTM * 6, output_dim=hidden_MLP,
               activation='tanh', name='h0')(input_MLP)
    output = Dense(input_dim=hidden_MLP, output_dim=3,
                   activation='softmax', name='output')(h0)
    self.__model = Model(input=[s2v, ms1, ms2, mb], output=output)
    self.__model.compile(loss='categorical_crossentropy',
                         optimizer='adam', metrics=['accuracy'])
def weighted_states(activations, rnn_size, input_length, attention="single"):
    """Apply soft attention weights over RNN timestep activations.

    Args:
        activations: RNN output tensor of shape (batch, input_length, rnn_size).
        rnn_size: hidden size of the RNN (used to broadcast the weights).
        input_length: number of timesteps.
        attention: "all" — dense scoring over the flattened activations;
            "single" — one shared Dense(1) score per timestep.

    Returns:
        The activations multiplied elementwise by their attention weights.

    Raises:
        ValueError: if `attention` is not "all" or "single" (the original
        silently returned None in that case).
    """
    if attention == "all":
        scores = Flatten()(activations)
        scores = Dense(input_length, activation='tanh')(scores)
    elif attention == "single":
        scores = TimeDistributed(Dense(1, activation='tanh'))(activations)
        scores = Flatten()(scores)
    else:
        raise ValueError(
            "attention must be 'all' or 'single', got %r" % (attention,))
    # Shared tail (was duplicated per branch): normalize, broadcast over
    # the hidden dimension, and reweight the activations.
    scores = Activation('softmax')(scores)
    scores = RepeatVector(rnn_size)(scores)
    scores = Permute([2, 1])(scores)
    return merge([activations, scores], mode='mul')
def cnn_multi_filters(wv, sent_length, nfilters, nb_filters, **kwargs):
    """Multi-width 1D-CNN text classifier over pretrained embeddings.

    Builds one Convolution1D + global-max-pool branch per width in
    `nfilters`, concatenates the pooled features and applies a 3-way
    softmax. Optional kwargs: noise, trainable, drop_text_input,
    activity_l2.
    """
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)

    words = Input(shape=(sent_length,), dtype='int32')
    embedded = embeddings_layer(max_length=sent_length, embeddings=wv,
                                trainable=trainable, masking=False)(words)
    embedded = GaussianNoise(noise)(embedded)
    embedded = Dropout(drop_text_input)(embedded)

    # One conv + global-max-pool branch per filter width.
    pooled = [
        GlobalMaxPooling1D()(
            Convolution1D(nb_filter=nb_filters, filter_length=width,
                          border_mode="valid", activation="relu",
                          subsample_length=1)(embedded))
        for width in nfilters
    ]
    features = merge(pooled, mode='concat')

    probabilities = Dense(3, activation='softmax',
                          activity_regularizer=l2(activity_l2))(features)
    model = Model(input=words, output=probabilities)
    model.compile(optimizer="adam", loss='categorical_crossentropy')
    return model
def residual_block(x, nb_filters=16, subsample_factor=1):
    """Pre-activation 3D residual block (BN -> ReLU -> conv, twice).

    When subsample_factor > 1 the shortcut is max-pooled to match the
    spatial downsampling; when the block widens the channels, the
    shortcut is zero-padded on the channel axis before the sum.
    """
    in_channels = K.int_shape(x)[4]
    if subsample_factor > 1:
        stride = (subsample_factor, subsample_factor, subsample_factor)
        # Downsample the shortcut to match the strided conv.
        shortcut = MaxPooling3D(pool_size=stride)(x)
    else:
        stride = (1, 1, 1)
        # Identity shortcut.
        shortcut = x
    if nb_filters > in_channels:
        # Zero-pad the channel dimension so shapes match for the sum.
        shortcut = Lambda(zero_pad_channels,
                          arguments={'pad': nb_filters - in_channels})(shortcut)

    y = BatchNormalization(axis=4)(x)
    y = Activation('relu')(y)
    y = Convolution3D(nb_filters, 3, 3, 3, subsample=stride,
                      init='he_normal', border_mode='same')(y)
    y = BatchNormalization(axis=4)(y)
    y = Activation('relu')(y)
    y = Convolution3D(nb_filters, 3, 3, 3, subsample=(1, 1, 1),
                      init='he_normal', border_mode='same')(y)
    return merge([y, shortcut], mode='sum')
def test_merge_model_model_concat(self):
    """Merge the outputs of two nested Models with mode='concat' (axis 1)
    and verify the combined model via self.modelTest."""
    input_data1 = np.random.random_sample([2, 4])
    input_data2 = np.random.random_sample([2, 3])
    input1 = Input((4, ))
    input2 = Input((3, ))
    out1 = Dense(4)(input1)
    out1_1 = Dense(4)(out1)
    out2 = Dense(3)(input2)
    out2_1 = Dense(3)(out2)
    # Wrap each branch in its own Model, then call it on its input so
    # the merge operates on model-call output tensors.
    branch1 = Model(input=[input1], output=out1_1)
    branch2 = Model(input=[input2], output=out2_1)
    branch1_tensor = branch1(input1)
    branch2_tensor = branch2(input2)
    from keras.engine import merge
    m = merge([branch1_tensor, branch2_tensor], mode="concat",
              concat_axis=1)
    kmodel = Model(input=[input1, input2], output=m)
    self.modelTest([input_data1, input_data2], kmodel,
                   random_weights=False, dump_weights=True,
                   is_training=False)
def inception_v4_B(x, nb_filters_reduction_factor=8):
    """Inception-v4 block B: four parallel branches (avg-pool, 1x1,
    factorized 7x7, double factorized 7x7) concatenated on channels.

    NOTE(review): branch `c` applies 1x7 twice (224 then 256 filters);
    the Inception-v4 paper alternates 1x7 / 7x1 — confirm intentional.
    """
    # Branch a: average pool + 1x1 projection.
    a = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same',
                         dim_ordering='tf')(x)
    a = Convolution2D(128//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(a)
    # Branch b: plain 1x1.
    b = Convolution2D(384//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    # Branch c: 1x1 then two 1x7 convs (see review note above).
    c = Convolution2D(192//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    c = Convolution2D(224//nb_filters_reduction_factor, 1, 7,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(c)
    c = Convolution2D(256//nb_filters_reduction_factor, 1, 7,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(c)
    # Branch d: 1x1 then alternating 1x7 / 7x1 pairs.
    d = Convolution2D(192//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    d = Convolution2D(192//nb_filters_reduction_factor, 1, 7,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(224//nb_filters_reduction_factor, 7, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(224//nb_filters_reduction_factor, 1, 7,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(256//nb_filters_reduction_factor, 7, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(d)
    x = merge([a, b, c, d], mode='concat', concat_axis=-1)
    return x
def test_merge_model_model_concat(self):
    """Merge the outputs of two nested Models with mode='concat' (axis 1)
    and verify the combined model via self.modelTest."""
    input_data1 = np.random.random_sample([2, 4])
    input_data2 = np.random.random_sample([2, 3])
    input1 = Input((4,))
    input2 = Input((3,))
    out1 = Dense(4)(input1)
    out1_1 = Dense(4)(out1)
    out2 = Dense(3)(input2)
    out2_1 = Dense(3)(out2)
    # Wrap each branch in its own Model, then call it on its input so
    # the merge operates on model-call output tensors.
    branch1 = Model(input=[input1], output=out1_1)
    branch2 = Model(input=[input2], output=out2_1)
    branch1_tensor = branch1(input1)
    branch2_tensor = branch2(input2)
    from keras.engine import merge
    m = merge([branch1_tensor, branch2_tensor], mode="concat",
              concat_axis=1)
    kmodel = Model(input=[input1, input2], output=m)
    self.modelTest([input_data1, input_data2], kmodel,
                   random_weights=False, dump_weights=True,
                   is_training=False)
def identity_block(input_tensor, kernel_size, nb_filter, stage, block,
                   subsumpling=False):
    """ResNet basic block: two convs with BN, plus a shortcut sum.

    With subsumpling=True both the main path and the shortcut use a
    stride-2 conv (a projection shortcut); otherwise the shortcut is the
    identity.
    """
    bn_axis = 3 if K.image_dim_ordering() == 'tf' else 1
    conv_base = 'res' + str(stage) + '_' + block + '_branch'
    bn_base = 'bn' + str(stage) + '_' + block + '_branch'

    if subsumpling:
        x = Convolution2D(nb_filter, kernel_size, kernel_size,
                          border_mode='same', subsample=(2, 2),
                          name=conv_base + '2a')(input_tensor)
    else:
        x = Convolution2D(nb_filter, kernel_size, kernel_size,
                          border_mode='same',
                          name=conv_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_base + '2a')(x)
    x = Activation('relu')(x)
    x = Convolution2D(nb_filter, kernel_size, kernel_size,
                      border_mode='same', name=conv_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_base + '2b')(x)

    if subsumpling:
        # Projection shortcut: 1x1 stride-2 conv on the block input.
        projected = Convolution2D(nb_filter, 1, 1, border_mode='same',
                                  subsample=(2, 2),
                                  name=conv_base + '2c')(input_tensor)
        x = merge([x, projected], mode='sum')
    else:
        x = merge([x, input_tensor], mode='sum')
    return Activation('relu')(x)
def get_entity_masking_model(len_embedding, data, filter_sizes):
    """CNN sentence classifier with an entity-embedding side input.

    Concatenates multi-width convolution features over the sentence with
    a learned per-entity embedding, then classifies into 3 classes with
    Adadelta.

    Args:
        len_embedding: word-embedding dimensionality.
        data: object exposing `idx2vect` (embedding matrix) and
            `keyword2idx` (entity -> index map).
        filter_sizes: list of convolution filter widths.

    NOTE(review): depends on module-level globals `len_sentence`,
    `entity_embed_length` and `l2value` — confirm they are defined.
    """
    num_class = 3
    num_filters = 200
    embedding_dim = len_embedding
    size_voca = len(data.idx2vect)
    # dict keys are already unique; len(set(d.keys())) was redundant
    num_entity = len(data.keyword2idx)
    eew = [numpy.random.uniform(-0.01, 0.01,
                                size=(num_entity, entity_embed_length))]
    sent_input = Input(shape=(len_sentence,), dtype='int32',
                       name='sent_level_input')
    ei_input = Input(shape=(1,), name='entity_indicator_input')
    sent_x = Embedding(size_voca, embedding_dim, input_length=len_sentence,
                       weights=[data.idx2vect])(sent_input)
    ei_emb = Embedding(num_entity, entity_embed_length, input_length=1,
                       weights=eew)(ei_input)
    ei_emb = Reshape([entity_embed_length])(ei_emb)
    sent_x = Dropout(0.5, input_shape=(len_sentence, embedding_dim))(sent_x)
    ei_emb = Dropout(0.5, input_shape=(1, entity_embed_length))(ei_emb)
    multiple_filter_output = []
    # was: for i in xrange(len(filter_sizes)) — direct iteration is
    # Python-3 compatible and clearer
    for size in filter_sizes:
        conv = Convolution1D(nb_filter=num_filters, filter_length=size,
                             border_mode='valid', bias=True,
                             activation='relu', subsample_length=1)(sent_x)
        pool = MaxPooling1D(pool_length=len_sentence - size + 1)(conv)
        multiple_filter_output.append(Flatten()(pool))
    if len(filter_sizes) == 1:
        text_feature = multiple_filter_output[0]
    else:
        text_feature = merge(multiple_filter_output, mode='concat')
    # text features from CNN concatenated with the entity embedding
    text_ei_feature = merge([text_feature, ei_emb], mode='concat')
    text_ei_feature = Dropout(0.5)(text_ei_feature)
    sent_loss = Dense(num_class, activation='softmax',
                      name='sent_level_output')(text_ei_feature)
    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, clipnorm=l2value)
    model = Model(input=[sent_input, ei_input], output=sent_loss)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer=adadelta)
    return model
def _build(self, phase, seq_shape=None, batch_size=None):
    """Build the STORN recognition network for the given phase.

    Train phase uses a variable-length shaped Input; predict phase uses
    a stateful, single-timestep batch Input.

    Returns:
        (recogn_stats, x_t, z_t): concatenated [mu, sigma] recognition
        statistics, the input tensor, and the sampled latent z.
    """
    if phase == Phases.train:
        x_t = Input(shape=(seq_shape, self.data_dim),
                    name="stornREC_input_train", dtype="float32")
    else:
        # Prediction runs statefully, one timestep per call.
        x_t = Input(batch_shape=(batch_size, 1, self.data_dim),
                    name="stornREC_input_predict", dtype="float32")
    # Recognition model
    # Fix of keras/engine/topology.py required for masked layer!
    # Otherwise concat with masked and non masked layer returns an error!
    # recogn_input = Masking()(x_t)
    # Unmasked Layer
    recogn_input = x_t
    # Deep input transform: n_deep dense layers (with optional dropout).
    for i in range(self.n_deep):
        recogn_input = TimeDistributed(
            Dense(self.n_hidden_dense,
                  activation=self.activation))(recogn_input)
        if self.dropout != 0.0:
            recogn_input = Dropout(self.dropout)(recogn_input)
    recogn_rnn = RecurrentLayer(self.n_hidden_recurrent,
                                return_sequences=True,
                                stateful=(phase == Phases.predict),
                                consume_less='gpu')(recogn_input)
    # Deep output transform mirroring the input transform.
    recogn_map = recogn_rnn
    for i in range(self.n_deep):
        recogn_map = TimeDistributed(
            Dense(self.n_hidden_dense,
                  activation=self.activation))(recogn_map)
        if self.dropout != 0:
            recogn_map = Dropout(self.dropout)(recogn_map)
    # Gaussian parameters; softplus keeps sigma positive.
    recogn_mu = TimeDistributed(Dense(self.latent_dim,
                                      activation='linear'))(recogn_map)
    recogn_sigma = TimeDistributed(
        Dense(self.latent_dim, activation="softplus"))(recogn_map)
    recogn_stats = merge([recogn_mu, recogn_sigma], mode='concat')
    # sample z from the distribution in X
    z_t = TimeDistributed(
        LambdaWithMasking(
            STORNRecognitionModel.do_sample,
            output_shape=STORNRecognitionModel.sample_output_shape,
            arguments={
                'batch_size': (None if (phase == Phases.train)
                               else batch_size),
                'dim_size': self.latent_dim
            }))(recogn_stats)
    return recogn_stats, x_t, z_t
def upconv2_2(self, input, concat_tensor, no_features):
    """Deconvolve `input` up to `concat_tensor`'s spatial shape and
    concatenate the two on the channel axis (axis 3)."""
    target_shape = [dim.value for dim in concat_tensor.get_shape()]
    upsampled = Deconvolution2D(no_features, 5, 5, target_shape,
                                subsample=(2, 2))(input)
    return merge([concat_tensor, upsampled], mode='concat', concat_axis=3)
def get_model(
        data_path,                        # Path to dataset
        hid_dim,                          # Dimension of the hidden GRU layers
        optimizer='rmsprop',              # Optimization function to be used
        loss='categorical_crossentropy'   # Loss function to be used
):
    """Build and compile a bidirectional-GRU reader over query + story.

    Reads sizes from `<data_path>/metadata/metadata.txt` and pretrained
    embedding weights from `<data_path>/metadata/weights.npy`, embeds
    query and story, concatenates them along time, runs forward and
    backward GRUs and predicts a softmax over entities.
    """
    # Parse "key: value" metadata lines. A context manager guarantees the
    # handle is closed even if int() raises (original opened/closed
    # manually with no try/finally).
    metadata_dict = {}
    with open(os.path.join(data_path, 'metadata', 'metadata.txt'), 'r') as f:
        for line in f:
            entry = line.split(':')
            metadata_dict[entry[0]] = int(entry[1])
    story_maxlen = metadata_dict['input_length']
    query_maxlen = metadata_dict['query_length']
    vocab_size = metadata_dict['vocab_size']
    entity_dim = metadata_dict['entity_dim']
    embed_weights = np.load(os.path.join(data_path, 'metadata',
                                         'weights.npy'))
    word_dim = embed_weights.shape[1]
    ########## MODEL ############
    story_input = Input(shape=(story_maxlen,), dtype='int32',
                        name="StoryInput")
    # +2 covers padding/OOV indices beyond the vocabulary.
    x = Embedding(input_dim=vocab_size+2, output_dim=word_dim,
                  input_length=story_maxlen, mask_zero=True,
                  weights=[embed_weights])(story_input)
    query_input = Input(shape=(query_maxlen,), dtype='int32',
                        name='QueryInput')
    x_q = Embedding(input_dim=vocab_size+2, output_dim=word_dim,
                    input_length=query_maxlen, mask_zero=True,
                    weights=[embed_weights])(query_input)
    # Query embedding is prepended to the story embedding along time.
    concat_embeddings = masked_concat([x_q, x], concat_axis=1)
    lstm = GRU(hid_dim, consume_less='gpu')(concat_embeddings)
    reverse_lstm = GRU(hid_dim, consume_less='gpu',
                       go_backwards=True)(concat_embeddings)
    merged = merge([lstm, reverse_lstm], mode='concat')
    result = Dense(entity_dim, activation='softmax')(merged)
    model = Model(input=[story_input, query_input], output=result)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    print(model.summary())
    return model
def inception_resnet_v2_stem(x):
    """Inception-ResNet-v2 stem, adapted for small inputs: all strides
    reduced to 1 and filter counts divided by the module-level
    `nb_filters_reduction_factor` global (original strides noted inline).
    """
    # in original inception-resnet-v2, conv stride is 2
    x = Convolution2D(32//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(x)
    x = Convolution2D(32//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(x)
    x = Convolution2D(64//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    # First split: max-pool branch vs conv branch, concatenated.
    # in original inception-resnet-v2, stride is 2
    a = MaxPooling2D((3, 3), strides=(1, 1), border_mode='valid',
                     dim_ordering='tf')(x)
    # in original inception-resnet-v2, conv stride is 2
    b = Convolution2D(96//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(x)
    x = merge([a, b], mode='concat', concat_axis=-1)
    # Second split: short 1x1->3x3 branch vs factorized 7x7 branch.
    a = Convolution2D(64//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    a = Convolution2D(96//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(a)
    b = Convolution2D(64//nb_filters_reduction_factor, 1, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(x)
    b = Convolution2D(64//nb_filters_reduction_factor, 7, 1,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(b)
    b = Convolution2D(64//nb_filters_reduction_factor, 1, 7,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='same', dim_ordering='tf')(b)
    b = Convolution2D(96//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(b)
    x = merge([a, b], mode='concat', concat_axis=-1)
    # Third split: conv branch vs max-pool branch.
    # in original inception-resnet-v2, conv stride should be 2
    a = Convolution2D(192//nb_filters_reduction_factor, 3, 3,
                      subsample=(1, 1), activation='relu', init='he_normal',
                      border_mode='valid', dim_ordering='tf')(x)
    # in original inception-resnet-v2, stride is 2
    b = MaxPooling2D((3, 3), strides=(1, 1), border_mode='valid',
                     dim_ordering='tf')(x)
    x = merge([a, b], mode='concat', concat_axis=-1)
    x = Activation('relu')(x)
    return x
def build_net_DiagLSTM(self, load_weights = False):
    """Build and compile the Diagonal-BiLSTM PixelRNN colour model.

    Stores the compiled model in `self.comp_net`; if `load_weights` is
    True, loads pretrained weights from 'Data/comp_model.h5'.
    """
    img = Input(batch_shape=(10, self.img_channels, self.img_rows,
                             self.img_cols), name='input_img')
    # Mask type 'a' at the input so a pixel never sees itself.
    model_in = MaskedConvolution2D(self.h, 7, 7, mask_type='a',
                                   direction='Right', border_mode='same',
                                   init='he_uniform')(img)
    # 12 residual blocks of forward + backward diagonal LSTMs.
    for _ in range(12):
        model_LSTM_F = DiagLSTM(self.h_2, 3, return_sequences=True,
                                init='he_uniform',
                                inner_init='he_uniform',
                                direction='Right')(model_in)
        model_LSTM_B = DiagLSTM(self.h_2, 3, return_sequences=True,
                                init='he_uniform',
                                inner_init='he_uniform',
                                direction='Right', reverse=True)(model_in)
        model_LSTM = merge([model_LSTM_F, model_LSTM_B], mode='sum')
        model_per = Convolution2D(self.h, 1, 1,
                                  init='he_normal')(model_LSTM)
        # Residual connection around the BiLSTM block.
        model_in = merge([model_in, model_per], mode='sum')
    model_out = MaskedConvolution2D(self.h, 1, 1, mask_type='b',
                                    direction='Right', border_mode='same',
                                    activation='relu',
                                    init='he_uniform')(model_in)
    # 256 intensity classes per colour channel (3 channels).
    model_out = MaskedConvolution2D(256*3, 1, 1, mask_type='b',
                                    direction='Right', border_mode='same',
                                    activation='relu',
                                    init='he_uniform')(model_out)
    Red = GetColors(0)(model_out)
    Green = GetColors(1)(model_out)
    Blue = GetColors(2)(model_out)
    Red_out = SoftmaxLayer(name='Red_out')(Red)
    Green_out = SoftmaxLayer(name='Green_out')(Green)
    Blue_out = SoftmaxLayer(name='Blue_out')(Blue)
    Col_Model = Model(img, [Red_out, Green_out, Blue_out])
    if load_weights:
        Col_Model.load_weights('Data/comp_model.h5')
    print("Compiling...")
    Col_Model.compile(optimizer=self.optimizer,
                      loss={'Red_out': image_categorical_crossentropy,
                            'Green_out': image_categorical_crossentropy,
                            'Blue_out': image_categorical_crossentropy},
                      metrics={'Red_out': 'accuracy',
                               'Green_out': 'accuracy',
                               'Blue_out': 'accuracy'})
    self.comp_net = Col_Model
def inception_resnet_v2_reduction_A(x):
    """Inception-ResNet-v2 Reduction-A: three stride-2 branches
    (max-pool, single conv, double-conv stack) concatenated on channels.

    Filter counts are divided by the module-level
    `nb_filters_reduction_factor` global.
    """
    def conv(tensor, n, rows, cols, stride, pad):
        return Convolution2D(n // nb_filters_reduction_factor, rows, cols,
                             subsample=stride, activation='relu',
                             init='he_normal', border_mode=pad,
                             dim_ordering='tf')(tensor)

    pool_branch = MaxPooling2D((3, 3), strides=(2, 2),
                               border_mode='valid', dim_ordering='tf')(x)
    conv_branch = conv(x, 384, 3, 3, (2, 2), 'valid')
    stacked = conv(x, 256, 1, 1, (1, 1), 'same')
    stacked = conv(stacked, 256, 3, 3, (1, 1), 'same')
    stacked = conv(stacked, 384, 3, 3, (2, 2), 'valid')
    return merge([pool_branch, conv_branch, stacked],
                 mode='concat', concat_axis=-1)
def test_merge_method_cos(self):
    """Merge two Dense outputs with mode='cos' (cosine proximity along
    dot_axes=1) and verify via self.modelTest."""
    input_data1 = np.random.random_sample([2, 4])
    input_data2 = np.random.random_sample([2, 4])
    input1 = Input((4,))
    input2 = Input((4,))
    out1 = Dense(4)(input1)
    out2 = Dense(4)(input2)
    from keras.engine import merge
    m = merge([out1, out2], mode="cos", dot_axes=1)
    kmodel = Model(input=[input1, input2], output=m)
    self.modelTest([input_data1, input_data2], kmodel,
                   random_weights=False, dump_weights=True,
                   is_training=False)
def test_merge_method_cos(self):
    """Merge two Dense outputs with mode='cos' (cosine proximity along
    dot_axes=1) and verify via self.modelTest."""
    input_data1 = np.random.random_sample([2, 4])
    input_data2 = np.random.random_sample([2, 4])
    input1 = Input((4, ))
    input2 = Input((4, ))
    out1 = Dense(4)(input1)
    out2 = Dense(4)(input2)
    from keras.engine import merge
    m = merge([out1, out2], mode="cos", dot_axes=1)
    kmodel = Model(input=[input1, input2], output=m)
    self.modelTest([input_data1, input_data2], kmodel,
                   random_weights=False, dump_weights=True,
                   is_training=False)
def __init__(self, word_vec, word_to_index, index_to_word, classes,
             title_output=128, content_output=512,
             dense_neurons=(1024, 256,), title_len=50, content_len=2000,
             weights=None, directory='.'):
    """Two-branch GRU document classifier.

    Encodes title and content with separate Embedding+GRU encoders,
    concatenates the two encodings, and decodes with a stack of dense
    layers ending in a softmax over `classes`.

    Args:
        word_vec: pretrained embedding matrix (vocab x dim).
        word_to_index / index_to_word: vocabulary lookup tables.
        classes: class labels; output size is len(classes).
        title_output / content_output: GRU sizes of the two encoders.
        dense_neurons: widths of the decoder's hidden dense layers.
        title_len / content_len: fixed (padded) input lengths.
        weights: optional path to pretrained model weights to load.
        directory: working directory, kept on the instance.
    """
    self.directory = directory
    self.word_to_index = word_to_index
    self.index_to_word = index_to_word
    self.title_len = title_len
    self.content_len = content_len
    self.word_vec = word_vec
    self.classes = classes
    self.title_output = title_output
    self.content_output = content_output
    self.dense_neurons = dense_neurons
    # Encode document's title
    title_inp = Input(shape=(title_len,), name='Title_Input')
    title_embed = Embedding(input_dim=np.size(word_vec, 0),
                            output_dim=np.size(word_vec, 1),
                            weights=[word_vec], mask_zero=True,
                            name='Title_Embedding')
    self.t_encoder = Sequential(name='Title_Encoder')
    self.t_encoder.add(title_embed)
    self.t_encoder.add(GRU(title_output, name='Title_GRU',
                           consume_less='mem'))
    title_vec = self.t_encoder(title_inp)
    # Encode document's content
    content_inp = Input(shape=(content_len,), name='Content_Input')
    content_embed = Embedding(input_dim=np.size(word_vec, 0),
                              output_dim=np.size(word_vec, 1),
                              weights=[word_vec], mask_zero=True,
                              name='Content_Embedding')
    self.c_encoder = Sequential(name='Content_Encoder')
    self.c_encoder.add(content_embed)
    self.c_encoder.add(GRU(content_output, name='Content_GRU',
                           consume_less='mem'))
    content_vec = self.c_encoder(content_inp)
    # Merge vectors to create output
    doc_vec = merge(inputs=[title_vec, content_vec], mode='concat')
    # Decode using dense layers
    self.decoder = Sequential(name='Decoder')
    self.decoder.add(Dense(dense_neurons[0],
                           input_shape=(title_output + content_output,),
                           name='Dense_0', activation='hard_sigmoid'))
    for i, n in enumerate(dense_neurons[1:]):
        self.decoder.add(Dense(n, activation='hard_sigmoid',
                               name='Dense_%s' % (i + 1)))
    self.decoder.add(Dense(len(classes), activation='softmax',
                           name='Dense_Output'))
    output = self.decoder(doc_vec)
    self.model = Model(input=[title_inp, content_inp], output=output,
                       name='Model')
    if weights is not None:
        self.model.load_weights(weights)
def get2way_model(len_embedding, len_sentence, idx2vect, filter_sizes):
    """Binary CNN sentence classifier (2-way softmax, Adadelta).

    One Convolution1D + max-pool branch per width in `filter_sizes`,
    concatenated (or used directly if there is only one width) and fed
    through a small dense head.

    NOTE(review): depends on the module-level global `l2value` for the
    optimizer's clipnorm — confirm it is defined.
    """
    dropout_prob = (0.1, 0.3)
    num_filters = 10
    hidden_dims = 10
    embedding_dim = len_embedding
    size_voca = len(idx2vect)
    sent_input = Input(shape=(len_sentence,), dtype='int32',
                       name='sent_level_input')
    sent_x = Embedding(size_voca, embedding_dim,
                       input_length=len_sentence,
                       weights=[idx2vect])(sent_input)
    sent_x = Dropout(dropout_prob[0],
                     input_shape=(len_sentence, embedding_dim))(sent_x)
    multiple_filter_output = []
    # was: for i in xrange(len(filter_sizes)) — direct iteration is
    # Python-3 compatible and clearer
    for size in filter_sizes:
        conv = Convolution1D(nb_filter=num_filters, filter_length=size,
                             border_mode='valid', bias=True,
                             activation='relu', subsample_length=1)(sent_x)
        pool = MaxPooling1D(pool_length=len_sentence - size + 1)(conv)
        multiple_filter_output.append(Flatten()(pool))
    if len(filter_sizes) == 1:
        sent_v = multiple_filter_output[0]
    else:
        sent_v = merge(multiple_filter_output, mode='concat')
    sent_v = Dense(hidden_dims)(sent_v)
    sent_v = Dropout(dropout_prob[1])(sent_v)
    sent_v = Activation('relu')(sent_v)
    sent_loss = Dense(2, activation='softmax',
                      name='sent_level_output')(sent_v)
    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=l2value)
    model = Model(input=sent_input, output=sent_loss)
    model.compile(loss='categorical_crossentropy',
                  metrics=['accuracy', 'fmeasure'], optimizer=adadelta)
    return model
def test_learning_phase():
    """Check _uses_learning_phase propagation through Dropout, merge and
    nested Models, and that dropout actually changes the output."""
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')
    a_2 = Dense(16, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    # Only the dropout path should be phase-dependent.
    assert dp.uses_learning_phase
    assert not a_2._uses_learning_phase
    assert b_2._uses_learning_phase
    # test merge: phase flag propagates through merge
    m = merge([a_2, b_2], mode='concat')
    assert m._uses_learning_phase
    # Test recursion: flag propagates through a nested Model call
    model = Model([a, b], [a_2, b_2])
    print(model.input_spec)
    assert model.uses_learning_phase
    c = Input(shape=(32, ), name='input_c')
    d = Input(shape=(32, ), name='input_d')
    c_2, b_2 = model([c, d])
    assert c_2._uses_learning_phase
    assert b_2._uses_learning_phase
    # try actually running graph
    fn = K.function(model.inputs + [K.learning_phase()], model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs_no_dp = fn([input_a_np, input_b_np, 0])
    fn_outputs_dp = fn([input_a_np, input_b_np, 1])
    # output a: nothing changes
    assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum()
    # output b: dropout applied
    assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
def test_learning_phase():
    """Check _uses_learning_phase propagation through Dropout, merge and
    nested Models, and that dropout actually changes the output."""
    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')
    a_2 = Dense(16, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    # Only the dropout path should be phase-dependent.
    assert dp.uses_learning_phase
    assert not a_2._uses_learning_phase
    assert b_2._uses_learning_phase
    # test merge: phase flag propagates through merge
    m = merge([a_2, b_2], mode='concat')
    assert m._uses_learning_phase
    # Test recursion: flag propagates through a nested Model call
    model = Model([a, b], [a_2, b_2])
    print(model.input_spec)
    assert model.uses_learning_phase
    c = Input(shape=(32,), name='input_c')
    d = Input(shape=(32,), name='input_d')
    c_2, b_2 = model([c, d])
    assert c_2._uses_learning_phase
    assert b_2._uses_learning_phase
    # try actually running graph
    fn = K.function(model.inputs + [K.learning_phase()], model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs_no_dp = fn([input_a_np, input_b_np, 0])
    fn_outputs_dp = fn([input_a_np, input_b_np, 1])
    # output a: nothing changes
    assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum()
    # output b: dropout applied
    assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
def countdown_constraint_lstm(timesteps, num_features, num_pitches,
                              num_units_lstm, dropout_prob=0.2):
    """Constraint LSTM with an extra per-step countdown input.

    The constraint sequence is summarised by a two-layer LSTM (with
    dropout), repeated across all timesteps, concatenated with the input
    sequence and the countdown matrix, and decoded through a second
    two-layer LSTM to a softmax over pitches.
    """
    seq_in = Input((timesteps, num_features), name='input_seq')
    constraint_in = Input((timesteps, num_features + 1), name='constraint')
    countdown_in = Input((timesteps, timesteps), name='countdown')
    # Summarise the constraint sequence into a single vector.
    summary = LSTM(num_units_lstm, return_sequences=True)(constraint_in)
    summary = Dropout(dropout_prob)(summary)
    summary = LSTM(num_units_lstm, return_sequences=False)(summary)
    tiled = RepeatVector(timesteps)(summary)
    # Feature-axis concat of input, tiled summary and countdown.
    h = merge([seq_in, tiled, countdown_in], mode='concat', concat_axis=2)
    h = LSTM(num_units_lstm, return_sequences=True)(h)
    h = Dropout(dropout_prob)(h)
    h = LSTM(num_units_lstm, return_sequences=False)(h)
    # NN head
    h = Dense(num_units_lstm, activation='relu')(h)
    h = Dense(num_pitches)(h)
    preds = Activation('softmax', name='label')(h)
    model = Model(input=[seq_in, constraint_in, countdown_in],
                  output=preds)
    model.compile(optimizer='adam',
                  loss={'label': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
def simple_second_model():
    """Define and compile a simple Conv2D model with per-group second-order
    statistic branches, concatenated into a 10-way Dense output."""
    n_groups = 8
    inp = Input(INPUT_SHAPE[1:])
    feat = Convolution2D(1024, 3, 3)(inp)
    feat = Convolution2D(2048, 3, 3)(feat)
    groups = SeparateConvolutionFeatures(n_groups)(feat)
    groups = Regrouping(None)(groups)

    def _branch(g):
        # second-order statistics -> two O2 transforms -> weighted vectorization
        g = SecondaryStatistic()(g)
        g = O2Transform(100)(g)
        g = O2Transform(100)(g)
        return WeightedVectorization(10)(g)

    # each group gets its own (unshared) branch, exactly as in a plain loop
    merged = merge([_branch(g) for g in groups], mode='concat')
    logits = Dense(10)(merged)
    model = Model(inp, logits)
    model.compile(optimizer='sgd', loss='categorical_crossentropy')
    model.summary()
    return model
def test_multi_input_layer():
    """Exercise a shared Dense layer feeding a merge, model introspection
    (layers, shapes, masks), execution, and JSON round-tripping."""
    ####################################################
    # test multi-input layer
    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')
    dense = Dense(16, name='dense_1')
    # the same Dense instance is applied to both inputs (shared weights)
    a_2 = dense(a)
    b_2 = dense(b)
    merged = merge([a_2, b_2], mode='concat', name='merge')
    assert merged._keras_shape == (None, 16 * 2)
    merge_layer, merge_node_index, merge_tensor_index = merged._keras_history
    assert merge_node_index == 0
    assert merge_tensor_index == 0
    assert len(merge_layer.inbound_nodes) == 1
    assert len(merge_layer.outbound_nodes) == 0
    assert len(merge_layer.inbound_nodes[0].input_tensors) == 2
    assert len(merge_layer.inbound_nodes[0].inbound_layers) == 2
    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)
    model = Model(input=[a, b], output=[c, d], name='model')
    assert len(model.layers) == 6
    print('model.input_layers:', model.input_layers)
    print('model.input_layers_node_indices:', model.input_layers_node_indices)
    print('model.input_layers_tensor_indices:', model.input_layers_tensor_indices)
    print('model.output_layers', model.output_layers)
    print('output_shape:', model.get_output_shape_for([(None, 32), (None, 32)]))
    assert model.get_output_shape_for([(None, 32), (None, 32)]) == [(None, 64), (None, 5)]
    assert model.compute_mask([a, b], [None, None]) == [None, None]
    print('output_shape:', model.get_output_shape_for([(None, 32), (None, 32)]))
    assert model.get_output_shape_for([(None, 32), (None, 32)]) == [(None, 64), (None, 5)]
    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('layers:', [layer.name for layer in model.layers])
    assert [l.name for l in model.layers][2:] == ['dense_1', 'merge', 'dense_2', 'dense_3']
    print('input_layers:', [l.name for l in model.input_layers])
    assert [l.name for l in model.input_layers] == ['input_a', 'input_b']
    print('output_layers:', [l.name for l in model.output_layers])
    assert [l.name for l in model.output_layers] == ['dense_2', 'dense_3']
    # actually run model
    fn = K.function(model.inputs, model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]
    # test get_source_inputs
    print(get_source_inputs(c))
    assert get_source_inputs(c) == [a, b]
    # serialization / deserialization
    json_config = model.to_json()
    recreated_model = model_from_json(json_config)
    recreated_model.compile('rmsprop', 'mse')
    print('recreated:')
    print([layer.name for layer in recreated_model.layers])
    print([layer.name for layer in recreated_model.input_layers])
    print([layer.name for layer in recreated_model.output_layers])
    # the recreated model must preserve layer ordering and names
    assert [l.name for l in recreated_model.layers][2:] == ['dense_1', 'merge', 'dense_2', 'dense_3']
    assert [l.name for l in recreated_model.input_layers] == ['input_a', 'input_b']
    assert [l.name for l in recreated_model.output_layers] == ['dense_2', 'dense_3']
    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]
def target_RNN(wv, tweet_max_length, aspect_max_length, classes=2, **kwargs):
    """Build a target-aware RNN classifier over (tweet, aspect) token inputs.

    A shared RNN encodes both sequences; the aspect encoding is mean-pooled,
    tiled over the tweet timesteps, concatenated with the tweet encoding,
    attended, and classified. Returns the compiled model.
    # NOTE(review): near-duplicate of `aspect_RNN` below — keep in sync.
    """
    ######################################################
    # HyperParameters
    ######################################################
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    rnn_size = kwargs.get("rnn_size", 75)
    rnn_type = kwargs.get("rnn_type", LSTM)
    final_size = kwargs.get("final_size", 100)
    final_type = kwargs.get("final_type", "linear")
    use_final = kwargs.get("use_final", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    drop_text_rnn = kwargs.get("drop_text_rnn", 0.)
    drop_text_rnn_U = kwargs.get("drop_text_rnn_U", 0.)
    drop_target_rnn = kwargs.get("drop_target_rnn", 0.)
    drop_rep = kwargs.get("drop_rep", 0.)
    drop_final = kwargs.get("drop_final", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)
    clipnorm = kwargs.get("clipnorm", 5)
    bi = kwargs.get("bi", False)
    lr = kwargs.get("lr", 0.001)
    attention = kwargs.get("attention", "simple")
    #####################################################
    # one RNN instance shared by both the tweet and the aspect branch
    shared_RNN = get_RNN(rnn_type, rnn_size, bi=bi, return_sequences=True,
                         dropout_U=drop_text_rnn_U)
    input_tweet = Input(shape=[tweet_max_length], dtype='int32')
    input_aspect = Input(shape=[aspect_max_length], dtype='int32')
    # Embeddings
    tweets_emb = embeddings_layer(max_length=tweet_max_length, embeddings=wv,
                                  trainable=trainable, masking=True)(input_tweet)
    tweets_emb = GaussianNoise(noise)(tweets_emb)
    tweets_emb = Dropout(drop_text_input)(tweets_emb)
    aspects_emb = embeddings_layer(max_length=aspect_max_length, embeddings=wv,
                                   trainable=trainable, masking=True)(input_aspect)
    aspects_emb = GaussianNoise(noise)(aspects_emb)
    # Recurrent NN
    h_tweets = shared_RNN(tweets_emb)
    h_tweets = Dropout(drop_text_rnn)(h_tweets)
    h_aspects = shared_RNN(aspects_emb)
    h_aspects = Dropout(drop_target_rnn)(h_aspects)
    # pool the aspect over time, then tile it across the tweet timesteps
    h_aspects = MeanOverTime()(h_aspects)
    h_aspects = RepeatVector(tweet_max_length)(h_aspects)
    # Merge of Aspect + Tweet
    representation = merge([h_tweets, h_aspects], mode='concat')
    # apply attention over the hidden outputs of the RNN's
    att_layer = AttentionWithContext if attention == "context" else Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)
    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)
    ######################################################
    # Probabilities
    ######################################################
    # binary -> single sigmoid unit; multiclass -> softmax over `classes`
    probabilities = Dense(1 if classes == 2 else classes,
                          activation="sigmoid" if classes == 2 else "softmax",
                          activity_regularizer=l2(activity_l2))(representation)
    # note the input order: aspect first, tweet second
    model = Model(input=[input_aspect, input_tweet], output=probabilities)
    loss = "binary_crossentropy" if classes == 2 else "categorical_crossentropy"
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
def siamese_RNN(wv, sent_length, **params):
    """Build a siamese sentence-pair classifier.

    Both sentences pass through the SAME embedding / LSTM / attention /
    dense layer instances (tied weights); the two encodings are concatenated
    and scored with a MaxoutDense + sigmoid. Returns the compiled model.
    """
    rnn_size = params.get("rnn_size", 100)
    rnn_drop_U = params.get("rnn_drop_U", 0.2)
    noise_words = params.get("noise_words", 0.3)
    drop_words = params.get("drop_words", 0.2)
    drop_sent = params.get("drop_sent", 0.3)
    sent_dense = params.get("sent_dense", 50)
    final_size = params.get("final_size", 100)
    drop_final = params.get("drop_final", 0.5)

    ###################################################
    # Shared Layers (single instances, reused by both branches)
    ###################################################
    embedding = embeddings_layer(max_length=sent_length, embeddings=wv,
                                 masking=True)
    encoder = get_RNN(LSTM, rnn_size, bi=False, return_sequences=True,
                      dropout_U=rnn_drop_U)
    attention = Attention()
    projection = Dense(sent_dense, activation="relu")

    def _encode(token_ids):
        # embed -> noise/dropout -> LSTM -> attention pooling -> projection;
        # the Noise/Dropout layers are fresh per branch, as in the original
        e = embedding(token_ids)
        e = GaussianNoise(noise_words)(e)
        e = Dropout(drop_words)(e)
        h = encoder(e)
        h = Dropout(drop_sent)(h)
        h = attention(h)
        h = projection(h)
        return Dropout(drop_sent)(h)

    input_a = Input(shape=[sent_length], dtype='int32')
    input_b = Input(shape=[sent_length], dtype='int32')
    enc_a = _encode(input_a)
    enc_b = _encode(input_b)

    ###################################################
    # Comparison
    ###################################################
    comparison = merge([enc_a, enc_b], mode='concat')
    comparison = MaxoutDense(final_size)(comparison)
    comparison = Dropout(drop_final)(comparison)
    probabilities = Dense(1, activation='sigmoid')(comparison)

    model = Model(input=[input_a, input_b], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=1., lr=0.001),
                  loss='binary_crossentropy',
                  metrics=["binary_accuracy"])
    return model
def aspect_RNN(wv, text_length, target_length, loss, activation, **kwargs):
    """Build an aspect-aware RNN regressor/classifier over (text, target).

    Same architecture as `target_RNN` above, but with caller-supplied `loss`
    and output `activation`, and a single output unit.
    # NOTE(review): near-duplicate of `target_RNN` — keep in sync.
    """
    ######################################################
    # HyperParameters
    ######################################################
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    rnn_size = kwargs.get("rnn_size", 75)
    rnn_type = kwargs.get("rnn_type", LSTM)
    final_size = kwargs.get("final_size", 100)
    final_type = kwargs.get("final_type", "linear")
    use_final = kwargs.get("use_final", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    drop_text_rnn = kwargs.get("drop_text_rnn", 0.)
    drop_text_rnn_U = kwargs.get("drop_text_rnn_U", 0.)
    drop_target_rnn = kwargs.get("drop_target_rnn", 0.)
    drop_rep = kwargs.get("drop_rep", 0.)
    drop_final = kwargs.get("drop_final", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)
    clipnorm = kwargs.get("clipnorm", 5)
    bi = kwargs.get("bi", False)
    lr = kwargs.get("lr", 0.001)
    attention = kwargs.get("attention", "simple")
    #####################################################
    # one RNN instance shared by the text and the target branch
    shared_RNN = get_RNN(rnn_type, rnn_size, bi=bi, return_sequences=True,
                         dropout_U=drop_text_rnn_U)
    # shared_RNN = LSTM(rnn_size, return_sequences=True, dropout_U=drop_text_rnn_U)
    input_text = Input(shape=[text_length], dtype='int32')
    input_target = Input(shape=[target_length], dtype='int32')
    ######################################################
    # Embeddings
    ######################################################
    emb_text = embeddings_layer(max_length=text_length, embeddings=wv,
                                trainable=trainable, masking=True)(input_text)
    emb_text = GaussianNoise(noise)(emb_text)
    emb_text = Dropout(drop_text_input)(emb_text)
    emb_target = embeddings_layer(max_length=target_length, embeddings=wv,
                                  trainable=trainable, masking=True)(input_target)
    emb_target = GaussianNoise(noise)(emb_target)
    ######################################################
    # RNN - Tweet
    ######################################################
    enc_text = shared_RNN(emb_text)
    enc_text = Dropout(drop_text_rnn)(enc_text)
    ######################################################
    # RNN - Aspect
    ######################################################
    enc_target = shared_RNN(emb_target)
    # pool the target over time, then tile it across the text timesteps
    enc_target = MeanOverTime()(enc_target)
    enc_target = Dropout(drop_target_rnn)(enc_target)
    enc_target = RepeatVector(text_length)(enc_target)
    ######################################################
    # Merge of Aspect + Tweet
    ######################################################
    representation = merge([enc_text, enc_target], mode='concat')
    att_layer = AttentionWithContext if attention == "context" else Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)
    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)
    ######################################################
    # Probabilities
    ######################################################
    probabilities = Dense(1, activation=activation,
                          activity_regularizer=l2(activity_l2))(representation)
    # note the input order: target first, text second
    model = Model(input=[input_target, input_text], output=probabilities)
    # model = Model(input=[input_text, input_target], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
def test_functional_guide():
    """Run the functional-API guide examples end to end: a single-input MLP,
    model re-use as a layer, and a shared-LSTM multi-input model."""
    # MNIST
    from keras.layers import Input, Dense, LSTM
    from keras.models import Model
    from keras.utils import np_utils

    # this returns a tensor
    inputs = Input(shape=(784,))
    # a layer instance is callable on a tensor, and returns a tensor
    x = Dense(64, activation='relu')(inputs)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)
    # this creates a model that includes
    # the Input layer and three Dense layers
    model = Model(input=inputs, output=predictions)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # the data, shuffled and split between train and test sets
    X_train = np.random.random((100, 784))
    Y_train = np.random.random((100, 10))
    model.fit(X_train, Y_train, nb_epoch=2, batch_size=128)
    assert model.inputs == [inputs]
    assert model.outputs == [predictions]
    assert model.input == inputs
    assert model.output == predictions
    assert model.input_shape == (None, 784)
    assert model.output_shape == (None, 10)
    # try calling the sequential model
    inputs = Input(shape=(784,))
    new_outputs = model(inputs)
    new_model = Model(input=inputs, output=new_outputs)
    new_model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    ##################################################
    # multi-io
    ##################################################
    tweet_a = Input(shape=(4, 25))
    tweet_b = Input(shape=(4, 25))
    # this layer can take as input a matrix
    # and will return a vector of size 64
    shared_lstm = LSTM(64)
    # when we reuse the same layer instance
    # multiple times, the weights of the layer
    # are also being reused
    # (it is effectively *the same* layer)
    encoded_a = shared_lstm(tweet_a)
    encoded_b = shared_lstm(tweet_b)
    # we can then concatenate the two vectors:
    merged_vector = merge([encoded_a, encoded_b], mode='concat', concat_axis=-1)
    # and add a logistic regression on top
    predictions = Dense(1, activation='sigmoid')(merged_vector)
    # we define a trainable model linking the
    # tweet inputs to the predictions
    model = Model(input=[tweet_a, tweet_b], output=predictions)
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    data_a = np.random.random((1000, 4, 25))
    data_b = np.random.random((1000, 4, 25))
    labels = np.random.random((1000,))
    model.fit([data_a, data_b], labels, nb_epoch=1)
    model.summary()
    assert model.inputs == [tweet_a, tweet_b]
    assert model.outputs == [predictions]
    assert model.input == [tweet_a, tweet_b]
    assert model.output == predictions
    # (the assertion below is a duplicate in the original; kept as-is)
    assert model.output == predictions
    assert model.input_shape == [(None, 4, 25), (None, 4, 25)]
    assert model.output_shape == (None, 1)
    assert shared_lstm.get_output_at(0) == encoded_a
    assert shared_lstm.get_output_at(1) == encoded_b
    assert shared_lstm.input_shape == (None, 4, 25)
from __future__ import absolute_import
import keras.backend as K
from keras.engine import merge
from ..layers import PassThrough
from . import loggers


def normalize_mask(x, mask):
    '''Keep the mask aligned with the tensor x

    Arguments: x is a data tensor; mask is a binary tensor
    Rationale: keep mask at same dimensionality as x, but only with a
    length-1 trailing dimension. This ensures broadcastability, which is
    important because inferring shapes is hard and shapes are easy to get
    wrong.
    '''
    mask = K.cast(mask, K.floatx())
    # grow or shrink the mask's rank until it matches x:
    # too many dims -> reduce the trailing one; too few -> append one
    while K.ndim(mask) != K.ndim(x):
        if K.ndim(mask) > K.ndim(x):
            mask = K.any(mask, axis=-1)
        elif K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
    # collapse the trailing dim to length 1 so the mask broadcasts over x
    return K.any(mask, axis=-1, keepdims=True)


# shorthand: concatenate a list of tensors with Keras' functional merge
concat = lambda x: merge(x, mode='concat')


def xor(a, b, v=None):
    # True iff exactly one of a, b is (identity test) the sentinel v
    return (a is not v and b is v) or (a is v and b is not v)
# NOTE(review): this fragment relies on `characters`, `EOS`, `voc_dim`,
# `middle_dim` and `max_out` being defined earlier in the file — confirm.
characters.append(EOS)
int2char = list(characters)
char2int = {c: i for i, c in enumerate(characters)}
print(char2int)
VOCAB_SIZE = len(characters)

input_seq = Input(shape=(None,), dtype='int32')
embedded = Embedding(VOCAB_SIZE, voc_dim, name='embd')(input_seq)
# drop_out = Dropout(0.1, name='d_o')(embedded)
# the same embedding feeds a forward and a backward LSTM, summed element-wise
forward = LSTM(middle_dim, return_sequences=True, consume_less='mem',
               name='fwd')(embedded)
backward = LSTM(middle_dim, return_sequences=True, go_backwards=True,
                name='bwd')(embedded)
sum_res = merge([forward, backward], mode='sum', name='mrg')
repeat = RepeatTimeDistributedVector(max_out, name='RTD')(sum_res)
alstm = ALSTM(voc_dim, return_sequences=True, name='ALSTM')(repeat)
dense = TimeDistributed(Dense(VOCAB_SIZE, name='d_t_d'), name='t_d1')(alstm)
out = TimeDistributed(HierarchicalSoftmax(levels=2, name='HSM'),
                      name='t_d2')(dense)
model = Model(input_seq, out)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

sentence = "May the force be with you"
# wrap the sample sentence in end-of-sequence markers
sentence = [EOS] + list(sentence) + [EOS]
nb_filters=nb_filters, block=4 * blocks_per_group + i, nb_total_blocks=nb_total_blocks, subsample_factor=subsample_factor) f16 = x f16 = UpSampling2D(size=(16, 16), dim_ordering='tf')(f16) f16 = Convolution2D(16, 3, 3, init='he_uniform', border_mode='same', activation='relu', dim_ordering='tf')(f16) segmentation = merge([f16, f8, f4, f2], mode='concat', concat_axis=-1) segmentation = Convolution2D(1, 1, 1, activation='sigmoid', init='uniform', border_mode='same', dim_ordering='tf')(segmentation) segmentation = Reshape((img_rows_segment, img_cols_segment))(segmentation) model_segment = Model(input=images_segment, output=segmentation) model_segment.summary() print('') print('model init time: {}'.format(time.time() - start_time))
# hyper-parameters for the sequence model below
voc_size = 35000
voc_dim = 100
middle_dim = 200
max_out = 10

input_seq = Input(shape=(None,), dtype='int32')
embedded = Embedding(voc_size, voc_dim)(input_seq)
drop_out = Dropout(0.1)(embedded)
# forward and backward LSTMs over the same dropped-out embeddings, summed
forward = LSTM(middle_dim, return_sequences=True, consume_less='mem')(drop_out)
backward = LSTM(middle_dim, return_sequences=True, go_backwards=True)(drop_out)
sum_res = merge([forward, backward], mode='sum')
repeat = RepeatTimeDistributedVector(max_out)(sum_res)
alstm = ALSTM(voc_dim, return_sequences=True)(repeat)
dense = TimeDistributed(Dense(voc_size))(alstm)
out = TimeDistributed(HierarchicalSoftmax(levels=3))(dense)
model = Model(input_seq, out)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
def build_model(spectral_input_size, temporal_input_size, spectral_n_feature,
                temporal_n_feature, conv_layers, dense_layers, init,
                learning_rate, optimizer, pooling, dropout, atrous,
                regularizer_conf, temporal, objective, penalty, activation,
                last_layer, batch_size, n_batch_per_file, lstm_dropout):
    """Build the model.

    Three parallel Conv1D stacks (spectral / temporal / pre-temporal) are
    configured by `conv_layers` ([(nb_filters, filter_length), ...]),
    flattened, concatenated, passed through `dense_layers` fully-connected
    layers and a single-unit output configured by `last_layer`.
    Returns [model_structure, model] where model_structure is ''.

    NOTE(review): `pooling`, `atrous`, `temporal` and `n_batch_per_file`
    appear unused in this body — confirm before relying on them. The large
    commented-out Conv1D/GRU calls were a Keras-2 port in progress; they are
    condensed to short notes here.
    """
    # fixed batch shapes: (batch, time, features)
    spectral_input = Input(
        batch_shape=(batch_size, spectral_input_size, spectral_n_feature),
        name='spectral_input')
    temporal_input = Input(
        batch_shape=(batch_size, temporal_input_size, temporal_n_feature),
        name='temporal_input')
    pre_temporal_input = Input(
        batch_shape=(batch_size, temporal_input_size, temporal_n_feature),
        name='pre_temporal_input')
    n_conv = len(conv_layers)
    n_dense = len(dense_layers)
    # first conv layer: (number of filters, filter length)
    nb_kernel = conv_layers[0][0]
    he_kernel = conv_layers[0][1]
    regularizer = None
    if regularizer_conf['name'] == 'l1':
        regularizer = l1(l=regularizer_conf['value'])
    elif regularizer_conf['name'] == 'l2':
        regularizer = l2(l=regularizer_conf['value'])
    # (Keras-2 form: Conv1D(nb_kernel, he_kernel, kernel_regularizer=regularizer))
    spectral_conv = Convolution1D(nb_kernel, he_kernel, border_mode='valid',
                                  activation=activation, bias=True, init=init,
                                  W_regularizer=regularizer)(spectral_input)
    # spectral_conv = MaxPooling1D(pool_length=2)(spectral_conv)
    spectral_conv = Dropout(dropout)(spectral_conv)
    # (disabled: MaxPooling1D(pool_size=2, strides=2, padding='valid'))
    # (Keras-2 form: Conv1D(nb_kernel, he_kernel, kernel_regularizer=regularizer))
    temporal_conv = Convolution1D(nb_kernel, he_kernel, border_mode='valid',
                                  activation=activation, bias=True, init=init,
                                  W_regularizer=regularizer)(temporal_input)
    # (disabled: MaxPooling1D variants)
    temporal_conv = Dropout(dropout)(temporal_conv)
    pre_temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation, bias=True,
                                      init=init,
                                      W_regularizer=regularizer)(pre_temporal_input)
    # pre_temporal_conv = MaxPooling1D(pool_length=2)(pre_temporal_conv)
    pre_temporal_conv = Dropout(dropout)(pre_temporal_conv)
    # remaining conv layers
    for i in range(1, n_conv):
        nb_kernel = conv_layers[i][0]
        he_kernel = conv_layers[i][1]
        # NOTE(review): `regularizer` is recomputed here but the layers below
        # pass W_regularizer=None (the commented Keras-2 form used it as an
        # activity regularizer) — confirm whether this is intentional.
        regularizer = None
        if regularizer_conf['name'] == 'l1':
            regularizer = l1(l=regularizer_conf['value'])
        elif regularizer_conf['name'] == 'l2':
            regularizer = l2(l=regularizer_conf['value'])
        # (Keras-2 form: Conv1D(..., activity_regularizer=regularizer))
        spectral_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation, bias=True,
                                      init=init, W_regularizer=None)(
            spectral_conv)
        spectral_conv = Dropout(dropout)(spectral_conv)
        # (Keras-2 form: Conv1D(..., activity_regularizer=regularizer))
        temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation, bias=True,
                                      init=init, W_regularizer=None)(
            temporal_conv)
        # (disabled: MaxPooling1D variants, gated on i < 3)
        temporal_conv = Dropout(dropout)(temporal_conv)
        pre_temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                          border_mode='valid',
                                          activation=activation, bias=True,
                                          init=init, W_regularizer=None)(
            pre_temporal_conv)
        # if i < 3:
        #     pre_temporal_conv = MaxPooling1D(pool_length=2)(pre_temporal_conv)
        pre_temporal_conv = Dropout(dropout)(pre_temporal_conv)
    # (disabled: stateful GRU heads over the conv stacks with
    #  recurrent_dropout=lstm_dropout)
    spectral_conv = Flatten()(spectral_conv)
    temporal_conv = Flatten()(temporal_conv)
    pre_temporal_conv = Flatten()(pre_temporal_conv)
    # merged_conv = concatenate([spectral_conv, temporal_conv])
    merged_conv = merge([spectral_conv, temporal_conv, pre_temporal_conv],
                        mode='concat')
    for i in range(0, n_dense):
        regularizer = None
        if regularizer_conf['name'] == 'l1':
            regularizer = l1(l=regularizer_conf['value'])
        elif regularizer_conf['name'] == 'l2':
            regularizer = l2(l=regularizer_conf['value'])
        # NOTE(review): the computed `regularizer` is unused here; the layer
        # hard-codes W_regularizer=l2(l=1e-02) — confirm intent.
        merged_conv = Dense(dense_layers[i], activation=activation, init=init,
                            W_regularizer=l2(l=1e-02))(
            merged_conv)
        merged_conv = Dropout(lstm_dropout)(merged_conv)
    last_regularizer = None
    if last_layer['regularization']['name'] == 'l1':
        last_regularizer = l1(l=last_layer['regularization']['value'])
    elif last_layer['regularization']['name'] == 'l2':
        last_regularizer = l2(l=last_layer['regularization']['value'])
    # (Keras-2 form: Dense(1, ..., kernel_regularizer=last_regularizer))
    output = Dense(1, activation=last_layer['activation'], init=init,
                   W_regularizer=last_regularizer, name='output')(merged_conv)
    # model = Model(inputs=[spectral_input, temporal_input],
    #               outputs=[output])
    model = Model(input=[spectral_input, temporal_input, pre_temporal_input],
                  output=output)
    compile_model(model, objective, penalty, learning_rate, optimizer)
    model_structure = ''
    print(model.summary())
    return [model_structure, model]
def test_recursion():
    """Exercise using a Model as a layer (recursion), serialization,
    invalid-graph detection, and calling models on raw TF tensors."""
    ####################################################
    # test recursion
    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')
    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)
    merged = merge([a_2, b_2], mode='concat', name='merge')
    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)
    model = Model(input=[a, b], output=[c, d], name='model')
    e = Input(shape=(32,), name='input_e')
    f = Input(shape=(32,), name='input_f')
    # call the whole model as if it were a layer
    g, h = model([e, f])
    # g2, h2 = model([e, f])
    assert g._keras_shape == c._keras_shape
    assert h._keras_shape == d._keras_shape
    # test separate manipulation of different layer outputs
    i = Dense(7, name='dense_4')(h)
    final_model = Model(input=[e, f], output=[i, g], name='final')
    assert len(final_model.inputs) == 2
    assert len(final_model.outputs) == 2
    assert len(final_model.layers) == 4
    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('final_model layers:', [layer.name for layer in final_model.layers])
    assert [layer.name for layer in final_model.layers][2:] == ['model', 'dense_4']
    print(model.compute_mask([e, f], [None, None]))
    assert model.compute_mask([e, f], [None, None]) == [None, None]
    print(final_model.get_output_shape_for([(10, 32), (10, 32)]))
    assert final_model.get_output_shape_for([(10, 32), (10, 32)]) == [(10, 7), (10, 64)]
    # run recursive model
    fn = K.function(final_model.inputs, final_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]
    # test serialization
    model_config = final_model.get_config()
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)
    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]
    ####################################################
    # test multi-input multi-output
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    o = Input(shape=(32,), name='input_o')
    p = Input(shape=(32,), name='input_p')
    q, r = model([o, p])
    assert n._keras_shape == (None, 5)
    assert q._keras_shape == (None, 64)
    s = merge([n, q], mode='concat', name='merge_nq')
    assert s._keras_shape == (None, 64 + 5)
    # test with single output as 1-elem list
    multi_io_model = Model([j, k, o, p], [s])
    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    # test with single output as tensor
    multi_io_model = Model([j, k, o, p], s)
    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    # test serialization
    print('multi_io_model.layers:', multi_io_model.layers)
    print('len(model.inbound_nodes):', len(model.inbound_nodes))
    print('len(model.outbound_nodes):', len(model.outbound_nodes))
    model_config = multi_io_model.get_config()
    print(model_config)
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)
    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    config = model.get_config()
    new_model = Model.from_config(config)
    model.summary()
    json_str = model.to_json()
    new_model = model_from_json(json_str)
    yaml_str = model.to_yaml()
    new_model = model_from_yaml(yaml_str)
    ####################################################
    # test invalid graphs
    # input is not an Input tensor
    j = Input(shape=(32,), name='input_j')
    j = Dense(32)(j)
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n])
    # disconnected graph
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception) as e:
        invalid_model = Model([j], [m, n])
    # redundant outputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    # this should work lol
    # TODO: raise a warning
    invalid_model = Model([j, k], [m, n, n])
    # redundant inputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k, j], [m, n])
    # garbage as inputs/outputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n, 0])
    ####################################################
    # test calling layers/models on TF tensors
    if K._BACKEND == 'tensorflow':
        import tensorflow as tf
        j = Input(shape=(32,), name='input_j')
        k = Input(shape=(32,), name='input_k')
        m, n = model([j, k])
        tf_model = Model([j, k], [m, n])
        # magic
        j_tf = tf.placeholder(dtype=K.floatx())
        k_tf = tf.placeholder(dtype=K.floatx())
        m_tf, n_tf = tf_model([j_tf, k_tf])
        # raw TF tensors don't get the Keras shape metadata...
        assert not hasattr(m_tf, '_keras_shape')
        assert not hasattr(n_tf, '_keras_shape')
        # ...but the backend can still infer their static shape
        assert K.int_shape(m_tf) == (None, 64)
        assert K.int_shape(n_tf) == (None, 5)
        # test merge
        o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)
        # test tensor input
        x = tf.placeholder(shape=(None, 2), dtype=K.floatx())
        input_layer = InputLayer(input_tensor=x)
        x = Input(tensor=x)
        y = Dense(2)(x)