def inception_resnet_v2_A(x):
    shortcut = x
    
    a = Convolution2D(32//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    
    b = Convolution2D(32//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    b = Convolution2D(32//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(b)
    
    c = Convolution2D(32//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    c = Convolution2D(48//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(c)
    c = Convolution2D(64//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(c)
    
    x = merge([a, b, c], mode='concat', concat_axis=-1)
    x = Convolution2D(384//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='linear',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    
    x = merge([shortcut, x], mode='sum')
    x = Activation('relu')(x)
    
    return x
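Note: these snippets use the Keras 1.x functional API (Convolution2D, merge, init/border_mode). For reference, a minimal Keras 2 sketch of the same Inception-ResNet-A block, assuming the same module-level nb_filters_reduction_factor global; an illustration, not the source's code:

from keras.layers import Activation, Conv2D, add, concatenate

def inception_resnet_v2_A_keras2(x):
    # shorthand: 'same'-padded ReLU conv with he_normal init and scaled filter count
    conv = lambda n, k: Conv2D(n // nb_filters_reduction_factor, k, padding='same',
                               activation='relu', kernel_initializer='he_normal')
    shortcut = x
    a = conv(32, 1)(x)
    b = conv(32, 3)(conv(32, 1)(x))
    c = conv(64, 3)(conv(48, 3)(conv(32, 1)(x)))
    x = concatenate([a, b, c], axis=-1)
    # linear 1x1 projection before the residual sum
    x = Conv2D(384 // nb_filters_reduction_factor, 1, padding='same',
               kernel_initializer='he_normal')(x)
    return Activation('relu')(add([shortcut, x]))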
Example #2
def unet_model_3d():
    inputs = Input(config["input_shape"])
    conv1 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(inputs)
    conv1 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(conv1)
    pool1 = MaxPooling3D(pool_size=config["pool_size"])(conv1)

    conv2 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(pool1)
    conv2 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(conv2)
    pool2 = MaxPooling3D(pool_size=config["pool_size"])(conv2)

    conv3 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(pool2)
    conv3 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(conv3)
    pool3 = MaxPooling3D(pool_size=config["pool_size"])(conv3)

    conv4 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(pool3)
    conv4 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(conv4)
    pool4 = MaxPooling3D(pool_size=config["pool_size"])(conv4)

    conv5 = Conv3D(512, 3, 3, 3, activation='relu', border_mode='same')(pool4)
    conv5 = Conv3D(512, 3, 3, 3, activation='relu', border_mode='same')(conv5)

    up6 = merge([UpSampling3D(size=config["pool_size"])(conv5), conv4],
                mode='concat',
                concat_axis=1)
    conv6 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(up6)
    conv6 = Conv3D(256, 3, 3, 3, activation='relu', border_mode='same')(conv6)

    up7 = merge([UpSampling3D(size=config["pool_size"])(conv6), conv3],
                mode='concat',
                concat_axis=1)
    conv7 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(up7)
    conv7 = Conv3D(128, 3, 3, 3, activation='relu', border_mode='same')(conv7)

    up8 = merge([UpSampling3D(size=config["pool_size"])(conv7), conv2],
                mode='concat',
                concat_axis=1)
    conv8 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(up8)
    conv8 = Conv3D(64, 3, 3, 3, activation='relu', border_mode='same')(conv8)

    up9 = merge([UpSampling3D(size=config["pool_size"])(conv8), conv1],
                mode='concat',
                concat_axis=1)
    conv9 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(up9)
    conv9 = Conv3D(32, 3, 3, 3, activation='relu', border_mode='same')(conv9)

    conv10 = Conv3D(config["n_labels"], 1, 1, 1)(conv9)
    act = Activation('sigmoid')(conv10)
    model = Model(input=inputs, output=act)

    model.compile(optimizer=Adam(lr=config["initial_learning_rate"]),
                  loss=dice_coef_loss,
                  metrics=[dice_coef])

    return model
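unet_model_3d reads everything from a module-level config dict. A hypothetical config that satisfies the lookups above; the values are placeholders, and channels-first ordering is assumed because the skip connections concatenate on axis 1:

config = {
    "input_shape": (1, 64, 64, 64),   # (channels, x, y, z), channels-first
    "pool_size": (2, 2, 2),
    "n_labels": 1,
    "initial_learning_rate": 1e-5,
}
# dice_coef and dice_coef_loss must also be defined before calling unet_model_3d().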
Example #3
    def _build_trending(self, phase):
        prior_input = merge([self.x_tm1, self.z_tm1], mode="concat")
        rnn_prior = RecurrentLayer(self.n_hidden_recurrent,
                                   return_sequences=True,
                                   stateful=(phase == Phases.predict),
                                   consume_less='gpu')(prior_input)
        rnn_rec_mu = TimeDistributed(
            Dense(self.latent_dim, activation='linear'))(rnn_prior)
        rnn_rec_sigma = TimeDistributed(
            Dense(self.latent_dim, activation="softplus"))(rnn_prior)

        return merge([rnn_rec_mu, rnn_rec_sigma], mode="concat")
Example #4
def inception_resnet_v2_B(x):
    shortcut = x

    a = Convolution2D(192 // nb_filters_reduction_factor,
                      1,
                      1,
                      subsample=(1, 1),
                      activation='relu',
                      init='he_normal',
                      border_mode='same',
                      dim_ordering='tf')(x)

    b = Convolution2D(128 // nb_filters_reduction_factor,
                      1,
                      1,
                      subsample=(1, 1),
                      activation='relu',
                      init='he_normal',
                      border_mode='same',
                      dim_ordering='tf')(x)
    b = Convolution2D(160 // nb_filters_reduction_factor,
                      1,
                      7,
                      subsample=(1, 1),
                      activation='relu',
                      init='he_normal',
                      border_mode='same',
                      dim_ordering='tf')(b)
    b = Convolution2D(192 // nb_filters_reduction_factor,
                      7,
                      1,
                      subsample=(1, 1),
                      activation='relu',
                      init='he_normal',
                      border_mode='same',
                      dim_ordering='tf')(b)

    x = merge([a, b], mode='concat', concat_axis=-1)
    x = Convolution2D(1154 // nb_filters_reduction_factor,
                      1,
                      1,
                      subsample=(1, 1),
                      activation='linear',
                      init='he_normal',
                      border_mode='same',
                      dim_ordering='tf')(x)

    x = merge([shortcut, x], mode='sum')
    x = Activation('relu')(x)

    return x
Example #5
def constraint_lstm(timesteps,
                    num_features,
                    num_pitches,
                    num_units_lstm,
                    dropout_prob=0.2):
    input_seq = Input((timesteps, num_features), name='input_seq')
    constraint = Input((timesteps, num_features + 1), name='constraint')

    repr_input = input_seq
    repr_constraint = constraint

    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=True)(repr_constraint)
    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=False)(repr_constraint)

    tiled_constraint = Reshape((1, num_units_lstm))(repr_constraint)

    # TODO: the tiling below hard-codes 16 timesteps
    # only info at step one
    tiled_constraint = Lambda(lambda x: K.concatenate(
        (K.concatenate([x, K.zeros_like(x)[:, :, 0:1]], axis=2),
         K.tile(
             K.concatenate(
                 [K.zeros_like(x), K.ones_like(x)[:, :, 0:1]], axis=2),
             (1, 16 - 1, 1))),
        axis=1))(tiled_constraint)

    repr_input = merge([repr_input, tiled_constraint],
                       mode='concat',
                       concat_axis=2)

    repr_input = LSTM(num_units_lstm, return_sequences=True)(repr_input)
    repr_input = LSTM(num_units_lstm, return_sequences=False)(repr_input)

    hidden_repr = merge([repr_input, repr_constraint], mode='concat')

    # NN
    hidden_repr = Dense(num_units_lstm, activation='relu')(hidden_repr)
    hidden_repr = Dense(num_pitches)(hidden_repr)
    preds = Activation('softmax', name='label')(hidden_repr)

    model = Model(input=[input_seq, constraint], output=preds)

    model.compile(optimizer='adam',
                  loss={'label': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
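Hypothetical usage: the Lambda above tiles the constraint over a hard-coded 16 steps (the TODO in the source), so shapes only line up when timesteps == 16; the remaining argument values are illustrative:

model = constraint_lstm(timesteps=16, num_features=53,
                        num_pitches=53, num_units_lstm=256)

Example #6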
    def __init__(self, hidden_LSTM, hidden_MLP1, hidden_MLP2):
        parent = Input(shape=(hidden_LSTM * 2, ), name='parent')
        head = Input(shape=(hidden_LSTM * 2, ), name='head')
        tail = Input(shape=(hidden_LSTM * 2, ), name='tail')

        input_MLP = merge([parent, head, tail],
                          mode='concat',
                          name='input_MLP')

        h0 = Dense(input_dim=hidden_LSTM * 6,
                   output_dim=hidden_MLP1,
                   activation='linear',
                   name='h0')(input_MLP)
        h1 = Dense(input_dim=hidden_MLP1,
                   output_dim=hidden_MLP2,
                   activation='tanh',
                   name='h1')(h0)

        output = Dense(input_dim=hidden_MLP2,
                       output_dim=49,
                       activation='softmax',
                       name='output')(h1)

        self.__model = Model(input=[parent, head, tail], output=output)
        self.__model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
Example #7
    def __init__(self, hidden_LSTM, hidden_MLP):
        s2v = Input(shape=(None, 300), name='sentence2vec')
        ms1 = Input(shape=(None, ), dtype=tf.bool, name='mask_stack_1')
        ms2 = Input(shape=(None, ), dtype=tf.bool, name='mask_stack_2')
        mb = Input(shape=(None, ), dtype=tf.bool, name='mask_buffer')

        lstm = Bidirectional(LSTM(input_dim=300,
                                  output_dim=hidden_LSTM,
                                  return_sequences=True,
                                  name='lstm'),
                             merge_mode='concat',
                             name='bi')(s2v)

        stack1 = Lambda(lambda x: tf.boolean_mask(x, ms1), name='stack1')(lstm)
        stack2 = Lambda(lambda x: tf.boolean_mask(x, ms2), name='stack2')(lstm)
        buffer = Lambda(lambda x: tf.boolean_mask(x, mb), name='buffer')(lstm)

        input_MLP = merge([stack1, stack2, buffer],
                          mode='concat',
                          name='input_MLP')

        h0 = Dense(input_dim=hidden_LSTM * 6,
                   output_dim=hidden_MLP,
                   activation='tanh',
                   name='h0')(input_MLP)
        output = Dense(input_dim=hidden_MLP,
                       output_dim=3,
                       activation='softmax',
                       name='output')(h0)

        self.__model = Model(input=[s2v, ms1, ms2, mb], output=output)
        self.__model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
Example #8
def weighted_states(activations, rnn_size, input_length, attention="single"):
    if attention == "all":
        attention = Flatten()(activations)
        attention = Dense(input_length, activation='tanh')(attention)
        attention = Activation('softmax')(attention)
        attention = RepeatVector(rnn_size)(attention)
        attention = Permute([2, 1])(attention)
        return merge([activations, attention], mode='mul')
    elif attention == "single":
        attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
        # attention = Dense(1, activation='tanh')(activations)
        attention = Flatten()(attention)
        attention = Activation('softmax')(attention)
        attention = RepeatVector(rnn_size)(attention)
        attention = Permute([2, 1])(attention)
        return merge([activations, attention], mode='mul')
Example #9
def cnn_multi_filters(wv, sent_length, nfilters, nb_filters, **kwargs):
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)

    input_text = Input(shape=(sent_length,), dtype='int32')

    emb_text = embeddings_layer(max_length=sent_length, embeddings=wv, trainable=trainable, masking=False)(input_text)
    emb_text = GaussianNoise(noise)(emb_text)
    emb_text = Dropout(drop_text_input)(emb_text)

    pooling_reps = []
    for i in nfilters:
        feat_maps = Convolution1D(nb_filter=nb_filters,
                                  filter_length=i,
                                  border_mode="valid",
                                  activation="relu",
                                  subsample_length=1)(emb_text)
        pool_vecs = GlobalMaxPooling1D()(feat_maps)
        pooling_reps.append(pool_vecs)

    representation = merge(pooling_reps, mode='concat')

    probabilities = Dense(3, activation='softmax', activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=input_text, output=probabilities)
    model.compile(optimizer="adam", loss='categorical_crossentropy')

    return model
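Hypothetical usage of cnn_multi_filters: nfilters is the list of filter widths and nb_filters the number of feature maps per width; wv would be a (vocab_size, emb_dim) numpy embedding matrix, and embeddings_layer is a project helper not shown here:

model = cnn_multi_filters(wv, sent_length=50, nfilters=[3, 4, 5],
                          nb_filters=100, noise=0.2, drop_text_input=0.3)

Example #10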
    def residual_block(x, nb_filters=16, subsample_factor=1):
      prev_nb_channels = K.int_shape(x)[4]

      if subsample_factor > 1:
        subsample = (subsample_factor, subsample_factor, subsample_factor)
        # shortcut: subsample + zero-pad channel dim
        shortcut = MaxPooling3D(pool_size=subsample)(x)
      else:
        subsample = (1, 1, 1)
        # shortcut: identity
        shortcut = x

      if nb_filters > prev_nb_channels:
        shortcut = Lambda(zero_pad_channels,
                          arguments={
                              'pad': nb_filters - prev_nb_channels})(shortcut)

      y = BatchNormalization(axis=4)(x)
      y = Activation('relu')(y)
      y = Convolution3D(nb_filters, 3, 3, 3, subsample=subsample,
                        init='he_normal', border_mode='same')(y)
      y = BatchNormalization(axis=4)(y)
      y = Activation('relu')(y)
      y = Convolution3D(nb_filters, 3, 3, 3, subsample=(1, 1, 1),
                        init='he_normal', border_mode='same')(y)

      out = merge([y, shortcut], mode='sum')

      return out
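zero_pad_channels is not defined in this snippet. A plausible implementation for a TensorFlow backend, zero-padding the trailing channel axis of the 5D shortcut tensor; an assumption, not the source's code:

import tensorflow as tf

def zero_pad_channels(x, pad=0):
    # pad only the last (channel) axis of a (batch, x, y, z, channels) tensor
    pattern = [[0, 0], [0, 0], [0, 0], [0, 0], [pad // 2, pad - pad // 2]]
    return tf.pad(x, pattern)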
Example #11
    def test_merge_model_model_concat(self):
        input_data1 = np.random.random_sample([2, 4])
        input_data2 = np.random.random_sample([2, 3])
        input1 = Input((4, ))
        input2 = Input((3, ))
        out1 = Dense(4)(input1)
        out1_1 = Dense(4)(out1)

        out2 = Dense(3)(input2)
        out2_1 = Dense(3)(out2)

        branch1 = Model(input=[input1], output=out1_1)
        branch2 = Model(input=[input2], output=out2_1)
        branch1_tensor = branch1(input1)
        branch2_tensor = branch2(input2)

        from keras.engine import merge
        m = merge([branch1_tensor, branch2_tensor],
                  mode="concat",
                  concat_axis=1)
        kmodel = Model(input=[input1, input2], output=m)

        self.modelTest([input_data1, input_data2],
                       kmodel,
                       random_weights=False,
                       dump_weights=True,
                       is_training=False)
Example #12
def inception_v4_B(x, nb_filters_reduction_factor=8):
    a = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering='tf')(x)
    a = Convolution2D(128//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(a)
    
    b = Convolution2D(384//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    
    c = Convolution2D(192//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    c = Convolution2D(224//nb_filters_reduction_factor, 1, 7, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(c)
    c = Convolution2D(256//nb_filters_reduction_factor, 7, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(c)
    
    d = Convolution2D(192//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    d = Convolution2D(192//nb_filters_reduction_factor, 1, 7, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(224//nb_filters_reduction_factor, 7, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(224//nb_filters_reduction_factor, 1, 7, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(d)
    d = Convolution2D(256//nb_filters_reduction_factor, 7, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(d)
    
    x = merge([a, b, c, d], mode='concat', concat_axis=-1)
    
    return x
Example #13
    def test_merge_model_model_concat(self):
        input_data1 = np.random.random_sample([2, 4])
        input_data2 = np.random.random_sample([2, 3])
        input1 = Input((4,))
        input2 = Input((3,))
        out1 = Dense(4)(input1)
        out1_1 = Dense(4)(out1)

        out2 = Dense(3)(input2)
        out2_1 = Dense(3)(out2)

        branch1 = Model(input=[input1], output=out1_1)
        branch2 = Model(input=[input2], output=out2_1)
        branch1_tensor = branch1(input1)
        branch2_tensor = branch2(input2)

        from keras.engine import merge
        m = merge([branch1_tensor, branch2_tensor], mode="concat", concat_axis=1)
        kmodel = Model(input=[input1, input2], output=m)

        self.modelTest([input_data1, input_data2],
                       kmodel,
                       random_weights=False,
                       dump_weights=True,
                       is_training=False)
Example #14
def identity_block(input_tensor,
                   kernel_size,
                   nb_filter,
                   stage,
                   block,
                   subsampling=False):
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + '_' + block + '_branch'
    bn_name_base = 'bn' + str(stage) + '_' + block + '_branch'

    if subsampling:
        x = Convolution2D(nb_filter,
                          kernel_size,
                          kernel_size,
                          border_mode='same',
                          subsample=(2, 2),
                          name=conv_name_base + '2a')(input_tensor)
    else:
        x = Convolution2D(nb_filter,
                          kernel_size,
                          kernel_size,
                          border_mode='same',
                          name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = Convolution2D(nb_filter,
                      kernel_size,
                      kernel_size,
                      border_mode='same',
                      name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    if subsampling:
        x1 = Convolution2D(nb_filter,
                           1,
                           1,
                           border_mode='same',
                           subsample=(2, 2),
                           name=conv_name_base + '2c')(input_tensor)
        x = merge([x, x1], mode='sum')
    else:
        x = merge([x, input_tensor], mode='sum')
    x = Activation('relu')(x)
    return x
Example #15
def get_entity_masking_model(len_embedding, data, filter_sizes):
    num_class = 3
    num_filters = 200
    embedding_dim = len_embedding
    size_voca = len(data.idx2vect)
    num_entity = len(set(data.keyword2idx.keys()))
    eew = [numpy.random.uniform(-0.01, 0.01, size=(num_entity, entity_embed_length))]

    sent_input = Input(shape=(len_sentence,), dtype='int32', name='sent_level_input')
    ei_input = Input(shape=(1,), name='entity_indicator_input')

    sent_x = Embedding(size_voca, embedding_dim,
      input_length=len_sentence, weights=[data.idx2vect])(sent_input)

    ei_emb = Embedding(num_entity, entity_embed_length, input_length=1, weights=eew)(ei_input)
    ei_emb = Reshape([entity_embed_length])(ei_emb)

    sent_x = Dropout(0.5, input_shape=(len_sentence, embedding_dim))(sent_x)
    ei_emb = Dropout(0.5, input_shape=(1, entity_embed_length))(ei_emb)

    multiple_filter_output = []
    for i in range(len(filter_sizes)):
        conv = Convolution1D(nb_filter=num_filters,
                             filter_length=filter_sizes[i],
                             border_mode='valid',
                             bias=True,
                             activation='relu',
                             subsample_length=1)(sent_x)
        pool = MaxPooling1D(pool_length=len_sentence - filter_sizes[i] + 1)(conv)
        multiple_filter_output.append(Flatten()(pool))

    if len(filter_sizes) == 1:
        text_feature = multiple_filter_output[0]
    else:
        text_feature = merge(multiple_filter_output, mode='concat')  # text features from CNN

    text_ei_feature = merge([text_feature, ei_emb], mode='concat')
    text_ei_feature = Dropout(0.5)(text_ei_feature)
    sent_loss = Dense(num_class, activation='softmax', name='sent_level_output')(text_ei_feature)
    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, clipnorm=l2value)

    model = Model(input=[sent_input, ei_input], output=sent_loss) # TODO : take multiple inputs
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=adadelta)

    return model
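The snippet above also relies on several names defined elsewhere in the source project; the values below are illustrative placeholders only:

entity_embed_length = 50   # width of the entity-indicator embedding (hypothetical)
len_sentence = 100         # padded sentence length (hypothetical)
l2value = 9.0              # clipnorm passed to Adadelta (hypothetical)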
Example #16
    def _build(self, phase, seq_shape=None, batch_size=None):
        if phase == Phases.train:
            x_t = Input(shape=(seq_shape, self.data_dim),
                        name="stornREC_input_train",
                        dtype="float32")
        else:
            x_t = Input(batch_shape=(batch_size, 1, self.data_dim),
                        name="stornREC_input_predict",
                        dtype="float32")

        # Recognition model

        # Fix of keras/engine/topology.py required for masked layer!
        # Otherwise concat with masked and non masked layer returns an error!
        # recogn_input = Masking()(x_t)

        # Unmasked Layer
        recogn_input = x_t

        for i in range(self.n_deep):
            recogn_input = TimeDistributed(
                Dense(self.n_hidden_dense,
                      activation=self.activation))(recogn_input)
            if self.dropout != 0.0:
                recogn_input = Dropout(self.dropout)(recogn_input)

        recogn_rnn = RecurrentLayer(self.n_hidden_recurrent,
                                    return_sequences=True,
                                    stateful=(phase == Phases.predict),
                                    consume_less='gpu')(recogn_input)

        recogn_map = recogn_rnn
        for i in range(self.n_deep):
            recogn_map = TimeDistributed(
                Dense(self.n_hidden_dense,
                      activation=self.activation))(recogn_map)
            if self.dropout != 0:
                recogn_map = Dropout(self.dropout)(recogn_map)

        recogn_mu = TimeDistributed(Dense(self.latent_dim,
                                          activation='linear'))(recogn_map)
        recogn_sigma = TimeDistributed(
            Dense(self.latent_dim, activation="softplus"))(recogn_map)
        recogn_stats = merge([recogn_mu, recogn_sigma], mode='concat')

        # sample z from the distribution in X
        z_t = TimeDistributed(
            LambdaWithMasking(
                STORNRecognitionModel.do_sample,
                output_shape=STORNRecognitionModel.sample_output_shape,
                arguments={
                    'batch_size': (None if
                                   (phase == Phases.train) else batch_size),
                    'dim_size': self.latent_dim
                }))(recogn_stats)

        return recogn_stats, x_t, z_t
Example #17
 def upconv2_2(self, input, concat_tensor, no_features):
     out_shape = [dim.value for dim in concat_tensor.get_shape()]
     up_conv = Deconvolution2D(no_features,
                               5,
                               5,
                               out_shape,
                               subsample=(2, 2))(input)
     # up_conv = Convolution2D(no_features, 2, 2)(UpSampling2D()(input))
     merged = merge([concat_tensor, up_conv], mode='concat', concat_axis=3)
     return merged
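Deconvolution2D with an explicit output shape is Keras 1 API. A Keras 2 sketch of the same 2x upsample-and-concatenate step; Conv2DTranspose infers the output shape, and 'same' padding is an assumption here:

from keras.layers import Conv2DTranspose, concatenate

def upconv2_2_keras2(input_tensor, concat_tensor, no_features):
    up_conv = Conv2DTranspose(no_features, (5, 5), strides=(2, 2),
                              padding='same')(input_tensor)
    return concatenate([concat_tensor, up_conv], axis=3)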
Example #18
def get_model(
        data_path,  # path to dataset
        hid_dim,  # dimension of the hidden GRU layers
        optimizer='rmsprop',  # optimization function to be used
        loss='categorical_crossentropy'  # loss function to be used
        ):

    metadata_dict = {}
    f = open(os.path.join(data_path, 'metadata', 'metadata.txt'), 'r')
    for line in f:
        entry = line.split(':')
        metadata_dict[entry[0]] = int(entry[1])
    f.close()
    story_maxlen = metadata_dict['input_length']
    query_maxlen = metadata_dict['query_length']
    vocab_size = metadata_dict['vocab_size']
    entity_dim = metadata_dict['entity_dim']

    embed_weights = np.load(os.path.join(data_path, 'metadata', 'weights.npy'))
    word_dim = embed_weights.shape[1]

########## MODEL ############

    story_input = Input(shape=(story_maxlen,), dtype='int32', name="StoryInput")

    x = Embedding(input_dim=vocab_size+2,
                  output_dim=word_dim,
                  input_length=story_maxlen,
                  mask_zero=True,
                  weights=[embed_weights])(story_input)

    query_input = Input(shape=(query_maxlen,), dtype='int32', name='QueryInput')

    x_q = Embedding(input_dim=vocab_size+2,
            output_dim=word_dim,
            input_length=query_maxlen,
            mask_zero=True,
            weights=[embed_weights])(query_input)

    concat_embeddings = masked_concat([x_q, x], concat_axis=1)

    lstm = GRU(hid_dim, consume_less='gpu')(concat_embeddings)

    reverse_lstm = GRU(hid_dim, consume_less='gpu', go_backwards=True)(concat_embeddings)

    merged = merge([lstm, reverse_lstm], mode='concat')

    result = Dense(entity_dim, activation='softmax')(merged)

    model = Model(input=[story_input, query_input], output=result)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy'])
    print(model.summary())
    return model
Example #19
def inception_resnet_v2_stem(x):
    # in original inception-resnet-v2, conv stride is 2
    x = Convolution2D(32//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(x)
    x = Convolution2D(32//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(x)
    x = Convolution2D(64//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    
    # in original inception-resnet-v2, stride is 2
    a = MaxPooling2D((3, 3), strides=(1, 1), border_mode='valid', dim_ordering='tf')(x)
    # in original inception-resnet-v2, conv stride is 2
    b = Convolution2D(96//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(x)
    x = merge([a, b], mode='concat', concat_axis=-1)
    
    a = Convolution2D(64//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    a = Convolution2D(96//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(a)
    b = Convolution2D(64//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    b = Convolution2D(64//nb_filters_reduction_factor, 7, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(b)
    b = Convolution2D(64//nb_filters_reduction_factor, 1, 7, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(b)
    b = Convolution2D(96//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(b)
    x = merge([a, b], mode='concat', concat_axis=-1)
    
    # in original inception-resnet-v2, conv stride should be 2
    a = Convolution2D(192//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(x)
    # in original inception-resnet-v2, stride is 2
    b = MaxPooling2D((3, 3), strides=(1, 1), border_mode='valid', dim_ordering='tf')(x)
    x = merge([a, b], mode='concat', concat_axis=-1)
    
    x = Activation('relu')(x)
    
    return x
Example #20
    def build_net_DiagLSTM(self, load_weights=False):

        img = Input(batch_shape=(10, self.img_channels, self.img_rows, self.img_cols), name='input_img')

        model_in = MaskedConvolution2D(self.h, 7, 7, mask_type='a', direction='Right', border_mode='same', init='he_uniform')(img)

        for _ in range(12):
            model_LSTM_F = DiagLSTM(self.h_2, 3, return_sequences=True, init='he_uniform', inner_init='he_uniform', direction='Right')(model_in)
            model_LSTM_B = DiagLSTM(self.h_2, 3, return_sequences=True, init='he_uniform', inner_init='he_uniform', direction='Right', reverse=True)(model_in)
            model_LSTM = merge([model_LSTM_F, model_LSTM_B], mode='sum')
            model_per = Convolution2D(self.h, 1, 1, init='he_normal')(model_LSTM)
            model_in = merge([model_in, model_per], mode='sum')

        model_out = MaskedConvolution2D(self.h, 1, 1, mask_type='b', direction='Right', border_mode='same', activation='relu', init='he_uniform')(model_in)
        model_out = MaskedConvolution2D(256 * 3, 1, 1, mask_type='b', direction='Right', border_mode='same', activation='relu', init='he_uniform')(model_out)

        Red = GetColors(0)(model_out)
        Green = GetColors(1)(model_out)
        Blue = GetColors(2)(model_out)

        Red_out = SoftmaxLayer(name='Red_out')(Red)
        Green_out = SoftmaxLayer(name='Green_out')(Green)
        Blue_out = SoftmaxLayer(name='Blue_out')(Blue)

        Col_Model = Model(img, [Red_out, Green_out, Blue_out])

        if load_weights:
            Col_Model.load_weights('Data/comp_model.h5')

        print("Compiling...")
        Col_Model.compile(optimizer=self.optimizer,
                              loss={'Red_out': image_categorical_crossentropy,
                                    'Green_out': image_categorical_crossentropy,
                                    'Blue_out': image_categorical_crossentropy},
                              metrics={'Red_out': 'accuracy',
                                       'Green_out': 'accuracy',
                                       'Blue_out': 'accuracy'})
        self.comp_net = Col_Model
Example #21
def inception_resnet_v2_reduction_A(x):
    a = MaxPooling2D((3, 3), strides=(2, 2), border_mode='valid', dim_ordering='tf')(x)
    b = Convolution2D(384//nb_filters_reduction_factor, 3, 3, subsample=(2, 2), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(x)
    c = Convolution2D(256//nb_filters_reduction_factor, 1, 1, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(x)
    c = Convolution2D(256//nb_filters_reduction_factor, 3, 3, subsample=(1, 1), activation='relu',
                      init='he_normal', border_mode='same', dim_ordering='tf')(c)
    c = Convolution2D(384//nb_filters_reduction_factor, 3, 3, subsample=(2, 2), activation='relu',
                      init='he_normal', border_mode='valid', dim_ordering='tf')(c)
    
    x = merge([a, b, c], mode='concat', concat_axis=-1)
    
    return x
Example #22
 def test_merge_method_cos(self):
     input_data1 = np.random.random_sample([2, 4])
     input_data2 = np.random.random_sample([2, 4])
     input1 = Input((4,))
     input2 = Input((4,))
     out1 = Dense(4)(input1)
     out2 = Dense(4)(input2)
     from keras.engine import merge
     m = merge([out1, out2], mode="cos", dot_axes=1)
     kmodel = Model(input=[input1, input2], output=m)
     self.modelTest([input_data1, input_data2],
                    kmodel,
                    random_weights=False,
                    dump_weights=True,
                    is_training=False)
Example #23
 def test_merge_method_cos(self):
     input_data1 = np.random.random_sample([2, 4])
     input_data2 = np.random.random_sample([2, 4])
     input1 = Input((4, ))
     input2 = Input((4, ))
     out1 = Dense(4)(input1)
     out2 = Dense(4)(input2)
     from keras.engine import merge
     m = merge([out1, out2], mode="cos", dot_axes=1)
     kmodel = Model(input=[input1, input2], output=m)
     self.modelTest([input_data1, input_data2],
                    kmodel,
                    random_weights=False,
                    dump_weights=True,
                    is_training=False)
Example #24
    def __init__(self, word_vec, word_to_index, index_to_word, classes, title_output=128, content_output=512,
                 dense_neurons=(1024, 256,), title_len=50, content_len=2000, weights=None, directory='.'):
        self.directory = directory
        self.word_to_index = word_to_index
        self.index_to_word = index_to_word
        self.title_len = title_len
        self.content_len = content_len
        self.word_vec = word_vec
        self.classes = classes
        self.title_output = title_output
        self.content_output = content_output
        self.dense_neurons = dense_neurons

        # Encode document's title
        title_inp = Input(shape=(title_len,), name='Title_Input')
        title_embed = Embedding(input_dim=np.size(word_vec, 0), output_dim=np.size(word_vec, 1),
                                weights=[word_vec], mask_zero=True, name='Title_Embedding')
        self.t_encoder = Sequential(name='Title_Encoder')
        self.t_encoder.add(title_embed)
        self.t_encoder.add(GRU(title_output, name='Title_GRU', consume_less='mem'))
        title_vec = self.t_encoder(title_inp)

        # Encode document's content
        content_inp = Input(shape=(content_len,), name='Content_Input')
        content_embed = Embedding(input_dim=np.size(word_vec, 0), output_dim=np.size(word_vec, 1),
                                  weights=[word_vec], mask_zero=True, name='Content_Embedding')
        self.c_encoder = Sequential(name='Content_Encoder')
        self.c_encoder.add(content_embed)
        self.c_encoder.add(GRU(content_output, name='Content_GRU', consume_less='mem'))
        content_vec = self.c_encoder(content_inp)

        # Merge vectors to create output
        doc_vec = merge(inputs=[title_vec, content_vec], mode='concat')

        # Decode using dense layers
        self.decoder = Sequential(name='Decoder')
        self.decoder.add(Dense(dense_neurons[0], input_shape=(title_output + content_output,),
                               name='Dense_0', activation='hard_sigmoid'))
        for i, n in enumerate(dense_neurons[1:]):
            self.decoder.add(Dense(n, activation='hard_sigmoid', name='Dense_%s' % (i + 1)))
        self.decoder.add(Dense(len(classes), activation='softmax', name='Dense_Output'))
        output = self.decoder(doc_vec)

        self.model = Model(input=[title_inp, content_inp], output=output, name='Model')
        if weights is not None:
            self.model.load_weights(weights)
Example #25
def get2way_model(len_embedding, len_sentence, idx2vect, filter_sizes):
    dropout_prob = (0.1, 0.3)
    num_filters = 10
    hidden_dims = 10
    embedding_dim = len_embedding
    size_voca = len(idx2vect)

    sent_input = Input(shape=(len_sentence,), dtype='int32', name='sent_level_input')

    sent_x = Embedding(size_voca, embedding_dim,
      input_length=len_sentence, weights=[idx2vect])(sent_input)


    sent_x = Dropout(dropout_prob[0], input_shape=(len_sentence, embedding_dim))(sent_x)

    multiple_filter_output = []
    for i in range(len(filter_sizes)):
        conv = Convolution1D(nb_filter=num_filters,
                             filter_length=filter_sizes[i],
                             border_mode='valid',
                             bias=True,
                             activation='relu',
                             subsample_length=1)(sent_x)
        pool = MaxPooling1D(pool_length=len_sentence - filter_sizes[i] + 1)(conv)
        multiple_filter_output.append(Flatten()(pool))

    if len(filter_sizes) == 1:
        sent_v = multiple_filter_output[0]
    else:
        sent_v = merge(multiple_filter_output, mode='concat')

    sent_v = Dense(hidden_dims)(sent_v)
    sent_v = Dropout(dropout_prob[1])(sent_v)
    sent_v = Activation('relu')(sent_v)
    sent_loss = Dense(2, activation='softmax', name='sent_level_output')(sent_v)

    adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=l2value)

    model = Model(input=sent_input, output=sent_loss)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy', 'fmeasure'], optimizer=adadelta)
    return model
Example #26
def test_learning_phase():
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')

    a_2 = Dense(16, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    assert dp.uses_learning_phase

    assert not a_2._uses_learning_phase
    assert b_2._uses_learning_phase

    # test merge
    m = merge([a_2, b_2], mode='concat')
    assert m._uses_learning_phase

    # Test recursion
    model = Model([a, b], [a_2, b_2])
    print(model.input_spec)
    assert model.uses_learning_phase

    c = Input(shape=(32, ), name='input_c')
    d = Input(shape=(32, ), name='input_d')

    c_2, b_2 = model([c, d])
    assert c_2._uses_learning_phase
    assert b_2._uses_learning_phase

    # try actually running graph
    fn = K.function(model.inputs + [K.learning_phase()], model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs_no_dp = fn([input_a_np, input_b_np, 0])
    fn_outputs_dp = fn([input_a_np, input_b_np, 1])
    # output a: nothing changes
    assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum()
    # output b: dropout applied
    assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
Example #27
def test_learning_phase():
    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')

    a_2 = Dense(16, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    assert dp.uses_learning_phase

    assert not a_2._uses_learning_phase
    assert b_2._uses_learning_phase

    # test merge
    m = merge([a_2, b_2], mode='concat')
    assert m._uses_learning_phase

    # Test recursion
    model = Model([a, b], [a_2, b_2])
    print(model.input_spec)
    assert model.uses_learning_phase

    c = Input(shape=(32,), name='input_c')
    d = Input(shape=(32,), name='input_d')

    c_2, b_2 = model([c, d])
    assert c_2._uses_learning_phase
    assert b_2._uses_learning_phase

    # try actually running graph
    fn = K.function(model.inputs + [K.learning_phase()], model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs_no_dp = fn([input_a_np, input_b_np, 0])
    fn_outputs_dp = fn([input_a_np, input_b_np, 1])
    # output a: nothing changes
    assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum()
    # output b: dropout applied
    assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
Example #28
def countdown_constraint_lstm(timesteps,
                              num_features,
                              num_pitches,
                              num_units_lstm,
                              dropout_prob=0.2):
    input_seq = Input((timesteps, num_features), name='input_seq')
    constraint = Input((timesteps, num_features + 1), name='constraint')
    countdown = Input((timesteps, timesteps), name='countdown')

    repr_input = input_seq
    repr_constraint = constraint

    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=True)(repr_constraint)
    repr_constraint = Dropout(dropout_prob)(repr_constraint)
    repr_constraint = LSTM(num_units_lstm,
                           return_sequences=False)(repr_constraint)
    tiled_constraint = RepeatVector(timesteps)(repr_constraint)

    output = merge([repr_input, tiled_constraint, countdown],
                   mode='concat',
                   concat_axis=2)

    output = LSTM(num_units_lstm, return_sequences=True)(output)
    output = Dropout(dropout_prob)(output)
    output = LSTM(num_units_lstm, return_sequences=False)(output)

    # NN
    output = Dense(num_units_lstm, activation='relu')(output)
    output = Dense(num_pitches)(output)
    preds = Activation('softmax', name='label')(output)

    model = Model(input=[input_seq, constraint, countdown], output=preds)

    model.compile(optimizer='adam',
                  loss={'label': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
Example #29
def simple_second_model():
    # Define and create a simple Conv2D model
    n = 8
    input_tensor = Input(INPUT_SHAPE[1:])
    x = Convolution2D(1024, 3, 3)(input_tensor)
    x = Convolution2D(2048, 3, 3)(x)

    list_covs = SeparateConvolutionFeatures(n)(x)
    list_covs = Regrouping(None)(list_covs)
    list_outputs = []
    for cov in list_covs:
        cov = SecondaryStatistic()(cov)
        cov = O2Transform(100)(cov)
        cov = O2Transform(100)(cov)
        list_outputs.append(WeightedVectorization(10)(cov))

    x = merge(list_outputs, mode='concat')
    x = Dense(10)(x)

    model = Model(input_tensor, x)

    model.compile(optimizer='sgd', loss='categorical_crossentropy')
    model.summary()
    return model
Example #30
def test_multi_input_layer():
    ####################################################
    # test multi-input layer
    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')

    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)

    merged = merge([a_2, b_2], mode='concat', name='merge')
    assert merged._keras_shape == (None, 16 * 2)
    merge_layer, merge_node_index, merge_tensor_index = merged._keras_history

    assert merge_node_index == 0
    assert merge_tensor_index == 0

    assert len(merge_layer.inbound_nodes) == 1
    assert len(merge_layer.outbound_nodes) == 0

    assert len(merge_layer.inbound_nodes[0].input_tensors) == 2
    assert len(merge_layer.inbound_nodes[0].inbound_layers) == 2

    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)

    model = Model(input=[a, b], output=[c, d], name='model')
    assert len(model.layers) == 6
    print('model.input_layers:', model.input_layers)
    print('model.input_layers_node_indices:', model.input_layers_node_indices)
    print('model.input_layers_tensor_indices:', model.input_layers_tensor_indices)
    print('model.output_layers', model.output_layers)

    print('output_shape:', model.get_output_shape_for([(None, 32), (None, 32)]))
    assert model.get_output_shape_for([(None, 32), (None, 32)]) == [(None, 64), (None, 5)]

    assert model.compute_mask([a, b], [None, None]) == [None, None]

    print('output_shape:', model.get_output_shape_for([(None, 32), (None, 32)]))
    assert model.get_output_shape_for([(None, 32), (None, 32)]) == [(None, 64), (None, 5)]

    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('layers:', [layer.name for layer in model.layers])
    assert [l.name for l in model.layers][2:] == ['dense_1', 'merge', 'dense_2', 'dense_3']
    print('input_layers:', [l.name for l in model.input_layers])
    assert [l.name for l in model.input_layers] == ['input_a', 'input_b']
    print('output_layers:', [l.name for l in model.output_layers])
    assert [l.name for l in model.output_layers] == ['dense_2', 'dense_3']

    # actually run model
    fn = K.function(model.inputs, model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]

    # test get_source_inputs
    print(get_source_inputs(c))
    assert get_source_inputs(c) == [a, b]

    # serialization / deserialization
    json_config = model.to_json()
    recreated_model = model_from_json(json_config)
    recreated_model.compile('rmsprop', 'mse')

    print('recreated:')
    print([layer.name for layer in recreated_model.layers])
    print([layer.name for layer in recreated_model.input_layers])
    print([layer.name for layer in recreated_model.output_layers])
    assert [l.name for l in recreated_model.layers][2:] == ['dense_1', 'merge', 'dense_2', 'dense_3']
    assert [l.name for l in recreated_model.input_layers] == ['input_a', 'input_b']
    assert [l.name for l in recreated_model.output_layers] == ['dense_2', 'dense_3']

    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]
Example #31
def target_RNN(wv, tweet_max_length, aspect_max_length, classes=2, **kwargs):
    ######################################################
    # HyperParameters
    ######################################################
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    rnn_size = kwargs.get("rnn_size", 75)
    rnn_type = kwargs.get("rnn_type", LSTM)
    final_size = kwargs.get("final_size", 100)
    final_type = kwargs.get("final_type", "linear")
    use_final = kwargs.get("use_final", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    drop_text_rnn = kwargs.get("drop_text_rnn", 0.)
    drop_text_rnn_U = kwargs.get("drop_text_rnn_U", 0.)
    drop_target_rnn = kwargs.get("drop_target_rnn", 0.)
    drop_rep = kwargs.get("drop_rep", 0.)
    drop_final = kwargs.get("drop_final", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)
    clipnorm = kwargs.get("clipnorm", 5)
    bi = kwargs.get("bi", False)
    lr = kwargs.get("lr", 0.001)

    attention = kwargs.get("attention", "simple")
    #####################################################
    shared_RNN = get_RNN(rnn_type,
                         rnn_size,
                         bi=bi,
                         return_sequences=True,
                         dropout_U=drop_text_rnn_U)

    input_tweet = Input(shape=[tweet_max_length], dtype='int32')
    input_aspect = Input(shape=[aspect_max_length], dtype='int32')

    # Embeddings
    tweets_emb = embeddings_layer(max_length=tweet_max_length,
                                  embeddings=wv,
                                  trainable=trainable,
                                  masking=True)(input_tweet)
    tweets_emb = GaussianNoise(noise)(tweets_emb)
    tweets_emb = Dropout(drop_text_input)(tweets_emb)

    aspects_emb = embeddings_layer(max_length=aspect_max_length,
                                   embeddings=wv,
                                   trainable=trainable,
                                   masking=True)(input_aspect)
    aspects_emb = GaussianNoise(noise)(aspects_emb)

    # Recurrent NN
    h_tweets = shared_RNN(tweets_emb)
    h_tweets = Dropout(drop_text_rnn)(h_tweets)

    h_aspects = shared_RNN(aspects_emb)
    h_aspects = Dropout(drop_target_rnn)(h_aspects)
    h_aspects = MeanOverTime()(h_aspects)
    h_aspects = RepeatVector(tweet_max_length)(h_aspects)

    # Merge of Aspect + Tweet
    representation = merge([h_tweets, h_aspects], mode='concat')

    # apply attention over the hidden outputs of the RNN's
    att_layer = AttentionWithContext if attention == "context" else Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)

    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)

    ######################################################
    # Probabilities
    ######################################################
    probabilities = Dense(1 if classes == 2 else classes,
                          activation="sigmoid" if classes == 2 else "softmax",
                          activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=[input_aspect, input_tweet], output=probabilities)

    loss = "binary_crossentropy" if classes == 2 else "categorical_crossentropy"
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
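A hypothetical call, assuming wv is a (vocab_size, emb_dim) numpy embedding matrix and the helper layers (embeddings_layer, get_RNN, Attention, MeanOverTime) are importable from the source project:

model = target_RNN(wv, tweet_max_length=50, aspect_max_length=6,
                   classes=3, bi=True, attention="context")

Example #32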
def siamese_RNN(wv, sent_length, **params):
    rnn_size = params.get("rnn_size", 100)
    rnn_drop_U = params.get("rnn_drop_U", 0.2)
    noise_words = params.get("noise_words", 0.3)
    drop_words = params.get("drop_words", 0.2)
    drop_sent = params.get("drop_sent", 0.3)
    sent_dense = params.get("sent_dense", 50)
    final_size = params.get("final_size", 100)
    drop_final = params.get("drop_final", 0.5)

    ###################################################
    # Shared Layers
    ###################################################
    embedding = embeddings_layer(max_length=sent_length,
                                 embeddings=wv,
                                 masking=True)
    encoder = get_RNN(LSTM,
                      rnn_size,
                      bi=False,
                      return_sequences=True,
                      dropout_U=rnn_drop_U)
    attention = Attention()
    sent_dense = Dense(sent_dense, activation="relu")

    ###################################################
    # Input A
    ###################################################
    input_a = Input(shape=[sent_length], dtype='int32')
    # embed sentence A
    emb_a = embedding(input_a)
    emb_a = GaussianNoise(noise_words)(emb_a)
    emb_a = Dropout(drop_words)(emb_a)
    # encode sentence A
    enc_a = encoder(emb_a)
    enc_a = Dropout(drop_sent)(enc_a)
    enc_a = attention(enc_a)
    enc_a = sent_dense(enc_a)
    enc_a = Dropout(drop_sent)(enc_a)

    ###################################################
    # Input B
    ###################################################
    input_b = Input(shape=[sent_length], dtype='int32')
    # embed sentence B
    emb_b = embedding(input_b)
    emb_b = GaussianNoise(noise_words)(emb_b)
    emb_b = Dropout(drop_words)(emb_b)
    # encode sentence B
    enc_b = encoder(emb_b)
    enc_b = Dropout(drop_sent)(enc_b)
    enc_b = attention(enc_b)
    enc_b = sent_dense(enc_b)
    enc_b = Dropout(drop_sent)(enc_b)

    ###################################################
    # Comparison
    ###################################################
    comparison = merge([enc_a, enc_b], mode='concat')
    comparison = MaxoutDense(final_size)(comparison)
    comparison = Dropout(drop_final)(comparison)

    probabilities = Dense(1, activation='sigmoid')(comparison)
    model = Model(input=[input_a, input_b], output=probabilities)

    model.compile(optimizer=Adam(clipnorm=1., lr=0.001),
                  loss='binary_crossentropy',
                  metrics=["binary_accuracy"])
    return model
Example #33
def aspect_RNN(wv, text_length, target_length, loss, activation, **kwargs):
    ######################################################
    # HyperParameters
    ######################################################
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    rnn_size = kwargs.get("rnn_size", 75)
    rnn_type = kwargs.get("rnn_type", LSTM)
    final_size = kwargs.get("final_size", 100)
    final_type = kwargs.get("final_type", "linear")
    use_final = kwargs.get("use_final", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    drop_text_rnn = kwargs.get("drop_text_rnn", 0.)
    drop_text_rnn_U = kwargs.get("drop_text_rnn_U", 0.)
    drop_target_rnn = kwargs.get("drop_target_rnn", 0.)
    drop_rep = kwargs.get("drop_rep", 0.)
    drop_final = kwargs.get("drop_final", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)
    clipnorm = kwargs.get("clipnorm", 5)
    bi = kwargs.get("bi", False)
    lr = kwargs.get("lr", 0.001)

    attention = kwargs.get("attention", "simple")
    #####################################################

    shared_RNN = get_RNN(rnn_type,
                         rnn_size,
                         bi=bi,
                         return_sequences=True,
                         dropout_U=drop_text_rnn_U)
    # shared_RNN = LSTM(rnn_size, return_sequences=True, dropout_U=drop_text_rnn_U)

    input_text = Input(shape=[text_length], dtype='int32')
    input_target = Input(shape=[target_length], dtype='int32')

    ######################################################
    # Embeddings
    ######################################################
    emb_text = embeddings_layer(max_length=text_length,
                                embeddings=wv,
                                trainable=trainable,
                                masking=True)(input_text)
    emb_text = GaussianNoise(noise)(emb_text)
    emb_text = Dropout(drop_text_input)(emb_text)

    emb_target = embeddings_layer(max_length=target_length,
                                  embeddings=wv,
                                  trainable=trainable,
                                  masking=True)(input_target)
    emb_target = GaussianNoise(noise)(emb_target)

    ######################################################
    # RNN - Tweet
    ######################################################
    enc_text = shared_RNN(emb_text)
    enc_text = Dropout(drop_text_rnn)(enc_text)

    ######################################################
    # RNN - Aspect
    ######################################################
    enc_target = shared_RNN(emb_target)
    enc_target = MeanOverTime()(enc_target)
    enc_target = Dropout(drop_target_rnn)(enc_target)
    enc_target = RepeatVector(text_length)(enc_target)

    ######################################################
    # Merge of Aspect + Tweet
    ######################################################
    representation = merge([enc_text, enc_target], mode='concat')
    att_layer = AttentionWithContext if attention == "context" else Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)

    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)

    ######################################################
    # Probabilities
    ######################################################
    probabilities = Dense(1,
                          activation=activation,
                          activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=[input_target, input_text], output=probabilities)
    # model = Model(input=[input_text, input_target], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
Example #34
def test_functional_guide():
    # MNIST
    from keras.layers import Input, Dense, LSTM
    from keras.models import Model
    from keras.utils import np_utils

    # this returns a tensor
    inputs = Input(shape=(784,))

    # a layer instance is callable on a tensor, and returns a tensor
    x = Dense(64, activation='relu')(inputs)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)

    # this creates a model that includes
    # the Input layer and three Dense layers
    model = Model(input=inputs, output=predictions)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # random data standing in for a training set (no real MNIST split here)
    X_train = np.random.random((100, 784))
    Y_train = np.random.random((100, 10))

    model.fit(X_train, Y_train, nb_epoch=2, batch_size=128)

    assert model.inputs == [inputs]
    assert model.outputs == [predictions]
    assert model.input == inputs
    assert model.output == predictions
    assert model.input_shape == (None, 784)
    assert model.output_shape == (None, 10)

    # try calling the model itself on a new input tensor
    inputs = Input(shape=(784,))
    new_outputs = model(inputs)
    new_model = Model(input=inputs, output=new_outputs)
    new_model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    ##################################################
    # multi-io
    ##################################################
    tweet_a = Input(shape=(4, 25))
    tweet_b = Input(shape=(4, 25))
    # this layer can take as input a matrix
    # and will return a vector of size 64
    shared_lstm = LSTM(64)

    # when we reuse the same layer instance
    # multiple times, the weights of the layer
    # are also being reused
    # (it is effectively *the same* layer)
    encoded_a = shared_lstm(tweet_a)
    encoded_b = shared_lstm(tweet_b)

    # we can then concatenate the two vectors:
    merged_vector = merge([encoded_a, encoded_b],
                          mode='concat', concat_axis=-1)

    # and add a logistic regression on top
    predictions = Dense(1, activation='sigmoid')(merged_vector)

    # we define a trainable model linking the
    # tweet inputs to the predictions
    model = Model(input=[tweet_a, tweet_b], output=predictions)

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    data_a = np.random.random((1000, 4, 25))
    data_b = np.random.random((1000, 4, 25))
    labels = np.random.random((1000,))
    model.fit([data_a, data_b], labels, nb_epoch=1)

    model.summary()
    assert model.inputs == [tweet_a, tweet_b]
    assert model.outputs == [predictions]
    assert model.input == [tweet_a, tweet_b]
    assert model.output == predictions

    assert model.input_shape == [(None, 4, 25), (None, 4, 25)]
    assert model.output_shape == (None, 1)

    assert shared_lstm.get_output_at(0) == encoded_a
    assert shared_lstm.get_output_at(1) == encoded_b
    assert shared_lstm.input_shape == (None, 4, 25)
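    # A hedged aside (not in the original test): since both applications
    # of shared_lstm read and write one weight set, training through
    # either branch moves the same parameter arrays.
    w_before = [w.copy() for w in shared_lstm.get_weights()]
    model.train_on_batch([data_a, data_b], labels)
    w_after = shared_lstm.get_weights()
    assert any(not np.allclose(b, a) for b, a in zip(w_before, w_after))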
Example #35
from __future__ import absolute_import
import keras.backend as K
from keras.engine import merge
from ..layers import PassThrough
from . import loggers

def normalize_mask(x, mask):
    '''Align the mask with the tensor x.

    Arguments: x is a data tensor; mask is a binary tensor.
    Rationale: keep the mask at the same dimensionality as x, but with only
               a length-1 trailing dimension. This ensures broadcastability,
               which matters because inferring shapes is hard and shapes are
               easy to get wrong.
    '''
    mask = K.cast(mask, K.floatx())
    while K.ndim(mask) != K.ndim(x):
        if K.ndim(mask) > K.ndim(x):
            mask = K.any(mask, axis=-1)
        elif K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
    return K.any(mask, axis=-1, keepdims=True)
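
To make the intended shapes concrete, here is a numpy analogue of what normalize_mask computes for a 3D tensor and a 2D mask (illustrative only; the real function operates on backend tensors):

import numpy as np

x = np.zeros((2, 5, 8))                 # (samples, timesteps, features)
mask = np.array([[1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]])      # (samples, timesteps)

m = mask.astype('float32')
while m.ndim != x.ndim:                 # one expand_dims here: 2D -> 3D
    m = np.expand_dims(m, -1)
m = np.any(m, axis=-1, keepdims=True)   # shape (2, 5, 1), broadcasts with x
assert m.shape == (2, 5, 1)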


concat = lambda x: merge(x, mode='concat')
def xor(a, b, v=None):
    """Return True iff exactly one of a and b is the sentinel v."""
    return (a is not v and b is v) or (a is v and b is not v)
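
The xor helper reports whether exactly one of its two arguments is the sentinel v, which is handy for validating mutually exclusive arguments:

assert xor('a', None)          # exactly one argument is the sentinel
assert not xor(None, None)     # both are
assert not xor('a', 'b')       # neither is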
Example #36
characters.append(EOS)
int2char = list(characters)
char2int = {c:i for i,c in enumerate(characters)}
print(char2int)

VOCAB_SIZE = len(characters)

input_seq = Input(shape=(None,), dtype='int32')

embedded = Embedding(VOCAB_SIZE, voc_dim, name='embd')(input_seq)
#drop_out = Dropout(0.1, name='d_o')(embedded)

forward = LSTM(middle_dim, return_sequences=True, consume_less='mem', name='fwd')(embedded)
backward = LSTM(middle_dim, return_sequences=True, go_backwards=True, name='bwd')(embedded)

sum_res = merge([forward, backward], mode='sum', name='mrg')

repeat = RepeatTimeDistributedVector(max_out, name='RTD')(sum_res)

alstm = ALSTM(voc_dim, return_sequences=True, name='ALSTM')(repeat)

dense = TimeDistributed(Dense(VOCAB_SIZE, name='d_t_d'), name='t_d1')(alstm)

out = TimeDistributed(HierarchicalSoftmax(levels=2, name='HSM'), name='t_d2')(dense)

model = Model(input_seq, out)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

sentence = "May the force be with you"
sentence = [EOS] + list(sentence) + [EOS]
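
One caveat in this snippet (it applies to Example #38 below as well): with return_sequences=True, a Keras 1 LSTM built with go_backwards=True emits its output sequence in reversed time order, so merge([forward, backward], mode='sum') pairs forward step t with backward step T-t. If time-aligned sums are intended, the backward sequence can be flipped first, e.g. with a Lambda (a sketch, assuming K.reverse is available on your backend):

from keras.layers import Lambda
import keras.backend as K

# re-reverse the backward outputs so step t lines up with forward step t
backward_aligned = Lambda(lambda t: K.reverse(t, axes=1),
                          output_shape=lambda s: s)(backward)
sum_res = merge([forward, backward_aligned], mode='sum', name='mrg')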
Example #37
                                            nb_filters=nb_filters,
                                            block=4 * blocks_per_group + i,
                                            nb_total_blocks=nb_total_blocks,
                                            subsample_factor=subsample_factor)

    f16 = x
    f16 = UpSampling2D(size=(16, 16), dim_ordering='tf')(f16)
    f16 = Convolution2D(16,
                        3,
                        3,
                        init='he_uniform',
                        border_mode='same',
                        activation='relu',
                        dim_ordering='tf')(f16)

    segmentation = merge([f16, f8, f4, f2], mode='concat', concat_axis=-1)
    segmentation = Convolution2D(1,
                                 1,
                                 1,
                                 activation='sigmoid',
                                 init='uniform',
                                 border_mode='same',
                                 dim_ordering='tf')(segmentation)
    segmentation = Reshape((img_rows_segment, img_cols_segment))(segmentation)

    model_segment = Model(input=images_segment, output=segmentation)

    model_segment.summary()

    print('')
    print('model init time: {}'.format(time.time() - start_time))
Example #38
voc_size = 35000
voc_dim = 100
middle_dim = 200
max_out = 10


input_seq = Input(shape=(None,), dtype='int32')

embedded = Embedding(voc_size, voc_dim)(input_seq)
drop_out = Dropout(0.1)(embedded)

forward = LSTM(middle_dim, return_sequences=True, consume_less='mem')(drop_out)
backward = LSTM(middle_dim, return_sequences=True, go_backwards=True)(drop_out)

sum_res = merge([forward, backward], mode='sum')

repeat = RepeatTimeDistributedVector(max_out)(sum_res)

alstm = ALSTM(voc_dim, return_sequences=True)(repeat)

dense = TimeDistributed(Dense(voc_size))(alstm)

out = TimeDistributed(HierarchicalSoftmax(levels=3))(dense)

model = Model(input_seq, out)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')


Example #39
def build_model(spectral_input_size, temporal_input_size,
                spectral_n_feature, temporal_n_feature,
                conv_layers, dense_layers, init,
                learning_rate, optimizer, pooling, dropout, atrous,
                regularizer_conf, temporal, objective, penalty,
                activation, last_layer, batch_size,
                n_batch_per_file, lstm_dropout):
    """Build the model"""

    spectral_input = Input(
        batch_shape=(batch_size, spectral_input_size,
                     spectral_n_feature),
        name='spectral_input')
    temporal_input = Input(
        batch_shape=(batch_size, temporal_input_size,
                     temporal_n_feature),
        name='temporal_input')
    pre_temporal_input = Input(
        batch_shape=(batch_size, temporal_input_size,
                     temporal_n_feature),
        name='pre_temporal_input')
    n_conv = len(conv_layers)
    n_dense = len(dense_layers)

    nb_kernel = conv_layers[0][0]
    he_kernel = conv_layers[0][1]
    regularizer = None
    if regularizer_conf['name'] == 'l1':
        regularizer = l1(l=regularizer_conf['value'])
    elif regularizer_conf['name'] == 'l2':
        regularizer = l2(l=regularizer_conf['value'])

    # spectral_conv = Conv1D(nb_kernel, he_kernel, strides=1, padding='valid',
    #                        dilation_rate=1, activation=activation,
    #                        use_bias=True,
    #                        kernel_initializer=init,
    #                        bias_initializer='zeros',
    #                        kernel_regularizer=regularizer,
    #                        bias_regularizer=None,
    #                        activity_regularizer=None,
    #                        kernel_constraint=None,
    #                        bias_constraint=None)(spectral_input)
    spectral_conv = Convolution1D(nb_kernel, he_kernel,
                                  border_mode='valid',
                                  activation=activation,
                                  bias=True,
                                  init=init,
                                  W_regularizer=regularizer)(spectral_input)
    # spectral_conv = MaxPooling1D(pool_length=2)(spectral_conv)
    spectral_conv = Dropout(dropout)(spectral_conv)
    # spectral_conv = MaxPooling1D(pool_size=2, strides=2,
    #                              padding='valid')(spectral_conv)
    # temporal_conv = Conv1D(nb_kernel, he_kernel, strides=1, padding='valid',
    #                        dilation_rate=1, activation=activation,
    #                        use_bias=True,
    #                        kernel_initializer=init,
    #                        bias_initializer='zeros',
    #                        kernel_regularizer=regularizer,
    #                        bias_regularizer=None,
    #                        activity_regularizer=None,
    #                        kernel_constraint=None,
    #                        bias_constraint=None)(temporal_input)
    temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                  border_mode='valid',
                                  activation=activation,
                                  bias=True,
                                  init=init,
                                  W_regularizer=regularizer)(temporal_input)
    
    # temporal_conv = MaxPooling1D(pool_size=2, strides=2,
    #                              padding='valid')(temporal_conv)
    # temporal_conv = MaxPooling1D(pool_length=2)(temporal_conv)
    temporal_conv = Dropout(dropout)(temporal_conv)
    pre_temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation,
                                      bias=True,
                                      init=init,
                                      W_regularizer=regularizer)(pre_temporal_input)
    # pre_temporal_conv = MaxPooling1D(pool_length=2)(pre_temporal_conv)
    pre_temporal_conv = Dropout(dropout)(pre_temporal_conv)
    for i in range(1, n_conv):
        nb_kernel = conv_layers[i][0]
        he_kernel = conv_layers[i][1]
        regularizer = None
        if regularizer_conf['name'] == 'l1':
            regularizer = l1(l=regularizer_conf['value'])
        elif regularizer_conf['name'] == 'l2':
            regularizer = l2(l=regularizer_conf['value'])
        # spectral_conv = Conv1D(nb_kernel, he_kernel, strides=1, padding='valid',
        #                    dilation_rate=1, activation=activation,
        #                    use_bias=True,
        #                    kernel_initializer=init,
        #                    bias_initializer='zeros',
        #                    kernel_regularizer=None,
        #                    bias_regularizer=None,
        #                    activity_regularizer=regularizer,
        #                    kernel_constraint=None,
        #                    bias_constraint=None)(spectral_conv)
        spectral_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation,
                                      bias=True,
                                      init=init,
                                      W_regularizer=None)(spectral_conv)
        spectral_conv = Dropout(dropout)(spectral_conv)
        # temporal_conv = Conv1D(nb_kernel, he_kernel, strides=1, padding='valid',
        #                    dilation_rate=1, activation=activation,
        #                    use_bias=True,
        #                    kernel_initializer=init,
        #                    bias_initializer='zeros',
        #                    kernel_regularizer=None,
        #                    bias_regularizer=None,
        #                    activity_regularizer=regularizer,
        #                    kernel_constraint=None,
        #                    bias_constraint=None)(temporal_conv)
        temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                      border_mode='valid',
                                      activation=activation,
                                      bias=True,
                                      init=init,
                                      W_regularizer=None)(temporal_conv)
        # temporal_conv = MaxPooling1D(pool_size=2, strides=2,
        #                              padding='valid')(temporal_conv)
        # if i < 3:
        #     temporal_conv = MaxPooling1D(pool_length=2)(temporal_conv)
        temporal_conv = Dropout(dropout)(temporal_conv)
        
        pre_temporal_conv = Convolution1D(nb_kernel, he_kernel,
                                          border_mode='valid',
                                          activation=activation,
                                          bias=True,
                                          init=init,
                                          W_regularizer=None)(pre_temporal_conv)
        # if i < 3:
        #     pre_temporal_conv = MaxPooling1D(pool_length=2)(pre_temporal_conv)
        pre_temporal_conv = Dropout(dropout)(pre_temporal_conv)
    
    # spectral_conv = GRU(dense_layers[0], activation='tanh',
    #                     recurrent_activation='hard_sigmoid',
    #                     use_bias=True,
    #                     kernel_initializer='glorot_uniform',
    #                     recurrent_initializer='orthogonal',
    #                     bias_initializer='zeros',
    #                     kernel_regularizer=None,
    #                     recurrent_regularizer=None,
    #                     bias_regularizer=None,
    #                     activity_regularizer=None,
    #                     kernel_constraint=None,
    #                     recurrent_constraint=None,
    #                     bias_constraint=None,
    #                     dropout=0.0,
    #                     stateful=True,
    #                     implementation=0,
    #                     recurrent_dropout=lstm_dropout)(spectral_conv)
    # temporal_conv = GRU(dense_layers[0], activation='tanh',
    #                     recurrent_activation='hard_sigmoid',
    #                     use_bias=True,
    #                     kernel_initializer='glorot_uniform',
    #                     recurrent_initializer='orthogonal',
    #                     bias_initializer='zeros',
    #                     kernel_regularizer=None,
    #                     recurrent_regularizer=None,
    #                     bias_regularizer=None,
    #                     activity_regularizer=None,
    #                     kernel_constraint=None,
    #                     recurrent_constraint=None,
    #                     bias_constraint=None,
    #                     dropout=0.0,
    #                     stateful=True,
    #                     implementation=0,
    #                     recurrent_dropout=lstm_dropout)(temporal_conv)
    spectral_conv = Flatten()(spectral_conv)
    temporal_conv = Flatten()(temporal_conv)
    pre_temporal_conv = Flatten()(pre_temporal_conv)
    # merged_conv = concatenate([spectral_conv, temporal_conv])
    merged_conv = merge([spectral_conv, temporal_conv, pre_temporal_conv],
                        mode='concat')

    for i in range(0, n_dense):
        regularizer = None
        if regularizer_conf['name'] == 'l1':
            regularizer = l1(l=regularizer_conf['value'])
        elif regularizer_conf['name'] == 'l2':
            regularizer = l2(l=regularizer_conf['value'])
        
        # merged_conv = Dense(dense_layers[i],
        #                     activation=activation, kernel_initializer=init,
        #                     activity_regularizer=None, use_bias=True,
        #                     bias_initializer='zeros',
        #                     kernel_regularizer=regularizer,
        #                     bias_regularizer=None,
        #                     kernel_constraint=None, bias_constraint=None
        #                     )(merged_conv)
        merged_conv = Dense(dense_layers[i],
                            activation=activation,
                            init=init,
                            W_regularizer=regularizer)(merged_conv)
        merged_conv = Dropout(lstm_dropout)(merged_conv)
        
    last_regularizer = None
    if last_layer['regularization']['name'] == 'l1':
        last_regularizer = l1(l=last_layer['regularization']['value'])
    elif last_layer['regularization']['name'] == 'l2':
        last_regularizer = l2(l=last_layer['regularization']['value'])
    # output = Dense(1, activation=last_layer['activation'],
    #                kernel_initializer=init,
    #                activity_regularizer=None, use_bias=True,
    #                bias_initializer='zeros',
    #                kernel_regularizer=last_regularizer,
    #                bias_regularizer=None,
    #                kernel_constraint=None, bias_constraint=None,
    #                name='output')(merged_conv)
    output = Dense(1, activation=last_layer['activation'],
                   init=init,
                   W_regularizer=last_regularizer, name='output')(merged_conv)

    # model = Model(inputs=[spectral_input, temporal_input],
    #               outputs=[output])
    model = Model(input=[spectral_input, temporal_input,
                         pre_temporal_input],
                  output=output)
    
    
    compile_model(model, objective, penalty, learning_rate, optimizer)


    model_structure = ''
    print(model.summary())

    return [model_structure, model]
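
For orientation, build_model expects conv_layers as a list of (nb_kernel, kernel_size) pairs and dense_layers as a list of widths. A hypothetical call might look like this (every value below is illustrative, not taken from the original configuration):

_, model = build_model(
    spectral_input_size=128, temporal_input_size=128,
    spectral_n_feature=4, temporal_n_feature=4,
    conv_layers=[(64, 3), (64, 3)],     # (nb_kernel, he_kernel) pairs
    dense_layers=[256, 64],
    init='he_normal',
    learning_rate=1e-3, optimizer='adam',
    pooling=None, dropout=0.25, atrous=False,
    regularizer_conf={'name': 'l2', 'value': 1e-4},
    temporal=True, objective='mse', penalty=None,
    activation='relu',
    last_layer={'activation': 'sigmoid',
                'regularization': {'name': 'l2', 'value': 1e-4}},
    batch_size=32, n_batch_per_file=10, lstm_dropout=0.3)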
Example #40
def test_recursion():
    ####################################################
    # test recursion

    a = Input(shape=(32,), name='input_a')
    b = Input(shape=(32,), name='input_b')

    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)
    merged = merge([a_2, b_2], mode='concat', name='merge')
    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)

    model = Model(input=[a, b], output=[c, d], name='model')

    e = Input(shape=(32,), name='input_e')
    f = Input(shape=(32,), name='input_f')
    g, h = model([e, f])

    # g2, h2 = model([e, f])

    assert g._keras_shape == c._keras_shape
    assert h._keras_shape == d._keras_shape

    # test separate manipulation of different layer outputs
    i = Dense(7, name='dense_4')(h)

    final_model = Model(input=[e, f], output=[i, g], name='final')
    assert len(final_model.inputs) == 2
    assert len(final_model.outputs) == 2
    assert len(final_model.layers) == 4

    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('final_model layers:', [layer.name for layer in final_model.layers])
    assert [layer.name for layer in final_model.layers][2:] == ['model', 'dense_4']

    print(model.compute_mask([e, f], [None, None]))
    assert model.compute_mask([e, f], [None, None]) == [None, None]

    print(final_model.get_output_shape_for([(10, 32), (10, 32)]))
    assert final_model.get_output_shape_for([(10, 32), (10, 32)]) == [(10, 7), (10, 64)]

    # run recursive model
    fn = K.function(final_model.inputs, final_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]

    # test serialization
    model_config = final_model.get_config()
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)

    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]

    ####################################################
    # test multi-input multi-output

    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])

    o = Input(shape=(32,), name='input_o')
    p = Input(shape=(32,), name='input_p')
    q, r = model([o, p])

    assert n._keras_shape == (None, 5)
    assert q._keras_shape == (None, 64)
    s = merge([n, q], mode='concat', name='merge_nq')
    assert s._keras_shape == (None, 64 + 5)

    # test with single output as 1-elem list
    multi_io_model = Model([j, k, o, p], [s])

    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    # test with single output as tensor
    multi_io_model = Model([j, k, o, p], s)

    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    # test serialization
    print('multi_io_model.layers:', multi_io_model.layers)
    print('len(model.inbound_nodes):', len(model.inbound_nodes))
    print('len(model.outbound_nodes):', len(model.outbound_nodes))
    model_config = multi_io_model.get_config()
    print(model_config)
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)

    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)),
                     np.random.random((10, 32)), np.random.random((10, 32))])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    config = model.get_config()
    new_model = Model.from_config(config)

    model.summary()
    json_str = model.to_json()
    new_model = model_from_json(json_str)

    yaml_str = model.to_yaml()
    new_model = model_from_yaml(yaml_str)

    ####################################################
    # test invalid graphs

    # input is not an Input tensor
    j = Input(shape=(32,), name='input_j')
    j = Dense(32)(j)
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])

    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n])

    # disconnected graph
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception) as e:
        invalid_model = Model([j], [m, n])

    # redundant outputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    # this should work lol
    # TODO: raise a warning
    invalid_model = Model([j, k], [m, n, n])

    # redundant inputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k, j], [m, n])

    # I have no idea what I'm doing: garbage as inputs/outputs
    j = Input(shape=(32,), name='input_j')
    k = Input(shape=(32,), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n, 0])

    ####################################################
    # test calling layers/models on TF tensors

    if K._BACKEND == 'tensorflow':
        import tensorflow as tf
        j = Input(shape=(32,), name='input_j')
        k = Input(shape=(32,), name='input_k')
        m, n = model([j, k])
        tf_model = Model([j, k], [m, n])

        # magic
        j_tf = tf.placeholder(dtype=K.floatx())
        k_tf = tf.placeholder(dtype=K.floatx())
        m_tf, n_tf = tf_model([j_tf, k_tf])
        assert not hasattr(m_tf, '_keras_shape')
        assert not hasattr(n_tf, '_keras_shape')
        assert K.int_shape(m_tf) == (None, 64)
        assert K.int_shape(n_tf) == (None, 5)

        # test merge
        o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)

        # test tensor input
        x = tf.placeholder(shape=(None, 2), dtype=K.floatx())
        input_layer = InputLayer(input_tensor=x)

        x = Input(tensor=x)
        y = Dense(2)(x)