Ejemplo n.º 1
0
    def residual_block(x):
        """Build one gated, dilated residual unit on top of tensor ``x``.

        Reads ``i``, ``s``, ``nb_filters`` and ``use_bias`` from the enclosing
        scope.  Two causal dilated convolutions (tanh and sigmoid) are
        multiplied together, projected with a width-1 convolution and summed
        with the block input.  Returns the summed tensor.
        """
        # TODO: initialization, regularization?
        # Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet.
        dilation = 2**i
        shared_conv_args = dict(atrous_rate=dilation,
                                border_mode='valid',
                                causal=True,
                                bias=use_bias)

        filter_branch = CausalAtrousConvolution1D(
            nb_filters,
            2,
            name='dilated_conv_%d_tanh_s%d' % (dilation, s),
            activation='tanh',
            **shared_conv_args)(x)

        # Dropout is created after the tanh branch has consumed ``x``, so only
        # the sigmoid branch sees the dropped-out input.
        dropped = layers.Dropout(0.2)(x)
        gate_branch = CausalAtrousConvolution1D(
            nb_filters,
            2,
            name='dilated_conv_%d_sigm_s%d' % (dilation, s),
            activation='sigmoid',
            **shared_conv_args)(dropped)

        gated = layers.Merge(
            mode='mul',
            name='gated_activation_%d_s%d' % (i, s))([filter_branch, gate_branch])

        projected = layers.Convolution1D(nb_filters,
                                         1,
                                         border_mode='same',
                                         bias=use_bias)(gated)
        return layers.Merge(mode='sum')([x, projected])
Ejemplo n.º 2
0
    def residual_block(x):
        """Build one gated, dilated residual unit and return ``(res_x, skip_x)``.

        Reads ``i``, ``s``, ``nb_filters``, ``use_bias`` and ``res_l2`` from
        the enclosing scope.

        The block applies a tanh and a sigmoid causal convolution with
        dilation ``2**i`` to ``x``, multiplies them element-wise, and feeds
        the product to two separate width-1 convolutions:

        * ``res_x``  - the projection summed with the block input ``x``;
        * ``skip_x`` - the projection intended for a skip connection.

        Uses the Keras 2 argument names (``use_bias``, ``kernel_regularizer``)
        and the ``Multiply`` / ``Add`` layers instead of the legacy ``bias`` /
        ``W_regularizer`` keywords and the legacy ``Merge`` layer, which are
        not available in later Keras 2 releases.
        """
        # TODO: initialization, regularization?
        # Causality comes from Conv1D's own padding='causal' here; no custom
        # convolution layer is involved in this block.
        dilation = 2**i
        tanh_out = Conv1D(
            nb_filters,
            2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_tanh_s%d' % (dilation, s),
            activation='tanh',
            kernel_regularizer=l2(res_l2)
        )(x)
        sigm_out = Conv1D(
            nb_filters,
            2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_sigm_s%d' % (dilation, s),
            activation='sigmoid',
            kernel_regularizer=l2(res_l2)
        )(x)
        # Gated activation unit: element-wise product of the two branches.
        gated = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out])

        res_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                              kernel_regularizer=l2(res_l2))(gated)
        skip_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                               kernel_regularizer=l2(res_l2))(gated)
        # Residual connection back to the block input.
        res_x = layers.Add()([x, res_x])
        return res_x, skip_x
Ejemplo n.º 3
0
 def call(self, inputs, **kwargs):
     """Attend over the left context of ``symbols`` using ``encodings``.

     :param inputs: list of exactly two tensors, ``[symbols, encodings]``.
     :return: ``[output, p]`` where ``p`` is the result of
         ``distributed_dot_softmax`` and ``output`` is the compressed
         context combined with ``encodings`` according to ``self.merge``.
     :raises ValueError: if ``self.merge`` is not one of ``"concatenate"``,
         ``"sum"``, ``"attention"`` or ``"sigmoid"`` (previously this fell
         through and failed with an unbound local variable).
     """
     assert isinstance(inputs, list) and len(inputs) == 2
     symbols, encodings = inputs[0], inputs[1]
     # dropout masks
     self._generate_dropout_mask(encodings)
     # contexts.shape = (M, T, left, input_dim)
     contexts = make_history(symbols, self.left, symbols[:, :1])
     # M.shape = C.shape = (M, T, left, output_dim)
     M = kb.dot(contexts, self.M)  # input embeddings
     C = kb.dot(contexts, self.C)  # output embeddings
     if self.use_bias:
         M += self.T
     if self.embeddings_dropout > 0.0:
         M = M * self._dropout_mask[0]
         C = C * self._dropout_mask[1]
     p = distributed_dot_softmax(M, encodings)
     compressed_context = distributed_transposed_dot(C, p)
     if self.merge == "concatenate":
         output = kl.Concatenate()([compressed_context, encodings])
     elif self.merge == "sum":
         # kl.Add is the Keras 2 counterpart of the legacy Merge(mode='sum').
         output = kl.Add()([compressed_context, encodings])
     elif self.merge == "attention":
         output = compressed_context
     elif self.merge == "sigmoid":
         output = distributed_cell([compressed_context, encodings])
     else:
         raise ValueError("Unsupported merge mode: %r" % (self.merge,))
     return [output, p]
Ejemplo n.º 4
0
    def buildModel(self):
        """Build, compile and return the two-sequence classification model.

        Both inputs (``seq1`` and ``seq2``, each of length ``self.maxLen``)
        share one embedding layer and one reshape layer.  The two embedded
        sequences are concatenated along axis 2, passed through ``ResNetX``,
        dropout and a 2-unit softmax layer named ``label``.

        The compiled model is stored in ``self.model`` and also returned.

        The concatenation uses ``layers.Concatenate(axis=2)``, the Keras 2
        counterpart of the legacy ``Merge(mode="concat", concat_axis=2)``;
        the rest of this method already uses Keras 2 argument names.
        """
        # word net
        input1 = layers.Input(shape=(self.maxLen,), name="seq1")
        input2 = layers.Input(shape=(self.maxLen,), name="seq2")

        # One embedding / reshape layer instance each, shared by both inputs.
        comEmbedding = layers.Embedding(input_dim=self.Size_Vocab,
                                        output_dim=self.embeddingSize,
                                        input_length=self.maxLen)
        emb1 = comEmbedding(input1)
        emb2 = comEmbedding(input2)
        # Append a trailing axis of size 1 to each embedded sequence.
        reshapeLayer = layers.Reshape(target_shape=(self.maxLen, self.embeddingSize, 1))
        x1 = reshapeLayer(emb1)
        x2 = reshapeLayer(emb2)
        x = layers.Concatenate(axis=2)([x1, x2])
        x = ResNetX(x)
        dropLayer = layers.Dropout(0.36)(x)
        predictionLayer = layers.Dense(units=2, name="label", activation="softmax")(dropLayer)
        self.model = models.Model(inputs=[input1, input2],
                                  outputs=[
                                      predictionLayer,
                                  ]
                                  )

        self.model.compile(optimizer=optimizers.Adam(),
                           loss={
                               "label": losses.binary_crossentropy
                           }
                           )

        return self.model
Ejemplo n.º 5
0
def build_model():
    """Assemble the two-branch sequence model and return it (uncompiled).

    One branch applies a time-distributed dense ReLU layer of width
    ``WORDVEC_DIM`` to inputs of shape ``(None, IMAGE_FEATURE_SIZE)``; the
    other embeds word indices into ``WORDVEC_DIM`` dimensions.  The branches
    are concatenated on axis 2 and fed to an LSTM, dropout and a softmax
    layer over ``VOCABULARY_SIZE`` outputs.
    """
    # As described in https://arxiv.org/abs/1511.02283
    # Input: The 4101-dim feature from extract_features, and the previous output word

    image_branch = models.Sequential()
    image_branch.add(
        layers.TimeDistributed(
            layers.Dense(WORDVEC_DIM, activation='relu', name='visual_embed'),
            input_shape=(None, IMAGE_FEATURE_SIZE)))

    word_branch = models.Sequential()
    word_branch.add(layers.Embedding(VOCABULARY_SIZE, WORDVEC_DIM, dropout=.5))

    combined = models.Sequential()
    combined.add(layers.Merge([image_branch, word_branch], mode='concat', concat_axis=2))
    combined.add(layers.LSTM(1024, name='lstm_1', return_sequences=False))
    combined.add(layers.Dropout(.5))
    combined.add(layers.Dense(VOCABULARY_SIZE, activation='softmax', name='embed_out'))

    return combined
Ejemplo n.º 6
0
def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth, nb_stacks, use_skip_connections,
                learn_all_outputs, _log, desired_sample_rate, use_bias, res_l2, final_l2):
    """Build the stacked dilated-convolution model and return it (uncompiled).

    :param fragment_length: length of the time axis of the input tensor.
    :param nb_filters: number of filters in every residual-block convolution.
    :param nb_output_bins: size of the last input axis and of the softmax output.
    :param dilation_depth: each stack contains dilations ``2**0 .. 2**dilation_depth``.
    :param nb_stacks: number of times the dilation ladder is repeated.
    :param use_skip_connections: if true, the head consumes the sum of all
        skip outputs instead of the last residual output.
    :param learn_all_outputs: must be truthy; a falsy value raises.
    :param _log: logger used to report the receptive field.
    :param desired_sample_rate: not used in this function body.
    :param use_bias: whether the residual-block convolutions use a bias.
    :param res_l2: L2 coefficient for residual-block kernels.
    :param final_l2: L2 coefficient for the first head convolution.
    :raises DeprecationWarning: when ``learn_all_outputs`` is falsy.

    Uses the Keras 2 argument names (``use_bias``, ``kernel_regularizer``)
    and the ``Multiply`` / ``Add`` layers instead of the legacy ``bias`` /
    ``W_regularizer`` keywords and the legacy ``Merge`` layer.
    """

    def residual_block(x):
        """Gated residual unit; reads ``i`` and ``s`` from the loops below at call time."""
        # TODO: initialization, regularization?
        dilation = 2**i
        tanh_out = Conv1D(
            nb_filters,
            2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_tanh_s%d' % (dilation, s),
            activation='tanh',
            kernel_regularizer=l2(res_l2)
        )(x)
        sigm_out = Conv1D(
            nb_filters,
            2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_sigm_s%d' % (dilation, s),
            activation='sigmoid',
            kernel_regularizer=l2(res_l2)
        )(x)
        gated = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out])

        res_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                              kernel_regularizer=l2(res_l2))(gated)
        skip_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                               kernel_regularizer=l2(res_l2))(gated)
        res_x = layers.Add()([x, res_x])
        return res_x, skip_x

    # Named input_tensor so the builtin ``input`` is not shadowed.
    input_tensor = Input(shape=(fragment_length, nb_output_bins), name='input_part')
    skip_connections = []
    out = Conv1D(nb_filters, 2, dilation_rate=1, padding='causal', name='initial_causal_conv')(input_tensor)
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out)
            skip_connections.append(skip_out)

    if use_skip_connections:
        out = layers.Add()(skip_connections)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(nb_output_bins, 1, padding='same', kernel_regularizer=l2(final_l2))(out)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(nb_output_bins, 1, padding='same')(out)

    if not learn_all_outputs:
        # The former "take only the last time step" Lambda sat after this
        # raise and could never execute, so it has been removed.
        raise DeprecationWarning('Learning on just all outputs is wasteful, now learning only inside receptive field.')

    out = layers.Activation('softmax', name="output_softmax")(out)
    model = Model(input_tensor, out)

    receptive_field, receptive_field_ms = compute_receptive_field()

    _log.info('Receptive Field: %d (%dms)' % (receptive_field, int(receptive_field_ms)))
    return model
Ejemplo n.º 7
0
 def test_merge_mul(self):
     """Hand a ZLayer and a KLayer ``Merge(mode="mul")`` to ``compare_layer``."""
     feature_shape = (3, 5)
     z_sources = [ZLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     zlayer = ZLayer.Merge(layers=z_sources, mode="mul")
     k_sources = [KLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     klayer = KLayer.Merge(layers=k_sources, mode="mul")
     # Two random batches of size 2, one per merged input.
     input_data = [np.random.random([2, 3, 5]) for _ in range(2)]
     self.compare_layer(klayer, zlayer, input_data)
Ejemplo n.º 8
0
 def test_merge_concat(self):
     """Hand a ZLayer and a KLayer ``Merge(mode="concat")`` to ``compare_layer``."""
     # The two inputs differ only in their last dimension (11 vs 8).
     feature_shapes = [(2, 5, 11), (2, 5, 8)]
     z_sources = [ZLayer.InputLayer(input_shape=shape) for shape in feature_shapes]
     zlayer = ZLayer.Merge(layers=z_sources, mode="concat")
     k_sources = [KLayer.InputLayer(input_shape=shape) for shape in feature_shapes]
     klayer = KLayer.Merge(layers=k_sources, mode="concat")
     # Random batches of size 3, one per merged input.
     input_data = [np.random.random([3] + list(shape)) for shape in feature_shapes]
     self.compare_layer(klayer, zlayer, input_data)
Ejemplo n.º 9
0
 def test_merge_mul(self):
     """Hand a BLayer and a KLayer ``Merge(mode="mul")`` to ``compare_newapi``."""
     feature_shape = (3, 5)
     b_sources = [BLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     blayer = BLayer.Merge(layers=b_sources, mode="mul")
     k_sources = [KLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     klayer = KLayer.Merge(layers=k_sources, mode="mul")
     # Two random batches of size 2, one per merged input.
     input_data = [np.random.random([2, 3, 5]) for _ in range(2)]
     self.compare_newapi(klayer, blayer, input_data)
Ejemplo n.º 10
0
def model_ContextSum(p, embeddings, max_sent_len, n_out):
    """Build and compile the context-sum relation model.

    :param p: dict of hyper-parameters; this function reads ``dropout1``,
        ``position_emb``, ``rnn1_layers``, ``rnn1``, ``units1``, ``gpu`` and
        ``optimizer``.
    :param embeddings: 2-D array of pre-trained word embeddings; its shape
        gives the embedding layer's input and output dimensions.
    :param max_sent_len: length of every index-encoded sentence.
    :param n_out: number of output classes per edge.
    :return: the compiled Keras model with inputs
        ``[sentence_input, entity_markers]``.

    The Lambda layers created inside the per-edge loop bind the loop index
    as a default argument.  A plain closure would look ``i`` up when the
    function runs, so any later re-invocation or serialization of those
    layers would see the final loop value instead of the edge's own index.
    """
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len,), dtype='int32', name='sentence_input')
    # Repeat the input MAX_EDGES_PER_GRAPH times, one copy per edge
    # (the target entity pair plus the ghost pairs).
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(layers.Embedding(output_dim=embeddings.shape[1], input_dim=embeddings.shape[0],
                                                                input_length=max_sent_len, weights=[embeddings],
                                                                mask_zero=True, trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(MAX_EDGES_PER_GRAPH, max_sent_len,), dtype='int8', name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(layers.Embedding(output_dim=p['position_emb'],
                                                         input_dim=4, input_length=max_sent_len,
                                                         mask_zero=True, W_regularizer = regularizers.l2(),
                                                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    x = layers.merge([word_embeddings, pos_embeddings], mode="concat")
    for _ in range(p["rnn1_layers"]-1):
        x = layers.wrappers.TimeDistributed(
            getattr(layers, p['rnn1'])(p['units1'], return_sequences=True,
                                       consume_less='gpu' if p['gpu'] else "cpu"))(x)
    sentence_matrix = layers.wrappers.TimeDistributed(
        getattr(layers, p['rnn1'])(p['units1'],
                                   return_sequences=False, consume_less='gpu' if p['gpu'] else "cpu"))(x)

    # For every edge: take its own sentence vector and the sum of all the others.
    layers_to_concat = []
    context_shape = (MAX_EDGES_PER_GRAPH-1, p['units1'])
    for i in range(MAX_EDGES_PER_GRAPH):
        # ``i=i`` freezes the current index inside each lambda (see docstring).
        sentence_vector = layers.Lambda(lambda l, i=i: l[:, i], output_shape=(p['units1'],))(sentence_matrix)
        if i == 0:
            context_vectors = layers.Lambda(lambda l, i=i: l[:, i+1:], output_shape=context_shape)(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(lambda l, i=i: l[:, :i], output_shape=context_shape)(sentence_matrix)
        else:
            context_vectors = layers.Lambda(lambda l, i=i: K.concatenate([l[:, :i], l[:, i+1:]], axis=1),
                                            output_shape=context_shape)(sentence_matrix)
        context_vector = GlobalSumPooling1D()(context_vectors)
        edge_vector = layers.merge([sentence_vector, context_vector], mode="concat")
        edge_vector = layers.Reshape((1, p['units1']*2))(edge_vector)
        layers_to_concat.append(edge_vector)
    edge_vectors = layers.Merge(mode='concat', concat_axis=1)(layers_to_concat)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(layers.Dense(n_out, activation = "softmax", name='main_output'))(edge_vectors)

    model = models.Model(input=[sentence_input, entity_markers], output=[main_output])
    model.compile(optimizer=p['optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])

    return model
Ejemplo n.º 11
0
 def test_merge_max(self):
     """Hand a BLayer and a KLayer ``Merge(mode="max")`` to ``compare_newapi``."""
     feature_shape = (2, 5, 8)
     b_sources = [BLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     blayer = BLayer.Merge(layers=b_sources, mode="max")
     k_sources = [KLayer.InputLayer(input_shape=feature_shape) for _ in range(2)]
     klayer = KLayer.Merge(layers=k_sources, mode="max")
     # Two random batches of size 3, one per merged input.
     input_data = [np.random.random([3, 2, 5, 8]) for _ in range(2)]
     self.compare_newapi(klayer, blayer, input_data)
Ejemplo n.º 12
0
def wavenet_block_light(x,
                        nb_filters,
                        subsample=2,
                        use_bias=False,
                        res_l2=0.,
                        dropout_rate=0.,
                        batchnorm=False,
                        bn_momentum=0.99,
                        **kwargs):
    """Strided convolution block with a residual and a skip output.

    x : history of shape (batch_size, hist_length, nb_inputs, nb_features),
        arranged in reverse time order, i.e. the latest observation is
        x[:, 0, :, :]
    nb_filters : number of output features
    subsample : subsampling rate along the time dimension
    use_bias : whether the convolution layers use a bias
    res_l2 : L2 coefficient applied to both convolution kernels
    dropout_rate : rate passed to SpatialDropout
    batchnorm : apply BatchNormalization to the input first if True
    bn_momentum : momentum for BatchNormalization
    kwargs : accepted but not used by this function

    Returns ``(res_out, skip_out)``: the sum of the subsampled input and the
    1x1 projection, and the first time step of that projection.
    """
    # TODO: Add padding in case time dimension not divisible by sub_sample
    hidden = kl.BatchNormalization(momentum=bn_momentum)(x) if batchnorm else x

    # Kernel of height 2 along time, strided by ``subsample``.
    strided_conv = kl.Convolution2D(
        nb_filters,
        nb_row=2,
        nb_col=1,
        subsample=(subsample, 1),
        border_mode='valid',
        bias=use_bias,
        activation='relu',
        W_regularizer=l2(res_l2))
    hidden = strided_conv(hidden)
    hidden = SpatialDropout(dropout_rate, collapse_dim=(1, ))(hidden)

    pointwise_conv = kl.Convolution2D(
        nb_filters,
        nb_row=1,
        nb_col=1,
        border_mode='same',
        bias=use_bias,
        W_regularizer=l2(res_l2))
    projected = pointwise_conv(hidden)

    # Thin out the untouched input along time so it can be summed with the projection.
    thinned_input = kl.Lambda(lambda t: t[:, 0::subsample, :, :])(x)
    residual = kl.Merge(mode='sum')([thinned_input, projected])

    # Only the first time step of the projection feeds the skip path.
    skip = kl.Lambda(lambda t: t[:, :1, :, :])(projected)
    return residual, skip
Ejemplo n.º 13
0
 def call(self, inputs, **kwargs):
     """Attend over the left context of ``symbols`` using ``encodings``.

     :param inputs: list of exactly two tensors, ``[symbols, encodings]``.
     :return: ``[output, p]`` where ``p`` is the result of
         ``distributed_dot_softmax`` and ``output`` is the compressed
         context combined with ``encodings`` according to ``self.merge``.
     :raises ValueError: if ``self.merge`` is not one of ``"concatenate"``,
         ``"sum"``, ``"attention"`` or ``"sigmoid"`` (previously this fell
         through and failed with an unbound local variable).
     """
     assert isinstance(inputs, list) and len(inputs) == 2
     symbols, encodings = inputs[0], inputs[1]
     # contexts.shape = (M, T, left)
     contexts = make_history(symbols, self.left, symbols[:, :1])
     # M.shape = C.shape = (M, T, left, output_dim)
     M = kb.gather(self.M, contexts)  # input embeddings
     C = kb.gather(self.C, contexts)  # output embeddings
     if self.use_bias:
         M += self.T
     # p.shape = (M, T, input_dim)
     p = distributed_dot_softmax(M, encodings)
     compressed_context = distributed_transposed_dot(C, p)
     if self.merge == "concatenate":
         output = kl.Concatenate()([compressed_context, encodings])
     elif self.merge == "sum":
         # kl.Add is the Keras 2 counterpart of the legacy Merge(mode='sum').
         output = kl.Add()([compressed_context, encodings])
     elif self.merge == "attention":
         output = compressed_context
     elif self.merge == "sigmoid":
         output = distributed_cell([compressed_context, encodings])
     else:
         raise ValueError("Unsupported merge mode: %r" % (self.merge,))
     return [output, p]
Ejemplo n.º 14
0
    def arch(raw_input):
        """Build the multi-scale graph on ``raw_input`` and return the output tensor.

        All hyper-parameters (``hist_lengths``, ``time_units``,
        ``initial_subsamples``, ``nb_filters``, ``merge_scales``, ...) are read
        from the enclosing scope.

        For every scale, the time-reversed input is truncated to that scale's
        ``hist_length`` and passed through a ``wavenet_light`` model.  The
        per-scale outputs are merged (when there is more than one),
        optionally passed through an intermediate 1x1 convolution, mapped to
        ``output_horizon`` channels, squeezed on axis 1 and permuted.

        The truncation Lambda binds ``hist_length`` as a default argument.  A
        plain closure would look the name up when the function runs, so any
        later re-invocation or serialization of those layers would see the
        last scale's length for every scale.
        """
        # Reverse the time dimension to make it easier not to lose the latest obs
        input_ = kl.Lambda(lambda x: x[:, ::-1, :])(raw_input)
        # Add a dimension for filters features -> shape (bs, time, inputs, features)
        input_ = kl.Lambda(lambda x: K.expand_dims(x))(input_)

        scale_outputs = []
        for hist_length, time_unit, initial_subsample in zip(
                hist_lengths, time_units, initial_subsamples):
            # Keep only the most recent ``hist_length`` steps for this scale.
            scale_input = kl.Lambda(
                lambda x, hist_length=hist_length: x[:, :hist_length, :, :])(input_)

            nb_blocks = int(np.log2(hist_length // initial_subsample))

            scale_out = wavenet_light(
                hist_length,
                nb_inputs,
                output_horizon,
                nb_filters=nb_filters,
                nb_blocks=nb_blocks,
                initial_pooling=time_unit,
                initial_subsample=initial_subsample,
                use_skip_connections=use_skip_connections,
                use_bias=False,
                res_l2=res_l2,
                final_l2=final_l2,
                batchnorm=batchnorm,
                bn_momentum=bn_momentum,
                dropout_rate=dropout_rate,
                input_noise=input_noise,
                has_top=False)(scale_input)
            scale_outputs.append(scale_out)
        if len(scale_outputs) > 1:
            out = kl.Merge(mode=merge_scales)(scale_outputs)
        else:
            # A single scale needs no merge layer.
            out = scale_outputs[0]
        if intermediate_conv:
            if batchnorm:
                out = kl.BatchNormalization(momentum=bn_momentum)(out)
            out = kl.Convolution2D(
                nb_filters,
                nb_row=1,
                nb_col=1,
                border_mode='same',
                bias=False,
            )(out)
            out = kl.Dropout(dropout_rate)(out)

        if n_experts > 1:
            out = mixture_experts(out,
                                  output_horizon,
                                  final_l2=final_l2,
                                  n_experts=n_experts)
        else:
            out = kl.Convolution2D(output_horizon,
                                   nb_row=1,
                                   nb_col=1,
                                   border_mode='same',
                                   W_regularizer=l2(final_l2))(out)
        # Remove time dimension
        out = kl.Lambda(lambda x: K.squeeze(x, 1))(out)
        # Switch horizons into time dimension
        out = kl.Permute(dims=(2, 1))(out)
        return out
Ejemplo n.º 15
0
def MultiLevelDCNet(input_shape, n_class, routings):
    """
    A Multi-level DCNet on CIFAR-10.

    Three DenseBlock + PrimaryCap levels are stacked; each level's primary
    capsules feed their own CapsuleLayer, and the concatenation of all three
    feeds a fourth ("merged") CapsuleLayer.  A dense decoder is attached to
    the masked concatenation of all four capsule outputs.

    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
    """
    
    x = layers.Input(shape=input_shape)
    # Axis that BatchNormalization normalizes over, chosen from the backend's image data format.
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    ########################### Level 1 Capsules ###########################
    # Incorporating DenseNets - Creating a dense block with 8 layers having 32 filters and 32 growth rate.
    conv, nb_filter = densenet.DenseBlock(x, growth_rate=32, nb_layers=8, nb_filter=32)
    # Batch Normalization
    DenseBlockOutput = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(conv)

    # Creating Primary Capsules (Level 1)
    # Here PrimaryCapsConv2D is the Conv2D output which is used as the primary capsules by reshaping and squashing (squash activation).
    # primarycaps_1 (size: [None, num_capsule, dim_capsule]) is the "reshaped and squashed output" which will be further passed to the dynamic routing protocol.
    primarycaps_1, PrimaryCapsConv2D = PrimaryCap(DenseBlockOutput, dim_capsule=8, n_channels=12, kernel_size=5, strides=2, padding='valid')

    # Applying ReLU Activation to primary capsules
    conv = layers.Activation('relu')(PrimaryCapsConv2D)

    ########################### Level 2 Capsules ###########################
    # Incorporating DenseNets - Creating a dense block with 8 layers having 32 filters and 32 growth rate.
    conv, nb_filter = densenet.DenseBlock(conv, growth_rate=32, nb_layers=8, nb_filter=32)
    # Batch Normalization
    DenseBlockOutput = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(conv)

    # Creating Primary Capsules (Level 2)
    primarycaps_2, PrimaryCapsConv2D = PrimaryCap(DenseBlockOutput, dim_capsule=8, n_channels=12, kernel_size=5, strides=2, padding='valid')

    # Applying ReLU Activation to primary capsules
    conv = layers.Activation('relu')(PrimaryCapsConv2D)

    ########################### Level 3 Capsules ###########################
    # Incorporating DenseNets - Creating a dense block with 8 layers having 32 filters and 32 growth rate.
    conv, nb_filter = densenet.DenseBlock(conv, growth_rate=32, nb_layers=8, nb_filter=32)
    # Batch Normalization
    DenseBlockOutput = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(conv)

    # Creating Primary Capsules (Level 3) - note the smaller kernel_size (3) at this level.
    primarycaps_3, PrimaryCapsConv2D = PrimaryCap(DenseBlockOutput, dim_capsule=8, n_channels=12, kernel_size=3, strides=2, padding='valid')

    # Merging Primary Capsules for the Merged DigitCaps (CapsuleLayer formed by combining all levels of primary capsules)
    # NOTE(review): `layers.merge(..., mode=..., concat_axis=...)` is the legacy functional merge API - confirm the targeted Keras version still provides it.
    mergedLayer = layers.merge([primarycaps_1,primarycaps_2,primarycaps_3], mode='concat', concat_axis=1)


    ########################### Separate DigitCaps Outputs (used for training) ###########################
    # Merged DigitCaps
    digitcaps_0 = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings,
                             name='digitcaps0')(mergedLayer)
    out_caps_0 = Length(name='capsnet_0')(digitcaps_0)

    # First Level DigitCaps
    digitcaps_1 = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings,
                             name='digitcaps1')(primarycaps_1)
    out_caps_1 = Length(name='capsnet_1')(digitcaps_1)

    # Second Level DigitCaps (dim_capsule=12, smaller than level 1)
    digitcaps_2 = CapsuleLayer(num_capsule=n_class, dim_capsule=12, routings=routings,
                             name='digitcaps2')(primarycaps_2)
    out_caps_2 = Length(name='capsnet_2')(digitcaps_2)

    # Third Level DigitCaps (dim_capsule=10)
    digitcaps_3 = CapsuleLayer(num_capsule=n_class, dim_capsule=10, routings=routings,
                             name='digitcaps3')(primarycaps_3)
    out_caps_3 = Length(name='capsnet_3')(digitcaps_3)

    ########################### Combined DigitCaps Output (used for evaluation) ###########################
    # The four capsule outputs have different dim_capsule values, so they are joined along axis 2.
    digitcaps = layers.merge([digitcaps_1,digitcaps_2,digitcaps_3, digitcaps_0], mode='concat', concat_axis=2,
                             name='digitcaps')
    out_caps = Length(name='capsnet')(digitcaps)

    # Reconstruction (decoder) network
    y = layers.Input(shape=(n_class,))
    masked_by_y = Mask()([digitcaps, y])  # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(digitcaps)  # Mask using the capsule with maximal length. For prediction

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    # Decoder input width = combined capsule dimension (axis 2 of digitcaps) times the number of classes.
    decoder.add(layers.Dense(600, activation='relu', input_dim=int(digitcaps.shape[2]*n_class), name='zero_layer'))
    decoder.add(layers.Dense(600, activation='relu', name='one_layer'))
    decoderFinal = models.Sequential(name='decoderFinal')
    # Concatenating two layers
    # NOTE(review): this passes two Dense layers of `decoder` to the legacy `Merge`; how `decoderFinal`
    # behaves when later called on a single tensor depends on that legacy implementation - verify.
    decoderFinal.add(layers.Merge([decoder.get_layer('zero_layer'), decoder.get_layer('one_layer')], mode='concat'))
    decoderFinal.add(layers.Dense(1200, activation='relu'))
    # NOTE(review): the reconstruction size is hard-coded to 32x32x1 and does not follow `input_shape` - confirm intended.
    decoderFinal.add(layers.Dense(np.prod([32,32,1]), activation='sigmoid'))
    decoderFinal.add(layers.Reshape(target_shape=[32,32,1], name='out_recon'))

    # Model for training
    train_model = models.Model([x, y], [out_caps_0, out_caps_1, out_caps_2, out_caps_3, decoderFinal(masked_by_y)])

    # Model for evaluation (prediction)
    # Note that out_caps is the final prediction. Other predictions could be used for analysing separate-level predictions.
    eval_model = models.Model(x, [out_caps, out_caps_0, out_caps_1, out_caps_2, out_caps_3, decoderFinal(masked)])

    return train_model, eval_model
Ejemplo n.º 16
0
                                weights=[embedding_matrix],
                                input_length=MAX_LEN,
                                trainable=False)
    inp = Input(shape=(MAX_LEN, ))
    embedding_sequence = embedding_layer(inp)
    convs = []
    filter_sizes = [3, 4, 5]

    for filter_size in filter_sizes:
        l_conv = layers.Conv1D(filters=128,
                               kernel_size=filter_size,
                               activation='relu')(embedding_sequence)
        l_pool = layers.MaxPool1D(pool_size=3)(l_conv)
        convs.append(l_pool)

    l_merged = layers.Merge(mode='concat', concat_axis=1)(convs)
    conv = layers.Conv1D(filters=128, kernel_size=3,
                         activation='relu')(embedding_sequence)
    pool = layers.MaxPooling1D(pool_size=3)(conv)
    if extra_conv:
        x = Dropout(0.2)(l_merged)
    else:
        x = Dropout(0.2)(pool)
    x = layers.Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(1)(x)

    cnn_model1 = Model(inp, x)
    cnn_model1.compile(optimizer='rmsprop', loss='mse', metrics=[escore])
Ejemplo n.º 17
0
def main():
    """Train a three-input classifier, evaluate it and save all artefacts.

    Steps, in order:
    1. create a timestamped output directory;
    2. encode the training file (``args.input``) and the prediction file
       (``args.validationfile``) into character, amount and date arrays;
    3. build, train and save a merged Keras model;
    4. plot accuracy/loss curves, print predictions and accuracy, save a
       normalized confusion-matrix heatmap;
    5. copy this script into the output directory.
    """
    args = setup()
    # Timestamp doubles as output directory name and as suffix of every artefact.
    momentnow = time.strftime("%Y%m%d_%H%M%S")
    os.mkdir(momentnow)
    dfTrain = loadData(args.input, args.rows_to_skip)
    # X1: right-aligned character codes of "Concepto"; X2: "Importe"; X3: "FechaRel".
    X1 = np.zeros((len(dfTrain), MAXLEN), dtype=np.uint8)
    X2 = np.zeros((len(dfTrain), ), dtype=np.float32)
    X3 = np.zeros((len(dfTrain), ), dtype=np.float32)
    # One-hot targets, one column per distinct "Classification" value.
    Y = np.zeros((len(dfTrain), dfTrain["Classification"].unique().size),
                 dtype=np.int8)
    # Maps each class label to its column index, in order of first appearance.
    categories = {}
    # NOTE(review): `i` is the DataFrame index label and is used as array row - assumes a 0..n-1 index; confirm in loadData.
    for i, row in dfTrain.iterrows():
        desc = row["Concepto"]
        # NOTE(review): assumes len(desc) <= MAXLEN and every ord(c) fits in uint8 - verify against the data.
        X1[i, MAXLEN - len(desc):] = [ord(c) for c in desc]
        X2[i] = row["Importe"]
        X3[i] = row["FechaRel"]
        Y[i, categories.setdefault(row["Classification"], len(categories))] = 1
    # Standardize the amounts; X3 is left unscaled.
    X2 = (X2 - np.mean(X2)) / np.std(X2)
    inv_categories = {v: k for k, v in categories.items()}

    # preparing my prediction set
    dfPred = loadData(args.validationfile, args.rows_to_skip)
    X1pred = np.zeros((len(dfPred), MAXLEN), dtype=np.uint8)
    X2pred = np.zeros((len(dfPred), ), dtype=np.float32)
    X3pred = np.zeros((len(dfPred), ), dtype=np.float32)
    for i, row in dfPred.iterrows():
        desc = row["Concepto"]
        X1pred[i, MAXLEN - len(desc):] = [ord(c) for c in desc]
        X2pred[i] = row["Importe"]
        X3pred[i] = row["FechaRel"]
    # NOTE(review): the prediction set is standardized with its own mean/std, not the training statistics - confirm intended.
    X2pred = (X2pred - np.mean(X2pred)) / np.std(X2pred)

    # creating my RNN model
    model_desc = models.Sequential()
    # Identity matrix as frozen embedding weights: each character code maps to a one-hot vector.
    embedding = np.zeros((256, 256), dtype=np.float32)
    np.fill_diagonal(embedding, 1)
    model_desc.add(
        layers.embeddings.Embedding(256,
                                    256,
                                    input_length=MAXLEN,
                                    weights=[embedding],
                                    trainable=False))
    model_desc.add(layers.LSTM(128))

    # Small dense branch for the scalar amount.
    model_amount = models.Sequential()
    model_amount.add(layers.Dense(10, input_shape=(1, ), activation="relu"))

    # Small dense branch for the scalar relative date.
    model_date = models.Sequential()
    model_date.add(layers.Dense(10, input_shape=(1, ), activation="relu"))

    # Concatenate the three branches and classify.
    merged = layers.Merge((model_desc, model_amount, model_date),
                          mode="concat")
    final_model = models.Sequential()
    final_model.add(merged)
    final_model.add(layers.Dense(64, activation="relu"))
    final_model.add(layers.Dropout(args.dropout))
    final_model.add(layers.Dense(Y.shape[-1], activation="softmax"))
    final_model.compile(loss="categorical_crossentropy",
                        optimizer="rmsprop",
                        metrics=["accuracy"])
    csv_logger = keras.callbacks.CSVLogger(momentnow + "/metrics_" +
                                           momentnow + ".csv")
    modelfit = final_model.fit([X1, X2, X3],
                               Y,
                               batch_size=50,
                               epochs=args.epochs,
                               validation_split=args.validation,
                               shuffle=True,
                               callbacks=[csv_logger])
    print(final_model.summary())
    print(modelfit.history.keys())
    final_model.save(momentnow + "/model_" + momentnow + ".h5")
    # NOTE(review): the JSON string returned here is discarded - confirm whether it was meant to be written to disk.
    final_model.to_json()

    # plot ACCURACY for training and validation sets
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(modelfit.history['acc'])
    plt.plot(modelfit.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # plot LOSS for training and validation sets
    plt.subplot(1, 2, 2)
    plt.plot(modelfit.history['loss'])
    plt.plot(modelfit.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(momentnow + "/plotloss_" + momentnow)
    plt.show()

    YPred = final_model.predict_classes([X1pred, X2pred, X3pred], verbose=2)
    # NOTE(review): raises KeyError if the prediction file contains a class that never occurred in training.
    YTrue = [categories[x] for x in dfPred["Classification"]]
    print(YPred)
    for i in range(0, len(YPred)):
        print(inv_categories[YPred[i]])
    # Count exact matches between predicted and true class indices.
    hit = 0
    for i in range(0, len(YPred)):
        if YPred[i] == YTrue[i]:
            hit += 1
    # NOTE(review): divides by zero when the prediction set is empty.
    acc_rate = hit / len(YPred)
    print("accuracy on prediction set: {:.6}%".format(acc_rate * 100))
    # `labels` is computed but not used below.
    labels = [inv_categories[x] for x in inv_categories]
    confusionmatrix = confusion_matrix(YTrue, YPred)
    # Normalize each row of the confusion matrix by its row sum.
    cm_norm = confusionmatrix.astype("float") / confusionmatrix.sum(
        axis=1)[:, np.newaxis]
    cm_norm = np.round(cm_norm, 2)
    sns.set(font_scale=0.9)  # for label size
    plt.figure()
    plotconf = sns.heatmap(cm_norm,
                           annot=True,
                           annot_kws={"size": 6},
                           cbar=False)
    plotconf.figure.savefig(momentnow + "/confusionmatrix_" + momentnow)

    # Keep a copy of the exact script that produced this run.
    destinationfile = momentnow + "/code_" + momentnow + '.py'
    copyfile(__file__, destinationfile)
Ejemplo n.º 18
0
def MultiLevelDCNet(input_shape, n_class, routings):
    """
    Assemble a capsule network whose primary capsules sit on a DenseNet block.

    The input is passed through ``densenet.DenseBlock`` and batch
    normalization, converted to primary capsules by ``PrimaryCap``, mapped to
    ``n_class`` output capsules by ``CapsuleLayer`` and finally handed to a
    reconstruction decoder.

    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes (one output capsule per class)
    :param routings: number of routing iterations given to ``CapsuleLayer``
    :return: ``(train_model, eval_model)``. The training model takes
        ``[image, label]`` and masks the capsules with the label; the
        evaluation model takes only the image. Both output the capsule
        lengths and the decoder reconstruction.
    """
    image_in = layers.Input(shape=input_shape)
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # ------------------------- Primary capsules -------------------------
    # Dense block with 8 layers, 32 filters and a growth rate of 32. The
    # second returned value is not needed here.
    dense_features, _ = densenet.DenseBlock(
        image_in, growth_rate=32, nb_layers=8, nb_filter=32)
    normalized_features = BatchNormalization(
        axis=channel_axis, epsilon=1.1e-5)(dense_features)

    # PrimaryCap returns the capsule tensor that is passed on to the capsule
    # layer plus a second value (named PrimaryCapsConv2D upstream) that this
    # function does not use.
    primary_caps, _ = PrimaryCap(
        normalized_features,
        dim_capsule=8,
        n_channels=32,
        kernel_size=9,
        strides=2,
        padding='valid')

    # ------------------------- DigitCaps output -------------------------
    class_caps_layer = CapsuleLayer(
        num_capsule=n_class,
        dim_capsule=16,
        routings=routings,
        name='digitcaps0')
    class_caps = class_caps_layer(primary_caps)
    out_caps = Length(name='capsnet')(class_caps)

    # ------------------ Reconstruction (decoder) network ------------------
    label_in = layers.Input(shape=(n_class, ))
    # Training: the true label is used to mask the capsule layer output.
    caps_masked_by_label = Mask()([class_caps, label_in])
    # Prediction: mask using the capsule with maximal length.
    caps_masked_by_length = Mask()(class_caps)

    # Decoder shared between the training and the prediction model.
    decoder = models.Sequential(name='decoder')
    decoder_input_dim = int(class_caps.shape[2] * n_class)
    decoder.add(
        layers.Dense(512,
                     activation='relu',
                     input_dim=decoder_input_dim,
                     name='zero_layer'))
    decoder.add(layers.Dense(512, activation='relu', name='one_layer'))

    recon_net = models.Sequential(name='decoderFinal')
    # Concatenate the two dense layers of `decoder`.
    branches = [
        decoder.get_layer('zero_layer'),
        decoder.get_layer('one_layer'),
    ]
    recon_net.add(layers.Merge(branches, mode='concat'))
    recon_net.add(layers.Dense(1024, activation='relu'))
    recon_net.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    recon_net.add(layers.Reshape(input_shape, name='out_recon'))

    # Model for training
    train_recon = recon_net(caps_masked_by_label)
    train_model = models.Model([image_in, label_in], [out_caps, train_recon])

    # Model for evaluation (prediction)
    eval_recon = recon_net(caps_masked_by_length)
    eval_model = models.Model(image_in, [out_caps, eval_recon])

    return train_model, eval_model
Ejemplo n.º 19
0
    def arch(raw_input):
        """Build the convolutional graph on top of ``raw_input``.

        All hyper-parameters (``input_noise``, ``initial_pooling``,
        ``initial_subsample``, ``output_horizon``, ``nb_filters``,
        ``nb_blocks``, ``hist_length``, ``batchnorm``, ``bn_momentum``,
        ``use_bias``, ``res_l2``, ``dropout_rate``, ``use_skip_connections``,
        ``has_top``, ``final_l2``) are read from the enclosing scope.

        :param raw_input: input tensor; the padding and slicing below index
            it with four axes, so it is presumably 4D -- TODO confirm layout
        :return: the output tensor of the assembled layer graph
        """
        head = raw_input

        if input_noise > 0.:
            head = kl.GaussianNoise(input_noise)(head)

        if initial_pooling > 1:
            # Pad only at the top by `output_horizon` before the strided
            # 'valid' convolution.
            head = kl.Lambda(lambda x: K.asymmetric_spatial_2d_padding(
                x,
                top_pad=output_horizon,
                bottom_pad=0,
                left_pad=0,
                right_pad=0))(head)
            if batchnorm:
                head = kl.BatchNormalization(momentum=bn_momentum)(head)
            head = kl.Convolution2D(nb_filters,
                                    nb_row=initial_pooling + output_horizon,
                                    nb_col=1,
                                    bias=use_bias,
                                    subsample=(initial_subsample, 1),
                                    border_mode='valid')(head)
        else:
            # Note: in this branch batch normalization runs before the
            # padding, the opposite order of the branch above.
            if batchnorm:
                head = kl.BatchNormalization(momentum=bn_momentum)(head)
            head = kl.Lambda(lambda x: K.asymmetric_spatial_2d_padding(
                x, top_pad=1, bottom_pad=0, left_pad=0, right_pad=0))(head)
            head = kl.Convolution2D(nb_filters,
                                    nb_row=2,
                                    nb_col=1,
                                    bias=use_bias,
                                    subsample=(1, 1),
                                    border_mode='valid')(head)

        perceptive_field = 2**nb_blocks * initial_subsample
        if perceptive_field < hist_length:
            # Adjacent string literals are used instead of a backslash
            # continuation inside the literal, which would embed the source
            # indentation in the printed message.
            message = ('History length of {} but conv block with perceptive '
                       'field of {}. This is suboptimal')
            print(message.format(hist_length, perceptive_field))

        skip_connections = []
        for _ in range(nb_blocks):
            head, skip_out = wavenet_block_light(
                head,
                nb_filters,
                subsample=2,
                use_bias=use_bias,
                res_l2=res_l2,
                batchnorm=batchnorm,
                bn_momentum=bn_momentum,
                dropout_rate=dropout_rate,
            )
            skip_connections.append(skip_out)

        if use_skip_connections:
            # Sum the skip outputs of all blocks.
            head = kl.Merge(mode='sum')(skip_connections)
        else:
            # Keep only the first index along axis 1 of the last block output.
            head = kl.Lambda(lambda x: x[:, :1, :, :])(head)
        head = kl.Activation('relu')(head)

        if batchnorm:
            head = kl.BatchNormalization(momentum=bn_momentum)(head)
        head = kl.Convolution2D(
            nb_filters,
            nb_row=1,
            nb_col=1,
            border_mode='same',
            bias=use_bias,
        )(head)
        head = kl.Dropout(dropout_rate)(head)

        if has_top:
            # Final 1x1 convolution with `output_horizon` filters.
            head = kl.Convolution2D(output_horizon,
                                    nb_row=1,
                                    nb_col=1,
                                    border_mode='same',
                                    bias=use_bias,
                                    W_regularizer=l2(final_l2))(head)
        return head
Ejemplo n.º 20
0
def create_model(desired_sample_rate, dilation_depth, nb_stacks):
    """
    Build a WaveNet-style model from stacked gated, dilated causal convolutions.

    The network consists of an initial causal convolution, followed by
    ``nb_stacks`` stacks of ``dilation_depth + 1`` residual blocks with
    dilation rates ``2**0 .. 2**dilation_depth``. The skip outputs of all
    blocks are summed, passed through two width-3 convolutions and flattened
    into a dense layer with 919 sigmoid outputs.

    :param desired_sample_rate: forwarded to ``compute_receptive_field_``
    :param dilation_depth: largest dilation exponent used inside a stack
    :param nb_stacks: number of stacks of residual blocks
    :return: Keras ``Model`` taking input of shape
        ``(488 + receptive_field, 4)`` and producing 919 sigmoid outputs
    """
    nb_output_bins = 4
    nb_filters = 64
    use_bias = False
    res_l2 = 0
    use_skip_connections = True
    learn_all_outputs = True

    # Computed once; used for both the input length and the log line below.
    receptive_field, receptive_field_ms = compute_receptive_field_(
        desired_sample_rate, dilation_depth, nb_stacks)
    fragment_length = 488 + receptive_field

    def residual_block(x, i, s):
        """Gated residual block with dilation ``2**i`` in stack ``s``.

        Returns ``(residual_output, skip_output)``.
        """
        original_x = x
        # TODO: initialization, regularization?
        # Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet.
        tanh_out = CausalAtrousConvolution1D(nb_filters,
                                             2,
                                             atrous_rate=2**i,
                                             border_mode='valid',
                                             causal=True,
                                             bias=use_bias,
                                             name='dilated_conv_%d_tanh_s%d' %
                                             (2**i, s),
                                             activation='tanh',
                                             W_regularizer=l2(res_l2))(x)
        # NOTE(review): dropout is applied after the tanh branch was built,
        # so only the sigmoid branch sees the dropped-out input -- confirm
        # this asymmetry is intended.
        x = layers.Dropout(0.2)(x)
        sigm_out = CausalAtrousConvolution1D(nb_filters,
                                             2,
                                             atrous_rate=2**i,
                                             border_mode='valid',
                                             causal=True,
                                             bias=use_bias,
                                             name='dilated_conv_%d_sigm_s%d' %
                                             (2**i, s),
                                             activation='sigmoid',
                                             W_regularizer=l2(res_l2))(x)
        # Gated activation: element-wise product of the two branches.
        x = layers.Merge(mode='mul', name='gated_activation_%d_s%d' %
                         (i, s))([tanh_out, sigm_out])

        res_x = layers.Convolution1D(nb_filters,
                                     1,
                                     border_mode='same',
                                     bias=use_bias,
                                     W_regularizer=l2(res_l2))(x)
        skip_x = layers.Convolution1D(nb_filters,
                                      1,
                                      border_mode='same',
                                      bias=use_bias,
                                      W_regularizer=l2(res_l2))(x)
        # Residual connection around the gated unit.
        res_x = layers.Merge(mode='sum')([original_x, res_x])
        return res_x, skip_x

    model_input = Input(shape=(fragment_length, nb_output_bins),
                        name='input_part')
    out = model_input
    skip_connections = []
    out = CausalAtrousConvolution1D(nb_filters,
                                    2,
                                    atrous_rate=1,
                                    border_mode='valid',
                                    causal=True,
                                    name='initial_causal_conv')(out)
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out, i, s)
            skip_connections.append(skip_out)

    if use_skip_connections:
        # Replace the residual path by the sum of all skip outputs.
        out = layers.Merge(mode='sum')(skip_connections)
    out = layers.PReLU()(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins,
                               filter_length=3,
                               border_mode='same')(out)
    out = layers.Dropout(0.5)(out)
    out = layers.PReLU()(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins,
                               filter_length=3,
                               border_mode='same')(out)

    if not learn_all_outputs:
        # Taking only the last output is no longer supported; fail loudly
        # if the flag above is ever switched off.
        raise DeprecationWarning(
            'Learning on just all outputs is wasteful, now learning only inside receptive field.'
        )

    out = layers.PReLU()(out)
    out = layers.Flatten()(out)
    predictions = layers.Dense(919, activation='sigmoid', name='fc1')(out)
    model = Model(model_input, predictions)

    _log.info('Receptive Field: %d (%dms)' %
              (receptive_field, int(receptive_field_ms)))
    return model