class TiedGraphAutoencoderFP(Layer):

    def __init__(self, inner_layer_arg, activ, bias, init,
                 original_atom_bond_features, tied_to=None, encode=False,
                 decode=False, activity_reg=None, **kwargs):
        # Initialise
        self.tied_to = tied_to
        self.encode = encode
        self.decode = decode
        self.original_atom_bond_features = original_atom_bond_features
        self.bias = bias
        self.reg = activity_reg

        if isinstance(inner_layer_arg, (int, np.int64)):
            self.fp_length = inner_layer_arg
            self.create_inner_layer_fn = lambda: DenseTied(
                self.fp_length,
                activation=activ,
                use_bias=bias,
                kernel_initializer=init,
                tied_to=self.tied_to,
                idx=None,
                activity_regularizer=self.reg,
                **kwargs)
        else:
            raise ValueError(
                'TiedGraphAutoencoderFP has to be initialised with fp_length.')

        super(TiedGraphAutoencoderFP, self).__init__(**kwargs)

    def build(self, inputs_shape):
        # Set the index for the DenseTied weight values
        # Import dimensions
        (max_atoms, _, num_atom_features, num_bond_features,
         _) = mol_shapes_to_dims(mol_shapes=inputs_shape)

        # Add the dense layer that contains the trainable parameters,
        # initialised with the specified params (kwargs) and name
        self.trainable_weights = []
        self.non_trainable_weights = []
        inner_layer = self.create_inner_layer_fn()
        inner_layer_type = inner_layer.__class__.__name__.lower()
        inner_layer.name = self.name + '_inner_' + inner_layer_type

        # Initialise a TimeDistributed wrapper in order to parallelise the
        # dense layer across atoms
        inner_3D_layer_name = self.name + '_inner_timedistributed'
        self.inner_3D_layer = TimeDistributed(inner_layer,
                                              name=inner_3D_layer_name)

        # Build the TimeDistributed layer (which will build the Dense layer)
        if self.encode:
            self.inner_3D_layer.build(
                (None, max_atoms, num_atom_features + num_bond_features))
        else:
            self.inner_3D_layer.build((None, max_atoms, self.fp_length))

        # Store the inner 3D layer and its weights
        if self.tied_to is not None:
            self.non_trainable_weights.append(self.inner_3D_layer.layer.kernel)
            if self.bias:
                self.trainable_weights.append(self.inner_3D_layer.layer.bias)
        else:
            self.trainable_weights = self.inner_3D_layer.trainable_weights

    def call(self, inputs, mask=None):
        if self.encode:
            return self.encoder(inputs)
        elif self.decode:
            return self.decoder(inputs)

    def encoder(self, inputs):
        atoms, bonds, edges = inputs
        final_fp_out = self.process_through_layers(atoms, bonds, edges)
        return final_fp_out

    def decoder(self, inputs):
        fp_out, _, _ = inputs
        vxi_dot = self.inner_3D_layer(fp_out)
        return vxi_dot

    def process_through_layers(self, atoms, bonds, edges):
        # Create a matrix that stores, for each atom, its degree, and use it
        # to create a general atom mask (unused atoms are zero padded).
        # We have to use the edge vector for this because, in theory, a
        # convolution could lead to a zero vector for an atom that is present
        # in the molecule.
        atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), dtype='float32'),
                             axis=-1, keepdims=True)
        general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())

        # Sum the edge features for each atom
        summed_bond_features = K.sum(bonds, axis=-2)

        # Concatenate the summed atom and bond features
        atoms_bonds_features = keras.layers.Concatenate(axis=-1)(
            [atoms, summed_bond_features])

        # Compute fingerprint
        fingerprint_out_unmasked = self.inner_3D_layer(atoms_bonds_features)

        # Do explicit masking because TimeDistributed does not support masking
        fingerprint_out_masked = fingerprint_out_unmasked * general_atom_mask

        final_fp_out = fingerprint_out_masked
        # Sum across all atoms
        # final_fp_out = K.sum(fingerprint_out_masked, axis=-2, keepdims=False)
        return final_fp_out

    def compute_output_shape(self, inputs_shape):
        # Import dimensions
        (max_atoms, _, _, _,
         num_samples) = mol_shapes_to_dims(mol_shapes=inputs_shape)
        if self.encode:
            return (num_samples, max_atoms, self.fp_length)
        else:
            return (num_samples, max_atoms, self.original_atom_bond_features)

    def get_config(self):
        config = super(TiedGraphAutoencoderFP, self).get_config()
        # Store the config of the inner layer of the 3D wrapper
        inner_layer = self.inner_3D_layer.layer
        config['inner_layer_config'] = dict(
            config=inner_layer.get_config(),
            class_name=inner_layer.__class__.__name__)
        return config
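# --- Usage sketch (added for illustration; not part of the original source) ---
# A minimal sketch of how an encoder/decoder fingerprint pair could be tied
# together, assuming graph input tensors (atoms, bonds, edges) and the
# DenseTied helper defined elsewhere in this repository. The shapes and the
# exact object expected by `tied_to` depend on that helper, so treat this as
# illustrative only:
#
#   fp_encoder = TiedGraphAutoencoderFP(
#       fp_length, activ='relu', bias=True, init='glorot_uniform',
#       original_atom_bond_features=num_atom_features + num_bond_features,
#       encode=True)
#   fingerprint = fp_encoder([atoms, bonds, edges])
#
#   # The decoder reuses the encoder's kernel (non-trainable) via `tied_to`
#   # and only trains its own bias.
#   fp_decoder = TiedGraphAutoencoderFP(
#       fp_length, activ='relu', bias=True, init='glorot_uniform',
#       original_atom_bond_features=num_atom_features + num_bond_features,
#       tied_to=fp_encoder, decode=True)
#   reconstruction = fp_decoder([fingerprint, bonds, edges])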
class TiedGraphAutoencoder(Layer):

    def __init__(self, inner_layer_arg, activ, bias, init,
                 original_atom_bond_features=None, tied_to=None,
                 encode_only=False, decode_only=False, activity_reg=None,
                 **kwargs):
        # Initialise inner dense layers using the convolution width
        self.tied_to = tied_to
        self.encode_only = encode_only
        self.decode_only = decode_only
        self.bias = bias
        self.original_atom_bond_features = original_atom_bond_features
        self.activ = activ
        self.init = init
        self.reg = activity_reg

        # Case 1: check if conv_width is given
        if isinstance(inner_layer_arg, (int, np.int64)):
            self.conv_width = inner_layer_arg
            self.create_inner_layer_fn = lambda: DenseTied(
                self.conv_width,
                activation=self.activ,
                use_bias=bias,
                kernel_initializer=init,
                tied_to=self.tied_to,
                idx=self.idx,
                activity_regularizer=self.reg,
                **kwargs)

        # Case 2: check if an initialised keras layer is given
        elif isinstance(inner_layer_arg, Layer):
            assert inner_layer_arg.built == False, \
                'When initialising with a keras layer, it cannot be built.'
            _, self.conv_width = inner_layer_arg.get_output_shape_for(
                (None, None))
            # layer_from_config will mutate the config dict, therefore create a get fn
            self.create_inner_layer_fn = lambda: layer_from_config(
                dict(class_name=inner_layer_arg.__class__.__name__,
                     config=inner_layer_arg.get_config()))
        else:
            raise ValueError(
                'TiedGraphAutoencoder has to be initialised with 1) an int '
                'conv_width, 2) a keras layer instance, or 3) a function '
                'returning a keras layer instance.')

        super(TiedGraphAutoencoder, self).__init__(**kwargs)

    def build(self, inputs_shape):
        # Import dimensions
        (max_atoms, max_degree, num_atom_features, num_bond_features,
         _) = mol_shapes_to_dims(mol_shapes=inputs_shape)

        # Add the dense layers (that contain trainable params)
        # (for each degree we convolve with a different weight matrix)
        self.trainable_weights = []
        self.non_trainable_weights = []
        self.inner_3D_layers = []
        self.all_layers = []

        self.idx = max_degree
        self_layer = self.create_inner_layer_fn()
        self_layer_type = self_layer.__class__.__name__.lower()
        self_layer.name = self.name + '_self_' + self_layer_type + '_'

        # TimeDistributed layer wrapper
        self.self_3D_layer_name = self.name + '_self_timedistributed'
        self.self_3D_layer = TimeDistributed(self_layer,
                                             name=self.self_3D_layer_name)
        if self.encode_only:
            self.self_3D_layer.build(
                (None, max_atoms, num_atom_features + num_bond_features))
        else:
            self.self_3D_layer.build((None, max_atoms, self.conv_width))

        for degree in range(max_degree):
            self.idx = degree
            # Initialise inner layer, and rename it
            inner_layer = self.create_inner_layer_fn()
            inner_layer_type = inner_layer.__class__.__name__.lower()
            inner_layer.name = (self.name + '_inner_' + inner_layer_type +
                                '_' + str(degree))

            # Initialise TimeDistributed layer wrapper in order to parallelise
            # dense layer across atoms (3D)
            inner_3D_layer_name = (self.name + '_inner_timedistributed_' +
                                   str(degree))
            inner_3D_layer = TimeDistributed(inner_layer,
                                             name=inner_3D_layer_name)

            # Build the TimeDistributed layer (which will build the Dense layer)
            if self.encode_only:
                inner_3D_layer.build(
                    (None, max_atoms, num_atom_features + num_bond_features))
            else:
                inner_3D_layer.build((None, max_atoms, self.conv_width))

            # Store the inner_3D_layer and its weights
            self.inner_3D_layers.append(inner_3D_layer)
            self.all_layers.append(inner_3D_layer)
            if self.tied_to is not None:
                self.non_trainable_weights.append(inner_3D_layer.layer.kernel)
                if self.bias:
                    self.trainable_weights.append(inner_3D_layer.layer.bias)
            else:
                self.trainable_weights += inner_3D_layer.trainable_weights

        if self.tied_to is not None:
            self.trainable_weights.append(self.self_3D_layer.layer.bias)
            self.non_trainable_weights.append(self.self_3D_layer.layer.kernel)
        else:
            self.trainable_weights += self.self_3D_layer.trainable_weights
        self.all_layers.append(self_layer)

    def call(self, inputs, mask=None):
        atoms, bonds, edges = inputs
        if self.encode_only:
            return self.encode(inputs)
        elif self.decode_only:
            return self.decode(atoms, bonds, edges)
        else:
            return self.decode(self.encode(inputs), bonds, edges)

    def encode(self, inputs):
        atoms, bonds, edges = inputs

        # Import dimensions
        max_atoms = atoms._keras_shape[1]
        num_atom_features = atoms._keras_shape[-1]
        num_bond_features = bonds._keras_shape[-1]
        max_degree = 5

        # Look up the neighbours, sum the edge features and create vni
        summed_features, atom_degrees = self.mask_atoms_by_degree(
            atoms, edges, bonds)
        new_features_by_degree = self.create_layer_by_deg(
            max_degree, atom_degrees,
            (max_atoms, num_atom_features, num_bond_features),
            summed_features)
        zni = add(new_features_by_degree)

        summed_bonds = K.sum(bonds, axis=-2)
        vxi = K.concatenate([atoms, summed_bonds], axis=-1)
        zxi = self.self_3D_layer(vxi)

        vxi_plus_one = keras.layers.add([zni, zxi])
        return vxi_plus_one

    def decode(self, vxi_plus_one, bonds, edges):
        atoms = vxi_plus_one

        # Import dimensions
        max_atoms = atoms.shape[1]
        num_atom_features = atoms.shape[-1]
        num_bond_features = bonds._keras_shape[-1]
        max_degree = 5

        _, atom_degrees = self.mask_atoms_by_degree(atoms, edges, bonds=None)
        td_denses_by_degree = self.create_layer_by_deg(
            max_degree, atom_degrees,
            [max_atoms, num_atom_features, num_bond_features], vxi_plus_one)
        vni_dot = keras.layers.add(td_denses_by_degree)
        vxi_dot = self.self_3D_layer(vxi_plus_one)
        return [vni_dot, vxi_dot]

    def mask_atoms_by_degree(self, atoms, edges, bonds=None):
        # Create a matrix that stores, for each atom, its degree
        atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), dtype='float32'),
                             axis=-1, keepdims=True)

        # For each atom, look up the features of its neighbours
        neighbour_atom_features = neighbour_lookup(atoms, edges,
                                                   include_self=False)

        # Sum along the degree axis to get summed neighbour features
        summed_atom_features = K.sum(neighbour_atom_features, axis=-2)

        # Sum the edge features for each atom and concatenate the summed atom
        # and bond features
        if bonds is not None:
            summed_bond_features = K.sum(bonds, axis=-2)
            summed_features = K.concatenate(
                [summed_atom_features, summed_bond_features], axis=-1)
        else:
            summed_features = summed_atom_features
        return summed_features, atom_degrees

    def create_layer_by_deg(self, max_deg, atom_degrees, inputs,
                            summed_features):
        # For each degree we convolve with a different weight matrix
        [max_atoms, num_atom_features, num_bond_features] = inputs
        new_features_by_degree = []
        for degree in range(max_deg):
            # Create mask for this degree
            atom_masks_this_degree = K.cast(K.equal(atom_degrees, degree),
                                            K.floatx())

            # Multiply with the hidden merge layer (use TimeDistributed
            # because we are dealing with 2D input / 3D including batches).
            # Add _keras_shape to let keras know the dimensions.
            if self.encode_only:
                summed_features._keras_shape = (
                    None, max_atoms, num_atom_features + num_bond_features)
            else:
                summed_features._keras_shape = (None, max_atoms,
                                                self.conv_width)
            new_unmasked_features = self.inner_3D_layers[degree](
                summed_features)

            # Do explicit masking because TimeDistributed does not support
            # masking
            new_masked_features = (new_unmasked_features *
                                   atom_masks_this_degree)
            new_features_by_degree.append(new_masked_features)
        return new_features_by_degree

    def compute_output_shape(self, inputs_shape):
        # Import dimensions
        inputs_shape[0] = (None, int(inputs_shape[0][1]), inputs_shape[0][2])
        (max_atoms, _, _, _,
         num_samples) = mol_shapes_to_dims(mol_shapes=inputs_shape)
        if self.encode_only:
            return (num_samples, max_atoms, self.conv_width)
        else:
            return [(num_samples, max_atoms, self.original_atom_bond_features),
                    (num_samples, max_atoms, self.original_atom_bond_features)]
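# --- Usage sketch (added for illustration; not part of the original source) ---
# How the tied convolutional autoencoder could be used, assuming the same
# (atoms, bonds, edges) graph tensors as above. With encode_only=True the
# layer returns the hidden atom representation; with decode_only=True it
# returns the [vni_dot, vxi_dot] reconstruction pair. What `tied_to` expects
# is determined by DenseTied elsewhere in this repository, so this is
# illustrative only:
#
#   conv_encoder = TiedGraphAutoencoder(conv_width, activ='relu', bias=True,
#                                       init='glorot_uniform',
#                                       encode_only=True)
#   hidden = conv_encoder([atoms, bonds, edges])
#
#   conv_decoder = TiedGraphAutoencoder(
#       conv_width, activ='relu', bias=True, init='glorot_uniform',
#       original_atom_bond_features=num_atom_features + num_bond_features,
#       tied_to=conv_encoder, decode_only=True)
#   vni_dot, vxi_dot = conv_decoder([hidden, bonds, edges])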
def __init__(self,
             p=None,
             h=None,
             include_word_vectors=True,
             word_embedding_weights=None,
             train_word_embeddings=True,
             include_chars=True,
             chars_per_word=16,
             char_embedding_size=8,
             char_conv_filters=100,
             char_conv_kernel_size=5,
             include_syntactical_features=True,
             syntactical_feature_size=50,
             include_exact_match=True,
             dropout_initial_keep_rate=1.,
             dropout_decay_rate=0.977,
             dropout_decay_interval=10000,
             first_scale_down_ratio=0.3,
             transition_scale_down_ratio=0.5,
             growth_rate=20,
             layers_per_dense_block=8,
             nb_dense_blocks=3,
             nb_labels=3,
             inputs=None,
             outputs=None,
             name='DIIN'):
    """
    :ref https://openreview.net/forum?id=r1dHXnH6-&noteId=r1dHXnH6-
    :param p: sequence length of premise
    :param h: sequence length of hypothesis
    :param include_word_vectors: whether or not to include word vectors in the model
    :param word_embedding_weights: matrix of weights for word embeddings (GloVe pre-trained vectors)
    :param train_word_embeddings: whether or not to modify word embeddings while training
    :param include_chars: whether or not to include character embeddings in the model
    :param chars_per_word: how many chars there are per word (a fixed number)
    :param char_embedding_size: output size of the character-embedding layer
    :param char_conv_filters: number of conv filters applied on character embeddings
    :param char_conv_kernel_size: size of the kernel applied on character embeddings
    :param include_syntactical_features: whether or not to include syntactical features (POS tags) in the model
    :param syntactical_feature_size: size of the syntactical feature vector for each word
    :param include_exact_match: whether or not to include exact-match features in the model
    :param dropout_initial_keep_rate: initial keep rate of dropout
    :param dropout_decay_rate: how much to change dropout at each interval
    :param dropout_decay_interval: how many steps to wait between dropout updates
    :param first_scale_down_ratio: first scale-down ratio in DenseNet
    :param transition_scale_down_ratio: transition scale-down ratio in DenseNet
    :param growth_rate: growth rate in DenseNet
    :param layers_per_dense_block: number of layers in one dense block
    :param nb_dense_blocks: number of dense blocks in DenseNet
    :param nb_labels: number of labels (3 by default: entailment, contradiction, neutral)
    """
    if inputs or outputs:
        super(DIIN, self).__init__(inputs=inputs, outputs=outputs, name=name)
        return

    if include_word_vectors:
        assert word_embedding_weights is not None

    inputs = []
    premise_embeddings = []
    hypothesis_embeddings = []

    '''Embedding layer'''
    # 1. Word embedding input
    if include_word_vectors:
        premise_word_input = Input(shape=(p, ), dtype='int64',
                                   name='PremiseWordInput')
        hypothesis_word_input = Input(shape=(h, ), dtype='int64',
                                      name='HypothesisWordInput')
        inputs.append(premise_word_input)
        inputs.append(hypothesis_word_input)

        word_embedding = Embedding(
            input_dim=word_embedding_weights.shape[0],
            output_dim=word_embedding_weights.shape[1],
            weights=[word_embedding_weights],
            trainable=train_word_embeddings,
            name='WordEmbedding')
        premise_word_embedding = word_embedding(premise_word_input)
        hypothesis_word_embedding = word_embedding(hypothesis_word_input)

        premise_word_embedding = DecayingDropout(
            initial_keep_rate=dropout_initial_keep_rate,
            decay_interval=dropout_decay_interval,
            decay_rate=dropout_decay_rate,
            name='PremiseWordEmbeddingDropout')(premise_word_embedding)
        hypothesis_word_embedding = DecayingDropout(
            initial_keep_rate=dropout_initial_keep_rate,
            decay_interval=dropout_decay_interval,
            decay_rate=dropout_decay_rate,
            name='HypothesisWordEmbeddingDropout')(hypothesis_word_embedding)
        premise_embeddings.append(premise_word_embedding)
        hypothesis_embeddings.append(hypothesis_word_embedding)

    # 2. Character input
    if include_chars:
        premise_char_input = Input(shape=(p, chars_per_word),
                                   name='PremiseCharInput')
        hypothesis_char_input = Input(shape=(h, chars_per_word),
                                      name='HypothesisCharInput')
        inputs.append(premise_char_input)
        inputs.append(hypothesis_char_input)

        # Share weights of character-level embedding for premise and hypothesis
        character_embedding_layer = TimeDistributed(
            Sequential([
                Embedding(input_dim=100,
                          output_dim=char_embedding_size,
                          input_length=chars_per_word),
                Conv1D(filters=char_conv_filters,
                       kernel_size=char_conv_kernel_size),
                GlobalMaxPooling1D()
            ]),
            name='CharEmbedding')
        character_embedding_layer.build(input_shape=(None, None,
                                                     chars_per_word))
        premise_char_embedding = character_embedding_layer(premise_char_input)
        hypothesis_char_embedding = character_embedding_layer(
            hypothesis_char_input)
        premise_embeddings.append(premise_char_embedding)
        hypothesis_embeddings.append(hypothesis_char_embedding)

    # 3. Syntactical features
    if include_syntactical_features:
        premise_syntactical_input = Input(
            shape=(p, syntactical_feature_size),
            name='PremiseSyntacticalInput')
        hypothesis_syntactical_input = Input(
            shape=(h, syntactical_feature_size),
            name='HypothesisSyntacticalInput')
        inputs.append(premise_syntactical_input)
        inputs.append(hypothesis_syntactical_input)
        premise_embeddings.append(premise_syntactical_input)
        hypothesis_embeddings.append(hypothesis_syntactical_input)

    # 4. One-hot exact match feature
    if include_exact_match:
        premise_exact_match_input = Input(shape=(p, ),
                                          name='PremiseExactMatchInput')
        hypothesis_exact_match_input = Input(shape=(h, ),
                                             name='HypothesisExactMatchInput')
        premise_exact_match = Reshape(
            target_shape=(p, 1))(premise_exact_match_input)
        hypothesis_exact_match = Reshape(
            target_shape=(h, 1))(hypothesis_exact_match_input)
        inputs.append(premise_exact_match_input)
        inputs.append(hypothesis_exact_match_input)
        premise_embeddings.append(premise_exact_match)
        hypothesis_embeddings.append(hypothesis_exact_match)

    # Concatenate all features
    premise_embedding = Concatenate(
        name='PremiseEmbedding')(premise_embeddings)
    hypothesis_embedding = Concatenate(
        name='HypothesisEmbedding')(hypothesis_embeddings)
    d = K.int_shape(hypothesis_embedding)[-1]

    '''Encoding layer'''
    # Now we have the embedded premise [p x d] along with the embedded
    # hypothesis [h x d]
    premise_encoding = Encoding(name='PremiseEncoding')(premise_embedding)
    hypothesis_encoding = Encoding(
        name='HypothesisEncoding')(hypothesis_embedding)

    '''Interaction layer'''
    interaction = Interaction(name='Interaction')(
        [premise_encoding, hypothesis_encoding])

    '''Feature Extraction layer'''
    feature_extractor_input = Conv2D(
        filters=int(d * first_scale_down_ratio),
        kernel_size=1,
        activation=None,
        name='FirstScaleDown')(interaction)
    feature_extractor = DenseNet(
        include_top=False,
        input_tensor=Input(shape=K.int_shape(feature_extractor_input)[1:]),
        nb_dense_block=nb_dense_blocks,
        nb_layers_per_block=layers_per_dense_block,
        compression=transition_scale_down_ratio,
        growth_rate=growth_rate)(feature_extractor_input)

    '''Output layer'''
    features = DecayingDropout(
        initial_keep_rate=dropout_initial_keep_rate,
        decay_interval=dropout_decay_interval,
        decay_rate=dropout_decay_rate,
        name='Features')(feature_extractor)
    out = Dense(units=nb_labels, activation='softmax', name='Output')(features)

    super(DIIN, self).__init__(inputs=inputs, outputs=out, name=name)
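# --- Usage sketch (added for illustration; not part of the original source) ---
# Instantiating the DIIN model with pre-trained word vectors. The variable
# `embedding_matrix` (an ndarray of shape [vocab_size, embedding_dim]) and the
# sequence lengths are placeholders:
#
#   model = DIIN(p=32, h=32,
#                word_embedding_weights=embedding_matrix,
#                include_chars=True, chars_per_word=16,
#                nb_labels=3)
#   model.compile(optimizer='adam',
#                 loss='categorical_crossentropy',
#                 metrics=['accuracy'])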
def build_model(cfg, summary=False, word_embedding_matrix=None):

    def _get_model(base_dir, cfg_=None):
        config_file = os.path.join(base_dir, 'bert_config.json')
        checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
        if not os.path.exists(config_file):
            config_file = os.path.join(base_dir, 'bert_config_large.json')
            checkpoint_file = os.path.join(base_dir, 'roberta_l24_large_model')
        print(config_file, checkpoint_file)
        # model = load_trained_model_from_checkpoint(config_file, checkpoint_file,
        #                                            training=True,
        #                                            seq_len=cfg_['maxlen'])
        model = load_trained_model_from_checkpoint(
            config_file,
            checkpoint_file,
            training=False,
            trainable=cfg_["bert_trainable"],
            output_layer_num=cfg["cls_num"],
            seq_len=cfg_['maxlen'])
        return model

    def get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
        if cfg["opt"].lower() == "nadam":
            opt = Nadam(lr=lr)
        else:
            total_steps, warmup_steps = calc_train_steps(
                num_example=num_example,
                batch_size=B_SIZE,
                epochs=MAX_EPOCH,
                warmup_proportion=warmup_proportion,
            )
            opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
        return opt

    model1 = _get_model(cfg["base_dir"], cfg)
    # model1 = Model(inputs=model1.inputs[:2], outputs=model1.layers[-7].output)
    model1 = Model(inputs=model1.inputs[:2], outputs=model1.layers[-7].output)

    if word_embedding_matrix is not None:
        embed_layer = Embedding(input_dim=word_embedding_matrix.shape[0],
                                output_dim=word_embedding_matrix.shape[1],
                                weights=[word_embedding_matrix],
                                trainable=cfg["trainable"],
                                name="embed_layer")

    inp_token1 = Input(shape=(None, ), dtype=np.int32,
                       name="query_token_input")
    inp_segm1 = Input(shape=(None, ), dtype=np.float32,
                      name="query_segm_input")
    # inp_token2 = Input(shape=(None, ), dtype=np.int32)
    # inp_segm2 = Input(shape=(None, ), dtype=np.float32)
    inp_image = Input(shape=(None, 2048), dtype=np.float32,
                      name="image_input")
    inp_image_mask = Input(shape=(None, ), dtype=np.float32,
                           name="image_mask_input")
    inp_pos = Input(shape=(None, 5), dtype=np.float32, name="image_pos_input")
    inp_image_char = Input(shape=(None, cfg["max_char"]), dtype=np.int32,
                           name='image_char_input')

    mask = Lambda(lambda x: K.cast(K.not_equal(x, cfg["x_pad"]), 'float32'),
                  name="token_mask")(inp_token1)
    word_embed = embed_layer(inp_token1)
    word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
        [word_embed, mask])
    word_embed = Bidirectional(LSTM(cfg["unit1_1"], return_sequences=True),
                               merge_mode="sum")(word_embed)
    word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
        [word_embed, mask])

    sequence_output = model1([inp_token1, inp_segm1])
    sequence_output = Concatenate(axis=-1)([sequence_output, word_embed])
    text_pool = Lambda(lambda x: x[:, 0, :])(sequence_output)

    # Share weights of character-level embedding for premise and hypothesis
    character_embedding_layer = TimeDistributed(
        Sequential([
            embed_layer,
            # Embedding(input_dim=100, output_dim=char_embedding_size,
            #           input_length=chars_per_word),
            Conv1D(filters=128, kernel_size=3, name="char_embed_conv1d"),
            GlobalMaxPooling1D()
        ]),
        name='CharEmbedding')
    character_embedding_layer.build(input_shape=(None, None, cfg["max_char"]))
    image_char_embed = character_embedding_layer(inp_image_char)

    image_embed = Concatenate(axis=-1)([image_char_embed, inp_image])
    image_embed = Dense(512, activation='relu',
                        name='image_embed')(image_embed)
    image_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
        [image_embed, inp_image_mask])

    pos_embed = Dense(512, activation='relu', name='pos_embed')(inp_pos)
    pos_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
        [pos_embed, inp_image_mask])

    embed = Add()([image_embed, pos_embed])  # batch, maxlen(10), 1024+128
    image_embed = Bidirectional(LSTM(1152, return_sequences=True),
                                merge_mode="sum")(embed)
    image_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
        [image_embed, inp_image_mask])
    image_pool = Lambda(lambda x: x[:, 0, :])(image_embed)

    pool = Concatenate(axis=-1)([image_pool, text_pool])
    pool = Dense(2048, activation="relu")(pool)
    pool = Dense(512, activation="relu")(pool)
    pool = Dense(128, activation="relu")(pool)
    output = Dense(2, activation='softmax', name='output')(pool)

    opt = get_opt(num_example=cfg["num_example"], lr=cfg["lr"],
                  min_lr=cfg['min_lr'])
    model = Model(inputs=[
        inp_token1, inp_segm1, inp_image, inp_image_mask, inp_pos,
        inp_image_char
    ], outputs=[output])
    # model.compile(optimizer=opt,
    #               loss={'output': 'sparse_categorical_crossentropy'},
    #               metrics=['accuracy'])
    if summary:
        model.summary()
    return model
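# --- Usage sketch (added for illustration; not part of the original source) ---
# The cfg keys below mirror the ones read inside build_model; all values are
# placeholders, and B_SIZE / MAX_EPOCH are expected as module-level globals:
#
#   cfg = {
#       "base_dir": "/path/to/bert_checkpoint",
#       "bert_trainable": True, "cls_num": 1, "maxlen": 128,
#       "opt": "nadam", "lr": 2e-5, "min_lr": 1e-6,
#       "trainable": True, "x_pad": 0, "max_char": 8,
#       "unit1_1": 128, "num_example": 100000,
#   }
#   model = build_model(cfg, summary=True,
#                       word_embedding_matrix=word_embedding_matrix)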
def __init__(self,
             p=None,
             h=None,
             use_word_embedding=True,
             word_embedding_weights=None,
             train_word_embeddings=False,
             dropout_init_keep_rate=1.0,
             dropout_decay_interval=10000,
             dropout_decay_rate=0.977,
             use_chars=False,
             chars_per_word=16,
             char_input_dim=100,
             char_embedding_size=8,
             char_conv_filters=100,
             char_conv_kernel_size=5,
             use_syntactical_features=False,
             syntactical_feature_size=50,
             use_exact_match=False,
             first_scale_down_ratio=0.3,
             nb_dense_blocks=3,
             layers_per_dense_block=8,
             nb_labels=3,
             growth_rate=20,
             transition_scale_down_ratio=0.5,
             inputs=None,
             outputs=None,
             name="DIIN"):
    """Densely Interactive Inference Network (DIIN) model from the paper
    `Natural Language Inference over Interaction Space`
    (https://openreview.net/forum?id=r1dHXnH6-&noteId=r1dHXnH6-)

    :param p: sequence length of premise
    :param h: sequence length of hypothesis
    :param use_word_embedding: whether or not to include word vectors in the model
    :param use_chars: whether or not to include character embeddings in the model
    :param use_syntactical_features: whether or not to include syntactical features (POS tags) in the model
    :param use_exact_match: whether or not to include exact-match features in the model
    :param word_embedding_weights: matrix of weights for word embeddings (pre-trained vectors)
    :param train_word_embeddings: whether or not to modify word embeddings while training
    :param dropout_init_keep_rate: initial keep rate of dropout
    :param dropout_decay_interval: number of steps to wait before the next update (steps are single batches, not epochs)
    :param dropout_decay_rate: how much to change dropout at each interval
    :param chars_per_word: how many chars there are per word
    :param char_input_dim: number of unique characters
    :param char_embedding_size: output size of the character-embedding layer
    :param char_conv_filters: number of filters of the kernel applied on character embeddings
    :param char_conv_kernel_size: size of the kernel applied on character embeddings
    :param syntactical_feature_size: size of the syntactical feature vector for each word
    :param first_scale_down_ratio: scale-down ratio of the feature map used as input to the first DenseNet block
    :param nb_dense_blocks: number of dense blocks in DenseNet
    :param layers_per_dense_block: number of layers in one dense block
    :param nb_labels: number of labels
    :param growth_rate: growth rate in DenseNet
    :param transition_scale_down_ratio: transition scale-down ratio in DenseNet
    :param inputs: inputs of keras models
    :param outputs: outputs of keras models
    :param name: name of the model
    """
    if inputs or outputs:
        super(DIINModel, self).__init__(inputs=inputs, outputs=outputs,
                                        name=name)
        return

    if use_word_embedding:
        assert word_embedding_weights is not None, \
            "Word embedding weights are needed"

    inputs = []
    premise_features = []
    hypothesis_features = []

    """Embedding layer"""
    # Input: word embedding
    if use_word_embedding:
        premise_word_input = Input(shape=(p, ), dtype="int64",
                                   name="premise_word_input")
        hypothesis_word_input = Input(shape=(h, ), dtype="int64",
                                      name="hypothesis_word_input")
        inputs.append(premise_word_input)
        inputs.append(hypothesis_word_input)

        word_embedding = Embedding(
            input_dim=word_embedding_weights.shape[0],
            output_dim=word_embedding_weights.shape[1],
            weights=[word_embedding_weights],
            trainable=train_word_embeddings,
            name="word_embedding")
        premise_word_embedding = word_embedding(premise_word_input)
        hypothesis_word_embedding = word_embedding(hypothesis_word_input)

        premise_word_embedding = DecayingDropout(
            init_keep_rate=dropout_init_keep_rate,
            decay_interval=dropout_decay_interval,
            decay_rate=dropout_decay_rate,
            name="premise_word_dropout")(premise_word_embedding)
        hypothesis_word_embedding = DecayingDropout(
            init_keep_rate=dropout_init_keep_rate,
            decay_interval=dropout_decay_interval,
            decay_rate=dropout_decay_rate,
            name="hypothesis_word_dropout")(hypothesis_word_embedding)
        premise_features.append(premise_word_embedding)
        hypothesis_features.append(hypothesis_word_embedding)

    # Input: character embedding
    if use_chars:
        premise_char_input = Input(shape=(p, chars_per_word), dtype="int64",
                                   name="premise_char_input")
        hypothesis_char_input = Input(shape=(h, chars_per_word),
                                      dtype="int64",
                                      name="hypothesis_char_input")
        inputs.append(premise_char_input)
        inputs.append(hypothesis_char_input)

        # Share weights of character-level embedding for premise and hypothesis
        character_embedding = TimeDistributed(
            Sequential([
                Embedding(input_dim=char_input_dim,
                          output_dim=char_embedding_size,
                          input_length=chars_per_word),
                Conv1D(filters=char_conv_filters,
                       kernel_size=char_conv_kernel_size),
                GlobalMaxPooling1D(),
            ]),
            name="char_embedding")
        character_embedding.build(
            input_shape=(None, None, chars_per_word))  # Set input shape
        premise_char_embedding = character_embedding(premise_char_input)
        hypothesis_char_embedding = character_embedding(hypothesis_char_input)
        premise_features.append(premise_char_embedding)
        hypothesis_features.append(hypothesis_char_embedding)

    # Input: syntactical features
    if use_syntactical_features:
        premise_syntactical_input = Input(
            shape=(p, syntactical_feature_size),
            name="premise_syntactical_input")
        hypothesis_syntactical_input = Input(
            shape=(h, syntactical_feature_size),
            name="hypothesis_syntactical_input")
        inputs.append(premise_syntactical_input)
        inputs.append(hypothesis_syntactical_input)
        premise_features.append(premise_syntactical_input)
        hypothesis_features.append(hypothesis_syntactical_input)

    # Input: one-hot exact match feature
    if use_exact_match:
        premise_exact_match_input = Input(shape=(p, ),
                                          name='premise_exact_match_input')
        hypothesis_exact_match_input = Input(
            shape=(h, ), name='hypothesis_exact_match_input')
        inputs.append(premise_exact_match_input)
        inputs.append(hypothesis_exact_match_input)
        premise_exact_match = Reshape(
            target_shape=(p, 1))(premise_exact_match_input)
        hypothesis_exact_match = Reshape(
            target_shape=(h, 1))(hypothesis_exact_match_input)
        premise_features.append(premise_exact_match)
        hypothesis_features.append(hypothesis_exact_match)

    # Concatenate all features
    if len(premise_features) > 1:
        premise_embedding = Concatenate()(premise_features)
        hypothesis_embedding = Concatenate()(hypothesis_features)
    else:
        premise_embedding = premise_features[0]
        hypothesis_embedding = hypothesis_features[0]
    d = K.int_shape(premise_embedding)[-1]

    """Encoding layer"""
    premise_encoding = Encoding(name="premise_encoding")(premise_embedding)
    hypothesis_encoding = Encoding(
        name="hypothesis_encoding")(hypothesis_embedding)

    """Interaction layer"""
    interaction = Interaction(name="interaction")(
        [premise_encoding, hypothesis_encoding])

    """Feature extraction layer"""
    feature_extractor_input = Conv2D(
        filters=int(d * first_scale_down_ratio),
        kernel_size=1,
        activation=None,
        name="bottleneck")(interaction)  # Bottleneck layer
    feature_extractor = DenseNet(
        input_tensor=Input(shape=K.int_shape(feature_extractor_input)[1:]),
        include_top=False,
        nb_dense_block=nb_dense_blocks,
        nb_layers_per_block=layers_per_dense_block,
        growth_rate=growth_rate,
        compression=transition_scale_down_ratio)(feature_extractor_input)

    """Output layer"""
    features = DecayingDropout(
        init_keep_rate=dropout_init_keep_rate,
        decay_interval=dropout_decay_interval,
        decay_rate=dropout_decay_rate,
        name="features")(feature_extractor)
    if nb_labels == 2:
        out = Dense(1, activation="sigmoid", name="output")(features)
    else:
        out = Dense(nb_labels, activation="softmax", name="output")(features)

    super(DIINModel, self).__init__(inputs=inputs, outputs=out, name=name)
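# --- Usage sketch (added for illustration; not part of the original source) ---
# Instantiating DIINModel with only word embeddings enabled (the default
# feature flags); `embedding_matrix` is a placeholder for pre-trained vectors:
#
#   model = DIINModel(p=32, h=32,
#                     use_word_embedding=True,
#                     word_embedding_weights=embedding_matrix,
#                     nb_labels=3)
#   model.compile(optimizer='adam',
#                 loss='categorical_crossentropy',
#                 metrics=['accuracy'])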