    def _build_network(self,
                       vocab_size,
                       char_vocab_size,
                       emb_weights=None,
                       hidden_units=256,
                       trainable=False,
                       batch_size=1):
        print('Build model...')

        text_input = Input(shape=(self._line_maxlen, ))

        if emb_weights is None or len(emb_weights) == 0:
            emb = Embedding(vocab_size,
                            128,
                            input_length=self._line_maxlen,
                            embeddings_initializer='glorot_normal',
                            trainable=trainable)(text_input)
        else:
            emb = Embedding(vocab_size,
                            emb_weights.shape[1],
                            input_length=self._line_maxlen,
                            weights=[emb_weights],
                            trainable=trainable)(text_input)

        char_lstm1 = LSTM(int(hidden_units / 4),
                          kernel_initializer='he_normal',
                          recurrent_initializer='orthogonal',
                          bias_initializer='he_normal',
                          activation='sigmoid',
                          recurrent_activation='sigmoid',
                          dropout=0.25,
                          recurrent_dropout=0.5,
                          unit_forget_bias=False,
                          return_sequences=True)(emb)

        char_lstm2 = LSTM(int(hidden_units / 4),
                          kernel_initializer='he_normal',
                          recurrent_initializer='orthogonal',
                          bias_initializer='he_normal',
                          activation='sigmoid',
                          recurrent_activation='sigmoid',
                          dropout=0.25,
                          recurrent_dropout=0.5,
                          unit_forget_bias=False,
                          return_sequences=True,
                          go_backwards=True)(emb)

        char_merged = add([char_lstm1, char_lstm2])

        gmp = Attention()(char_merged)

        text_emb = Reshape((int(emb.shape[1]), int(emb.shape[2]), 1))(emb)

        text_cnn1 = Convolution2D(
            int(hidden_units / 8), (2, 1),
            kernel_initializer='he_normal',
            bias_initializer='he_normal',
            activation='relu',
            padding='valid',
            use_bias=True,
            input_shape=(1, self._line_char_maxlen))(text_emb)
        text_cnn1 = MaxPooling2D((2, 1))(text_cnn1)
        text_cnn1 = Dropout(0.5)(text_cnn1)

        text_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                                  kernel_initializer='he_normal',
                                  bias_initializer='he_normal',
                                  activation='relu',
                                  padding='valid',
                                  use_bias=True)(text_cnn1)
        text_cnn2 = MaxPooling2D((2, 1))(text_cnn2)
        text_cnn2 = Dropout(0.5)(text_cnn2)

        text_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                                  kernel_initializer='he_normal',
                                  bias_initializer='he_normal',
                                  activation='relu',
                                  padding='valid',
                                  use_bias=True)(text_cnn2)
        text_cnn3 = MaxPooling2D((2, 1))(text_cnn3)
        text_cnn3 = Dropout(0.5)(text_cnn3)

        char_gmp = GlobalMaxPooling2D()(text_cnn3)

        merged = concatenate([char_gmp, gmp])

        output = Dense(2, activation='softmax')(merged)

        model = Model(inputs=[text_input], outputs=output)

        adam = Adam(lr=0.001)

        model.compile(loss='binary_crossentropy',
                      optimizer=adam,
                      metrics=['accuracy'])
        print('No. of parameters:', model.count_params())

        model.summary()
        return model
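
    # A minimal usage sketch for the builder above (hypothetical names: `clf` is
    # an instance of the surrounding class, `X_train` a padded index matrix,
    # `y_train` integer labels in {0, 1}). Note that the Dense(2, softmax) head
    # paired with binary_crossentropy expects one-hot targets:
    #
    #     from keras.utils import to_categorical
    #     model = clf._build_network(vocab_size=20000, char_vocab_size=0,
    #                                emb_weights=None, trainable=True)
    #     y_cat = to_categorical(y_train, num_classes=2)
    #     model.fit(X_train, y_cat, batch_size=32, epochs=5, validation_split=0.1)
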
    def _build_network(self,
                       vocab_size,
                       char_vocab_size,
                       emb_weights=None,
                       hidden_units=256,
                       trainable=False,
                       batch_size=1):
        print('Build model...')

        text_input = Input(name='text', shape=(self._line_maxlen, ))
        text_input_mask = Masking(mask_value=0)(text_input)

        if emb_weights is None or len(emb_weights) == 0:
            emb = Embedding(vocab_size,
                            128,
                            input_length=self._line_maxlen,
                            embeddings_initializer='glorot_normal',
                            trainable=trainable)(text_input_mask)
        else:
            emb = Embedding(vocab_size,
                            emb_weights.shape[1],
                            input_length=self._line_maxlen,
                            weights=[emb_weights],
                            trainable=trainable)(text_input_mask)

        emb = Reshape((int(emb.shape[1]), int(emb.shape[2]), 1))(emb)

        t_cnn1 = Convolution2D(int(hidden_units / 8), (2, 1),
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='relu',
                               padding='valid',
                               use_bias=True,
                               input_shape=(1, self._line_maxlen))(emb)
        t_cnn1 = MaxPooling2D((2, 1))(t_cnn1)
        t_cnn1 = Dropout(0.5)(t_cnn1)

        t_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='relu',
                               padding='valid',
                               use_bias=True)(t_cnn1)
        t_cnn2 = MaxPooling2D((2, 1))(t_cnn2)
        t_cnn2 = Dropout(0.5)(t_cnn2)

        t_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='relu',
                               padding='valid',
                               use_bias=True)(t_cnn2)
        t_cnn3 = MaxPooling2D((2, 1))(t_cnn3)
        t_cnn3 = Dropout(0.5)(t_cnn3)

        gmp = GlobalMaxPooling2D()(t_cnn3)

        char_input = Input(name='char_text', shape=(self._line_char_maxlen, ))

        char_emb = Embedding(char_vocab_size,
                             25,
                             input_length=self._line_char_maxlen,
                             embeddings_initializer='glorot_normal',
                             trainable=True)(char_input)

        char_emb = Reshape(
            (int(char_emb.shape[1]), int(char_emb.shape[2]), 1))(char_emb)

        char_cnn1 = Convolution2D(
            int(hidden_units / 8), (2, 1),
            kernel_initializer='he_normal',
            bias_initializer='he_normal',
            activation='relu',
            padding='valid',
            use_bias=True,
            input_shape=(1, self._line_char_maxlen))(char_emb)
        char_cnn1 = MaxPooling2D((2, 1))(char_cnn1)
        char_cnn1 = Dropout(0.5)(char_cnn1)

        char_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                                  kernel_initializer='he_normal',
                                  bias_initializer='he_normal',
                                  activation='relu',
                                  padding='valid',
                                  use_bias=True)(char_cnn1)
        char_cnn2 = MaxPooling2D((2, 1))(char_cnn2)
        char_cnn2 = Dropout(0.5)(char_cnn2)

        char_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                                  kernel_initializer='he_normal',
                                  bias_initializer='he_normal',
                                  activation='relu',
                                  padding='valid',
                                  use_bias=True)(char_cnn2)
        char_cnn3 = MaxPooling2D((2, 1))(char_cnn3)
        char_cnn3 = Dropout(0.5)(char_cnn3)

        char_gmp = GlobalMaxPooling2D()(char_cnn3)

        merged = concatenate([char_gmp, gmp])

        output = Dense(2, activation='softmax')(merged)

        model = Model(inputs=[text_input, char_input], outputs=output)

        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        print('No. of parameters:', model.count_params())

        model.summary()
        return model
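
    # A minimal training sketch for this two-input variant (hypothetical names:
    # `clf`, `X_words`, `X_chars`, `y_train`, `word_index`, `char_index`). Since
    # the inputs are named 'text' and 'char_text', Keras accepts a dict keyed by
    # those names:
    #
    #     from keras.utils import to_categorical
    #     model = clf._build_network(vocab_size=len(word_index) + 1,
    #                                char_vocab_size=len(char_index) + 1,
    #                                emb_weights=None)
    #     y_cat = to_categorical(y_train, num_classes=2)
    #     model.fit({'text': X_words, 'char_text': X_chars}, y_cat,
    #               batch_size=32, epochs=5, validation_split=0.1)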
Example No. 3
class SiameseTripletGraphEmbedding(SiameseGraphEmbedding):
    def __init__(self,
                 d=128,
                 margin=0.2,
                 batch_size=2048,
                 lr=0.001,
                 epochs=10,
                 directed_proba=0.5,
                 weighted=True,
                 compression_func="sqrt",
                 negative_sampling_ratio=2.0,
                 max_length=1400,
                 truncating="post",
                 seed=0,
                 verbose=False,
                 conv1_kernel_size=12,
                 conv1_batch_norm=False,
                 max1_pool_size=6,
                 conv2_kernel_size=6,
                 conv2_batch_norm=True,
                 max2_pool_size=3,
                 lstm_unit_size=320,
                 dense1_unit_size=1024,
                 dense2_unit_size=512,
                 directed_distance="euclidean",
                 undirected_distance="euclidean",
                 source_target_dense_layers=True,
                 embedding_normalization=False,
                 **kwargs):
        super().__init__(d, margin, batch_size, lr, epochs, directed_proba,
                         weighted, compression_func, negative_sampling_ratio,
                         max_length, truncating, seed, verbose,
                         conv1_kernel_size, conv1_batch_norm, max1_pool_size,
                         conv2_kernel_size, conv2_batch_norm, max2_pool_size,
                         lstm_unit_size, dense1_unit_size, dense2_unit_size,
                         directed_distance, undirected_distance,
                         source_target_dense_layers, embedding_normalization,
                         **kwargs)

    def identity_loss(self, y_true, y_pred):
        return K.mean(y_pred - 0 * y_true)

    def triplet_loss(self, inputs):
        encoded_i, encoded_j, encoded_k, is_directed = inputs

        positive_distance = Lambda(self.st_euclidean_distance,
                                   name="lambda_positive_distances")(
                                       [encoded_i, encoded_j, is_directed])
        negative_distance = Lambda(self.st_euclidean_distance,
                                   name="lambda_negative_distances")(
                                       [encoded_i, encoded_k, is_directed])
        return K.mean(
            K.maximum(0.0,
                      positive_distance - negative_distance + self.margin))
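
    # A quick numpy sanity check of the margin hinge used above (illustrative
    # numbers only, not model outputs):
    #
    #     import numpy as np
    #     margin = 0.2
    #     positive_distance = np.array([0.3, 0.9])   # d(anchor, positive)
    #     negative_distance = np.array([1.0, 0.5])   # d(anchor, negative)
    #     loss = np.mean(np.maximum(0.0, positive_distance - negative_distance + margin))
    #     print(loss)  # (max(0, -0.5) + max(0, 0.6)) / 2 = 0.3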

    def build_keras_model(self, multi_gpu=False):
        if multi_gpu:
            device = "/cpu:0"
            allow_soft_placement = True
        else:
            device = "/gpu:0"
            allow_soft_placement = False

        K.clear_session()
        tf.reset_default_graph()
        config = tf.ConfigProto(allow_soft_placement=allow_soft_placement,
                                log_device_placement=True)
        self.sess = tf.Session(config=config)
        set_session(self.sess)

        with tf.device(device):
            input_seq_i = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_i")
            input_seq_j = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_j")
            input_seq_k = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_k")
            is_directed = Input(batch_shape=(self.batch_size, 1),
                                dtype=tf.int8,
                                name="is_directed")

            # build create_network to use in each siamese 'leg'
            self.lstm_network = self.create_lstm_network()

            # encode each of the three inputs into a vector with the conv_lstm_network
            encoded_i = self.lstm_network(input_seq_i)
            encoded_j = self.lstm_network(input_seq_j)
            encoded_k = self.lstm_network(input_seq_k)
            if self.verbose:
                print(encoded_i)
                print(encoded_j)
                print(encoded_k)

            output = Lambda(self.triplet_loss,
                            name="lambda_triplet_loss_output")(
                                [encoded_i, encoded_j, encoded_k, is_directed])

            self.siamese_net = Model(
                inputs=[input_seq_i, input_seq_j, input_seq_k, is_directed],
                outputs=output)

        # Multi-gpu parallelization
        if multi_gpu:
            self.siamese_net = multi_gpu_model(self.siamese_net,
                                               gpus=4,
                                               cpu_merge=True,
                                               cpu_relocation=False)

        # Compile & train
        self.siamese_net.compile(
            loss=self.identity_loss,  # alternatives: binary_crossentropy, cross_entropy, contrastive_loss
            optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, epsilon=0.1),
        )
        if self.verbose:
            print("Network total weights:", self.siamese_net.count_params())

    def learn_embedding(self,
                        network: MultiDigraphNetwork,
                        network_val=None,
                        multi_gpu=False,
                        subsample=True,
                        n_steps=500,
                        validation_steps=None,
                        tensorboard=True,
                        histogram_freq=0,
                        early_stopping=False,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False,
                        rebuild_model=False,
                        seed=0,
                        **kwargs):

        generator_train = self.get_training_data_generator(
            network, n_steps, seed)

        if network_val is not None:
            if not hasattr(self, "generator_val"):
                self.generator_val = SampledTripletDataGenerator(
                    network=network_val,
                    weighted=self.weighted,
                    batch_size=self.batch_size,
                    replace=True,
                    seed=seed,
                    verbose=self.verbose,
                    maxlen=self.max_length,
                    padding='post',
                    truncating="post",
                    tokenizer=generator_train.tokenizer)
        else:
            self.generator_val = None

        if self.generator_val is not None:
            assert generator_train.tokenizer.word_index == self.generator_val.tokenizer.word_index
        if not hasattr(self, "siamese_net") or rebuild_model:
            self.build_keras_model(multi_gpu)

        try:
            print(self.log_dir)
            self.hist = self.siamese_net.fit_generator(
                generator_train,
                epochs=self.epochs,
                validation_data=self.generator_val,
                validation_steps=validation_steps,
                callbacks=self.get_callbacks(early_stopping, tensorboard,
                                             histogram_freq),
                use_multiprocessing=True,
                workers=8,
                **kwargs)
        except KeyboardInterrupt:
            print("Stop training")
        finally:
            self.save_network_weights()

    def get_training_data_generator(self, network, n_steps=250, seed=0):
        if not hasattr(self, "generator_train"):
            self.generator_train = SampledTripletDataGenerator(
                network=network,
                weighted=self.weighted,
                batch_size=self.batch_size,
                replace=True,
                seed=seed,
                verbose=self.verbose,
                maxlen=self.max_length,
                padding='post',
                truncating=self.truncating)
            self.node_list = self.generator_train.node_list
        return self.generator_train

    def get_reconstructed_adj(self,
                              beta=2.0,
                              X=None,
                              node_l=None,
                              node_l_b=None,
                              edge_type="d",
                              interpolate=False):
        """
        :param X:
        :param node_l: list of node names
        :param edge_type:
        :return:
        """
        if hasattr(self, "reconstructed_adj") and edge_type == "d":
            adj = self.reconstructed_adj
        else:
            embs = self.get_embeddings()
            assert len(self.node_list) == embs.shape[0]

            adj = self._pairwise_similarity(embs, edge_type)

        if interpolate:
            adj = np.interp(adj, (adj.min(), adj.max()), (0, 1))
        if (node_l is None or node_l == self.node_list) and node_l_b is None:
            if edge_type == "d":
                self.reconstructed_adj = adj  # Cache reconstructed_adj to memory for faster recall
            return adj
        elif set(node_l) < set(self.node_list) or node_l_b is not None:
            return self._select_adj_indices(adj, node_l, node_l_b)
        elif not (set(node_l) < set(self.node_list)):
            raise Exception("A node in node_l is not in self.node_list.")

    def _pairwise_similarity(self, embeddings, edge_type="d"):
        if edge_type == 'd':
            embeddings_X = embeddings[:, 0:int(self.embedding_d / 2)]
            embeddings_Y = embeddings[:,
                                      int(self.embedding_d /
                                          2):self.embedding_d]

            if self.directed_distance == "euclidean_ball":
                embeddings_stacked = np.vstack([embeddings_X, embeddings_Y])
                adj = radius_neighbors_graph(embeddings_stacked,
                                             radius=self.margin,
                                             n_jobs=-2)
                adj = adj[0:embeddings_X.shape[0], :][:,
                                                      embeddings_X.shape[0]:]
                print("radius_neighbors_graph")

            elif self.directed_distance == "euclidean":
                adj = pairwise_distances(X=embeddings_X,
                                         Y=embeddings_Y,
                                         metric="euclidean",
                                         n_jobs=-2)

                # Get node-specific adaptive threshold
                # adj = self.transform_adj_adaptive_threshold(adj, margin=0)
                # adj = self.transform_adj_beta_exp(adj, edge_types="d", sample_negative=self.negative_sampling_ratio)
                adj = np.exp(-2.0 * adj)
                print("Euclidean dist")

            elif self.directed_distance == "cosine":
                adj = pairwise_distances(X=embeddings_X,
                                         Y=embeddings_Y,
                                         metric="cosine",
                                         n_jobs=-2)
                print("Cosine similarity")

            elif self.directed_distance == "dot_sigmoid":
                adj = np.matmul(embeddings_X, embeddings_Y.T)
                adj = sigmoid(adj)
                print("Dot product & sigmoid")
            elif self.directed_distance == "dot_softmax":
                adj = np.matmul(embeddings_X, embeddings_Y.T)
                adj = softmax(adj)
                print("Dot product & softmax")

        elif edge_type == 'u':
            if self.undirected_distance == "euclidean_ball":
                adj = radius_neighbors_graph(embeddings,
                                             radius=self.margin,
                                             n_jobs=-2)

            elif self.undirected_distance == "euclidean":
                adj = pairwise_distances(X=embeddings,
                                         metric="euclidean",
                                         n_jobs=-2)
                # adj = np.exp(-2.0 * adj)
                adj = self.transform_adj_beta_exp(adj,
                                                  edge_types=["u", "u_n"],
                                                  sample_negative=False)
                # adj = self.transform_adj_adaptive_threshold(adj, margin=self.margin/2)
                print("Euclidean dist")

            elif self.undirected_distance == "cosine":
                adj = pairwise_distances(X=embeddings,
                                         metric="cosine",
                                         n_jobs=-2)

            elif self.undirected_distance == "dot_sigmoid":
                adj = np.matmul(embeddings, embeddings.T)
                adj = sigmoid(adj)
            elif self.undirected_distance == "dot_softmax":
                adj = np.matmul(embeddings, embeddings.T)
                adj = softmax(adj)
        else:
            raise Exception("Unsupported edge_type", edge_type)
        return adj
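
    # For the "euclidean" branches, np.exp(-2.0 * adj) maps distances in
    # [0, inf) to similarities in (0, 1]; a small standalone illustration
    # (arbitrary values):
    #
    #     import numpy as np
    #     d = np.array([0.0, 0.35, 1.0, 3.0])   # pairwise Euclidean distances
    #     print(np.exp(-2.0 * d).round(3))      # [1.    0.497 0.135 0.002]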

    def transform_adj_adaptive_threshold(self,
                                         adj_pred,
                                         margin=0.2,
                                         edge_types="d"):
        print("adaptive threshold")
        adj_true = self.generator_train.network.get_adjacency_matrix(
            edge_types=edge_types, node_list=self.node_list)
        self.distance_threshold = self.get_adaptive_threshold(
            adj_pred, adj_true, margin)
        print("distance_threshold", self.distance_threshold)
        predicted_adj = np.zeros(adj_pred.shape)
        for node_id in range(predicted_adj.shape[0]):
            predicted_adj[node_id, :] = (adj_pred[node_id, :] <
                                         self.distance_threshold).astype(float)
        adj_pred = predicted_adj
        return adj_pred

    def get_adaptive_threshold(self, adj_pred, adj_true, margin):
        distance_threshold = np.zeros((len(self.node_list), ))
        for nonzero_node_id in np.unique(adj_true.nonzero()[0]):
            _, nonzero_node_cols = adj_true[nonzero_node_id].nonzero()
            positive_distances = adj_pred[nonzero_node_id, nonzero_node_cols]
            distance_threshold[nonzero_node_id] = np.min(positive_distances)
        # Nodes with no positive edges fall back to the smallest nonzero
        # threshold plus half the margin.
        fallback_threshold = np.min(
            distance_threshold[distance_threshold > 0]) + margin / 2
        distance_threshold[distance_threshold == 0] = fallback_threshold
        return distance_threshold
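
    # A toy illustration of the per-node threshold logic above, using dense numpy
    # arrays for a hypothetical 3-node graph (the real method receives a sparse
    # adjacency matrix from the data generator):
    #
    #     import numpy as np
    #     adj_true = np.array([[0, 1, 1],      # node 0 has true edges to nodes 1 and 2
    #                          [0, 0, 0],      # node 1 has no outgoing edges
    #                          [1, 0, 0]])
    #     adj_pred = np.array([[0.0, 0.4, 0.9],
    #                          [0.7, 0.0, 0.8],
    #                          [0.3, 0.6, 0.0]])   # predicted pairwise distances
    #     threshold = np.zeros(3)
    #     for i in np.unique(adj_true.nonzero()[0]):     # nodes with at least one true edge
    #         cols = adj_true[i].nonzero()[0]
    #         threshold[i] = adj_pred[i, cols].min()     # distance to the closest true neighbour
    #     print(threshold)   # [0.4 0.  0.3]; node 1 later falls back to min(nonzero) + margin / 2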
Example No. 4
def TCNN_model(
    len_params=9,
    init_shape=(4, 4, 100),
    opt_adam=None,
    loss='mse',
    ldr=0.25,
    verbose=True,
):
    """
    Build a TCNN model.

    Keywords in comment lines:
        TCNN2D: 2D Transposed convolution layer (Deconvolution).
        CNN2D: 2D convolution layer (spatial convolution).
        ReLU: Rectified Linear Unit  activation function.

    Args:
        len_params (int): Number of parameters to use.
        init_shape (tuple): Initial shape. A tuple of integers: height, width, channels.
        opt_adam (str): Name of optimizers or optimizer instance. If None, ADAM optimizer will be use.
        loss (str): Name of objective function or objective function or Loss instance. default: Mean Square Error
        ldr (int): Fraction of the input units to drop.
        verbose (bool): Whether or not verbose.

    Return:
        TCNN constructed model.

    """

    # Use ADAM optimizer as default.
    if opt_adam is None:
        lr, b1, b2 = 0.0001, 0.5, 0.99
        opt_adam = optimizers.Adam(learning_rate=lr, beta_1=b1, beta_2=b2)
        print(
            colored(
                f'\nThis model uses ADAM optimizer with {lr} learning rate and beta1={b1}, beta2={b2}.',
                'green'))

    # Initial size.
    height, width, channels = init_shape

    # Input parameters. Specialize the number of parameters to use.
    input_layer = Input(
        shape=(len_params, ),
        name='params',
    )

    # First fully connected layer with ReLU.
    layer = Dense(
        units=625,
        use_bias=True,
        name='Dense_1',
    )(input_layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_d1',
    )(layer)

    # Apply Dropout to the input.
    layer = Dropout(
        rate=ldr,
        seed=32,
        trainable=False,
        name='Dropout',
    )(layer)

    # Second fully connected layer with ReLU.
    layer = Dense(
        units=1250,
        use_bias=True,
        name='Dense_2',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_d2',
    )(layer)

    # Third fully connected layer with ReLU.
    layer = Dense(
        units=height * width * channels,
        use_bias=True,
        name='Dense_3',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_3',
    )(layer)

    # Reshape from 1600 to 4x4x100.
    layer = Reshape(
        target_shape=(height, width, channels),
        name='Reshape_',
    )(layer)

    # Apply 5x5 TCNN2D and CNN2D: up-sample from 4x4x100 to 11x11x48, 25x25x24, and 50x50x3 with three transposed convolutions.
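    # Hand-checked shape arithmetic (not produced by Keras): with padding='valid'
    # and stride 2, a 5x5 transposed convolution maps n -> (n - 1) * 2 + 5, so the
    # spatial size goes 4 -> 11 -> 25; the final transposed convolution uses
    # padding='same', which maps n -> n * 2, so 25 -> 50. The interleaved stride-1
    # 'same' Conv2D layers leave the spatial size unchanged.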

    # First TCNN2D with ReLU: Up-sample to 11x11x48.
    layer = Conv2DTranspose(
        filters=48,
        kernel_size=(5, 5),
        strides=(2, 2),
        padding='valid',
        name='Transp_1',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_t1',
    )(layer)

    # First CNN2D with ReLU: refine features at 11x11x48 (spatial size unchanged).
    layer = Conv2D(
        filters=48,
        kernel_size=(5, 5),
        strides=(1, 1),
        padding='same',
        name='Conv2D_1',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_c1',
    )(layer)

    # Second TCNN2D with ReLU: Up-sample to 25x25x24.
    layer = Conv2DTranspose(
        filters=24,
        kernel_size=(5, 5),
        strides=(2, 2),
        padding='valid',
        name='Transp_2',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_t2',
    )(layer)

    # Second CNN2D with ReLU: refine features at 25x25x24 (spatial size unchanged).
    layer = Conv2D(
        filters=24,
        kernel_size=(5, 5),
        strides=(1, 1),
        padding='same',
        name='Conv2D_2',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_c2',
    )(layer)

    # Third TCNN2D with ReLU: Up-sample to 50x50x3.
    layer = Conv2DTranspose(
        filters=3,
        kernel_size=(5, 5),
        strides=(2, 2),
        padding='same',
        name='Transp_3',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_t3',
    )(layer)

    # Third CNN2D with ReLU: refine features at 50x50x3 (spatial size unchanged).
    layer = Conv2D(
        filters=3,
        kernel_size=(5, 5),
        strides=(1, 1),
        padding='same',
        name='Conv2D_3',
    )(layer)
    layer = LeakyReLU(
        alpha=0.3,
        name='ReLu_c3',
    )(layer)

    # Compile the model.
    model = Model(inputs=input_layer, outputs=layer, name='TCNN_model')
    model.compile(optimizer=opt_adam, loss=loss, metrics=['acc', 'mae', 'mse'])

    # Show some information.
    if verbose:
        for layer in model.layers:
            if 'ReLu' not in layer.name and 'params' not in layer.name:
                print(
                    f'{layer.name}: [output shape: {layer.output_shape}, trainable? {layer.trainable}]'
                )

        print(
            colored('\nTCNN model has {} trainable parameters.',
                    'green').format(model.count_params()))

        _, df = model.layers[0].input_shape
        _, h, w, c = model.output_shape
        print(
            colored(
                f'The degrees of freedom of the system increases from {df} to {h * w * c}.',
                'green'))

    return model
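
# Minimal usage sketch (random placeholder inputs, purely to check that the
# model builds and that the output matches the 50x50x3 target):
if __name__ == '__main__':
    import numpy as np

    model = TCNN_model(len_params=9, verbose=False)
    params = np.random.rand(2, 9).astype('float32')   # batch of 2 parameter vectors
    maps = model.predict(params)
    print(maps.shape)   # expected: (2, 50, 50, 3)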