Example #1
def encode(name,inputs,info,batch_size):
	internal_dim=128
	encoder_output_dim=inputs["encoder_output_dim"]
	in_adjs=inputs["adjs"]
	features=inputs["features"]
	dropout_rate=inputs["dropout_rate"]
	is_train=inputs["is_train"]
	enabled_node_nums=inputs['enabled_node_nums']
	adj_channel_num=info.adj_channel_num
	
	layer=features
	layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
	layer=layers.GraphBatchNormalization()(layer,
		max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
	layer = tf.nn.relu(layer)
	layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
	layer=layers.GraphBatchNormalization()(layer,
		max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
	layer = tf.nn.relu(layer)
	layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
	layer=layers.GraphBatchNormalization()(layer,
		max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
	layer = tf.nn.relu(layer)
	mean_layer=layers.GraphDense(encoder_output_dim)(layer)
	std_layer=layers.GraphDense(encoder_output_dim)(layer)
	std_layer=tf.nn.softplus(std_layer)
	std_layer=tf.sqrt(std_layer)
	return mean_layer,std_layer
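The encoder above returns a per-node Gaussian mean and standard deviation. As a minimal sketch (plain TensorFlow 1.x, not part of the original example; the helper name sample_latent is hypothetical), such a pair is usually sampled with the reparameterization trick:

import tensorflow as tf

def sample_latent(mean_layer, std_layer):
    # Draw eps ~ N(0, I) with the same shape as the mean, then shift and scale
    # it so that gradients flow through both mean_layer and std_layer.
    eps = tf.random_normal(tf.shape(mean_layer))
    return mean_layer + std_layer * eps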
Example #2
def decode_links(name,inputs,info):
	internal_dim=128
	dropout_rate=inputs["dropout_rate"]
	layer=inputs["input_layer"]
	input_dim=inputs["input_layer_dim"]
	is_train=inputs["is_train"]
	node_num=inputs["decoded_node_num"]
	enabled_node_nums=inputs['enabled_node_nums']
	layer=layers.GraphDense(internal_dim)(layer)
	layer=layers.GraphBatchNormalization()(layer,
		max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
	layer=tf.nn.relu(layer)
	layer=layers.GraphDense(internal_dim)(layer)
	layer=layers.GraphDecoderDistMult()(layer)
	return layer
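GraphDecoderDistMult scores candidate links with a DistMult-style bilinear form. Purely as an illustration of the general DistMult score (not the library's implementation; the names distmult_score, h, t, and r are hypothetical):

import tensorflow as tf

def distmult_score(h, t, r):
    # score(h, r, t) = sum_k h_k * r_k * t_k: a diagonal bilinear product of
    # the two node embeddings, weighted per relation.
    return tf.reduce_sum(h * r * t, axis=-1)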
Example #3
def build_nn(inputs,info,config,batch_size=4):
    adj_channel_num=info.adj_channel_num
    preference_list_length=2
    internal_dim=32
    in_adjs=inputs["adjs"]
    features=inputs["features"]
    in_nodes=inputs["nodes"]
    dropout_rate=inputs["dropout_rate"]
    enabled_node_nums=inputs["enabled_node_nums"]

    layer=features
    input_dim=info.feature_dim
    if features is None:
        embedding_dim=config["embedding_dim"]
        layer=K.layers.Embedding(info.all_node_num,embedding_dim)(in_nodes)
        input_dim=embedding_dim
    # layer: batch_size x graph_node_num x dim
    layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
    layer=tf.sigmoid(layer)
    layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
    layer=tf.sigmoid(layer)
    layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
    layer=layers.GraphMaxPooling(adj_channel_num)(layer,adj=in_adjs)
    layer=layers.GraphBatchNormalization()(layer,
        max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
    layer=tf.sigmoid(layer)
    layer=K.layers.Dropout(dropout_rate)(layer)
    layer=layers.GraphDense(internal_dim)(layer)
    layer=tf.sigmoid(layer)
    layer=layers.GraphGather()(layer)
    layer=K.layers.Dense(info.label_dim)(layer)
    return layer
Example #4
def encode(name,inputs,info,batch_size):
    internal_dim=64
    encoder_output_dim=inputs["encoder_output_dim"]
    in_adjs=inputs["adjs"]
    features=inputs["features"]
    dropout_rate=inputs["dropout_rate"]
    is_train=inputs["is_train"]
    enabled_node_nums=inputs['enabled_node_nums']
    adj_channel_num=info.adj_channel_num

    with tf.variable_scope(name):
        layer=features
        layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
        layer=layers.GraphBatchNormalization()(layer,
            max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
        layer = tf.tanh(layer)
        layer=layers.GraphConv(internal_dim,adj_channel_num)(layer,adj=in_adjs)
        layer=layers.GraphBatchNormalization()(layer,
            max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
        layer = tf.tanh(layer)
        layer=layers.GraphDense(internal_dim)(layer)
        layer=tf.sigmoid(layer)
        layer=layers.GraphGather()(layer)

        mean_layer=K.layers.Dense(encoder_output_dim,kernel_initializer='random_uniform')(layer)
        std_layer=K.layers.Dense(encoder_output_dim)(layer)
        std_layer=tf.nn.softplus(std_layer)
        std_layer=tf.sqrt(std_layer)
        mean_layer=tf.clip_by_value(mean_layer,-100,100)
        std_layer=tf.clip_by_value(std_layer,-5,5)
    return mean_layer,std_layer
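When an encoder like this feeds a variational autoencoder, the mean/std pair usually enters a Gaussian KL term against a standard normal prior. A hedged sketch of that textbook formula (not code from this repository; gaussian_kl is a hypothetical helper):

import tensorflow as tf

def gaussian_kl(mean_layer, std_layer, eps=1e-8):
    # KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * sum(sigma^2 + mu^2 - 1 - log(sigma^2))
    var = tf.square(std_layer)
    return 0.5 * tf.reduce_sum(
        var + tf.square(mean_layer) - 1.0 - tf.log(var + eps), axis=-1)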
Example #5
def decode_links(name,inputs,info):
    dropout_rate=inputs["dropout_rate"]
    internal_dim=64
    layer=inputs["input_layer"]
    input_dim=inputs["input_layer_dim"]
    is_train=inputs["is_train"]
    node_num=inputs["decoded_node_num"]
    enabled_node_nums=inputs['enabled_node_nums']
    with tf.variable_scope(name):
        layer=layers.GraphDense(internal_dim,name="dense_1")(layer)
        layer=layers.GraphBatchNormalization(name="bn_1")(layer,
            max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
        layer=tf.sigmoid(layer)
        layer=layers.GraphDense(internal_dim,name="dense_2")(layer)
        layer=tf.sigmoid(layer)
        #layer=layers.GraphDecoderInnerProd()(layer)
        layer=layers.GraphDecoderDistMult()(layer)
    return layer
Example #6
def decode_nodes(name,inputs,info):
    dropout_rate=inputs["dropout_rate"]
    layer=inputs["input_layer"]
    input_dim=inputs["input_layer_dim"]
    decoded_output_dim=inputs["output_layer_dim"]
    node_num=inputs["decoded_node_num"]
    is_train=inputs["is_train"]
    enabled_node_nums=inputs['enabled_node_nums']
    with tf.variable_scope(name):
        layer=layers.GraphDense(decoded_output_dim,kernel_initializer='random_uniform',name="dense_1")(layer)
    return layer
Example #7
def build_model(placeholders, info, config, batch_size=4):
    adj_channel_num = info.adj_channel_num
    in_adjs = placeholders["adjs"]
    features = placeholders["features"]
    in_nodes = placeholders["nodes"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    dropout_rate = placeholders["dropout_rate"]
    is_train = placeholders["is_train"]
    mask_node = placeholders["mask_node"]
    enabled_node_nums = placeholders["enabled_node_nums"]
    internal_dim = 100
    #
    layer = features
    input_dim = info.feature_dim
    print(info.param["num_gcn_layer"])
    for i in range(int(info.param["num_gcn_layer"])):
        layer = layers.GraphConv(internal_dim, adj_channel_num)(layer,
                                                                adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=info.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.sigmoid(layer)
        layer = K.layers.Dropout(dropout_rate)(layer)
    layer = layers.GraphDense(internal_dim)(layer)
    layer = tf.sigmoid(layer)
    layer = layers.GraphGather()(layer)
    output_dim = 2
    layer = K.layers.Dense(output_dim)(layer)
    prediction = tf.nn.softmax(layer)

    # computing cost and metrics
    cost = mask * tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                          logits=layer)
    cost_opt = tf.reduce_mean(cost)

    metrics = {}
    cost_sum = tf.reduce_sum(cost)

    correct_count = mask * tf.cast(
        tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1)), tf.float32)
    metrics["correct_count"] = tf.reduce_sum(correct_count)
    return layer, prediction, cost_opt, cost_sum, metrics
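The returned cost_opt is a scalar mean loss, so it can be minimized directly with a standard TF1 optimizer. A minimal sketch, assuming placeholders, info, and config have been built as in the rest of these examples:

import tensorflow as tf

# Hypothetical wiring of the model into a training step.
layer, prediction, cost_opt, cost_sum, metrics = build_model(placeholders, info, config)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost_opt)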
Example #8
    def build_model(self):
        ## aliases
        batch_size = self.batch_size
        in_adjs = self.placeholders["adjs"]
        features = self.placeholders["features"]
        sequences = self.placeholders["sequences"]
        sequences_len = self.placeholders["sequences_len"]
        in_nodes = self.placeholders["nodes"]
        labels = self.placeholders["labels"]
        mask = self.placeholders["mask"]
        dropout_rate = self.placeholders["dropout_rate"]
        mask_label = self.placeholders["mask_label"]
        is_train = self.placeholders["is_train"]
        enabled_node_nums = self.placeholders["enabled_node_nums"]
        embedded_layer = self.placeholders['embedded_layer']

        layer = features
        print("graph input layer:", layer.shape)
        layer = layers.GraphConv(100, self.adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=self.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.nn.relu(layer)

        layer = layers.GraphConv(100, self.adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=self.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.nn.relu(layer)

        layer = layers.GraphDense(100)(layer)
        layer = tf.nn.relu(layer)
        layer = layers.GraphGather()(layer)
        graph_output_layer = layer
        print("graph output layer:", graph_output_layer.shape)
        graph_output_layer_dim = 100

        with tf.variable_scope("seq_nn") as scope_part:
            # Embedding
            if self.feed_embedded_layer:
                layer = embedded_layer
            else:
                layer = self._embedding(sequences)
            print("sequence input layer:", layer.shape)
            # CNN + Pooling
            stride = 1
            layer = tf.keras.layers.Conv1D(500,
                                           stride,
                                           padding="same",
                                           activation='relu')(layer)
            layer = tf.keras.layers.MaxPooling1D(stride)(layer)

            layer = tf.keras.layers.Conv1D(500,
                                           stride,
                                           padding="same",
                                           activation='relu')(layer)
            layer = tf.keras.layers.MaxPooling1D(stride)(layer)

            layer = tf.keras.layers.Conv1D(1,
                                           stride,
                                           padding="same",
                                           activation='tanh')(layer)
            layer = tf.squeeze(layer)

            if len(layer.shape) == 1:
                # When the batch size is 1, tf.squeeze() above also drops the
                # batch dimension, so restore it here.
                layer = tf.expand_dims(layer, axis=0)
            seq_output_layer = layer
            seq_output_layer_dim = layer.shape[1]
            print("sequence output layer:", seq_output_layer.shape)

        layer = tf.concat([seq_output_layer, graph_output_layer], axis=1)
        print("shared_part input:", layer.shape)

        input_dim = seq_output_layer_dim + graph_output_layer_dim

        with tf.variable_scope("shared_nn") as scope_part:
            layer = tf.keras.layers.BatchNormalization()(layer)
            layer = tf.keras.layers.Dense(52)(layer)
            layer = tf.keras.layers.BatchNormalization()(layer)
            layer = tf.nn.relu(layer)

        layer = tf.keras.layers.Dense(self.label_dim)(layer)

        prediction = tf.nn.softmax(layer)
        # computing cost and metrics
        cost = mask * tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                              logits=layer)
        cost_opt = tf.reduce_mean(cost)

        metrics = {}
        cost_sum = tf.reduce_sum(cost)

        correct_count = mask * tf.cast(
            tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1)),
            tf.float32)
        metrics["correct_count"] = tf.reduce_sum(correct_count)
        return self, prediction, cost_opt, cost_sum, metrics
Example #9
 def build_model(self, info, batch_size=4):
     adj_channel_num=info.adj_channel_num
     profeat_dim=info.vector_modal_dim[info.vector_modal_name["profeat"]]
     in_adjs=self.placeholders["adjs"]
     features=self.placeholders["features"]
     in_nodes=self.placeholders["nodes"]
     labels=self.placeholders["labels"]
     mask=self.placeholders["mask"]
     dropout_rate=self.placeholders["dropout_rate"]
     profeat = self.placeholders["profeat"]
     mask_label=self.placeholders["mask_label"]
     is_train=self.placeholders["is_train"] 
     enabled_node_nums=self.placeholders["enabled_node_nums"]
     wd_b=None
     wd_w=0.1
     
     ###
     ### Graph part
     ###
     layer=features
     input_dim=info.feature_dim
     layer=layers.GraphConv(100,adj_channel_num)(layer,adj=in_adjs)
     layer=layers.GraphBatchNormalization()(layer,
         max_node_num=info.graph_node_num,enabled_node_nums=enabled_node_nums)
     layer=tf.nn.relu(layer)
 
     layer=layers.GraphDense(100)(layer)
     layer=tf.nn.relu(layer)
     layer=layers.GraphGather()(layer)
     graph_output_layer=layer
     graph_output_layer_dim=100
    
     ###
     ### Sequence part
     ###
     with tf.variable_scope("seq_nn") as scope_part:
         layer=profeat
         layer=K.layers.Dense(100)(layer)
         layer=K.layers.BatchNormalization()(layer)
         layer=tf.nn.relu(layer)
     
         seq_output_layer=layer
         seq_output_layer_dim=100
  
     ###
     ### Shared part
     ###
     # 100 dim (graph part) + 100 dim (sequence part)
     layer=tf.concat([seq_output_layer,graph_output_layer],axis=1)
     input_dim=seq_output_layer_dim+graph_output_layer_dim
     
     with tf.variable_scope("shared_nn") as scope_part:
         layer=K.layers.Dense(52)(layer)
         layer=K.layers.BatchNormalization()(layer)
         layer=tf.nn.relu(layer)
 
         layer=K.layers.Dense(info.label_dim)(layer)
     # Create the final output
     # shape: [12 x 50 x 2]
     logits = mu.multitask_logits(layer, labels.shape[1])
     model = logits
     # Compute the cost: the batch mean for each of the 12 tasks
     task_losses = mu.add_training_loss(logits=logits, label=labels, pos_weight=info.pos_weight,
                                        batch_size=batch_size, n_tasks=labels.shape[1], mask=mask_label)
     total_loss = tf.reduce_sum(task_losses)  # sum of the losses over all tasks
 
     ### multi-task loss
     cost_opt=task_losses
     each_cost = task_losses
 
     # Binary probability predictions: 12 x 50 x 2
     prediction = mu.add_softmax(logits)
     prediction = tf.transpose(prediction,[1,0,2])
 
     metrics={}
     cost_sum= total_loss 
     # cost_sum = cost_opt
     metrics["each_cost"] = task_losses
 
     metrics["each_correct_count"] = [None]*labels.shape[1]
     for i in range(labels.shape[1]):
         equal_cnt=mask_label[:,i]*tf.cast(tf.equal(tf.cast(tf.argmax(prediction[:,i,:],1),tf.int16), tf.cast(labels[:,i],tf.int16)),tf.float32)
 
         each_correct_count=tf.cast(tf.reduce_sum(equal_cnt,axis=0),tf.float32)
         metrics["each_correct_count"][i] = each_correct_count
 
     # correct_count=0#mask*tf.cast(tf.reduce_all(tf.equal(tf.cast(tf.argmax(prediction,1),tf.int16), tf.cast(labels,tf.int16)),axis=1),tf.float32)
     # metrics["correct_count"]=tf.reduce_sum(correct_count)
     metrics["correct_count"]= sum([metrics["each_correct_count"][i] for i in range(labels.shape[1])])
     return model,prediction,cost_opt,cost_sum,metrics
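mu.multitask_logits and mu.add_training_loss come from the repository's multitask utilities and are not shown here. Purely as an illustration of the general pattern (not the actual implementation; simple_multitask_logits is a hypothetical name), per-task two-class logits can be built by giving each task its own dense head:

import tensorflow as tf

def simple_multitask_logits(layer, n_tasks):
    # One independent two-way classification head per task.
    return [tf.keras.layers.Dense(2)(layer) for _ in range(n_tasks)]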
Example #10
def build_model(placeholders, info, config, batch_size=4):
    adj_channel_num = info.adj_channel_num
    embedding_dim = config["embedding_dim"]
    in_adjs = placeholders["adjs"]
    features = placeholders["features"]
    in_nodes = placeholders["nodes"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    mask_label = placeholders["mask_label"]
    #dropout_rate=placeholders["dropout_rate"]
    dropout_rate = 0.3
    is_train = placeholders["is_train"]
    mask_node = placeholders["mask_node"]
    enabled_node_nums = placeholders["enabled_node_nums"]

    layer = features
    input_dim = info.feature_dim
    with tf.variable_scope("rollout"):
        if features is None:
            layer = K.layers.Embedding(info.all_node_num,
                                       embedding_dim)(in_nodes)
            input_dim = embedding_dim
        # layer: batch_size x graph_node_num x dim
        layer = layers.GraphConv(128, adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=info.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.nn.relu(layer)
        #       layer=K.layers.Dropout(dropout_rate)(layer)

        layer = layers.GraphConv(128, adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=info.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.nn.relu(layer)
        #       layer=K.layers.Dropout(dropout_rate)(layer)

        layer = layers.GraphConv(128, adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=info.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.nn.relu(layer)
        #       layer=K.layers.Dropout(dropout_rate)(layer)

        layer = layers.GraphDense(128)(layer)
        layer = tf.nn.relu(layer)
        #       layer=K.layers.Dropout(dropout_rate)(layer)

        layer = layers.GraphGather()(layer)
        layer = K.layers.Dense(info.label_dim)(layer)
        prediction = tf.nn.softmax(layer, name="output")
        # computing cost and metrics
        cost = mask * tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                                 logits=layer)
        cost_opt = tf.reduce_mean(cost)
        metrics = {}
        cost_sum = tf.reduce_sum(cost)
        correct_count = mask * tf.cast(
            tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1)),
            tf.float32)
        metrics["correct_count"] = tf.reduce_sum(correct_count)
    return layer, prediction, cost_opt, cost_sum, metrics
Example #11
def build_model(placeholders, info, config, batch_size=4):
    adj_channel_num = info.adj_channel_num
    embedding_dim = config["embedding_dim"]
    in_adjs = placeholders["adjs"]
    features = placeholders["features"]
    sequences = placeholders["sequences"]
    sequences_len = placeholders["sequences_len"]
    in_nodes = placeholders["nodes"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    dropout_rate = placeholders["dropout_rate"]
    is_train = placeholders["is_train"]
    enabled_node_nums = placeholders["enabled_node_nums"]

    ###
    ### Graph part
    ###
    with tf.variable_scope("seq_nn") as scope_part:
        layer = features
        input_dim = info.feature_dim
        if features is None:
            layer = K.layers.Embedding(info.all_node_num,
                                       embedding_dim)(in_nodes)
            input_dim = embedding_dim
        # layer: batch_size x graph_node_num x dim
        layer = layers.GraphConv(50, adj_channel_num)(layer, adj=in_adjs)
        layer = tf.sigmoid(layer)
        layer = layers.GraphConv(50, adj_channel_num)(layer, adj=in_adjs)
        layer = tf.sigmoid(layer)
        layer = layers.GraphConv(50, adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphMaxPooling(adj_channel_num)(layer, adj=in_adjs)
        layer = layers.GraphBatchNormalization()(
            layer,
            max_node_num=info.graph_node_num,
            enabled_node_nums=enabled_node_nums)
        layer = tf.sigmoid(layer)
        layer = K.layers.Dropout(dropout_rate)(layer)
        layer = layers.GraphDense(50)(layer)
        layer = tf.sigmoid(layer)
        layer = layers.GraphGather()(layer)
        graph_output_layer = layer
        graph_output_layer_dim = 50

    ###
    ### Sequence part
    ###

    with tf.variable_scope("seq_nn") as scope_part:
        # Embedding
        embedding_dim = 10
        layer = K.layers.Embedding(info.sequence_symbol_num,
                                   embedding_dim)(sequences)
        # CNN + Pooling
        stride = 4
        layer = K.layers.Conv1D(50, stride, padding="same",
                                activation='relu')(layer)
        layer = K.layers.MaxPooling1D(stride)(layer)
        # LSTM 1
        output_dim = 32
        layer = K.layers.LSTM(output_dim,
                              return_sequences=True,
                              go_backwards=True)(layer)
        # LSTM 2
        layer = K.layers.LSTM(output_dim,
                              return_sequences=False,
                              go_backwards=True)(layer)
        #layer = tf.squeeze(layer)
        seq_output_layer = layer
        seq_output_layer_dim = layer.shape[1]
    ###
    ### Shared part
    ###

    # 50 dim (graph part) + 32 dim (sequence part)
    layer = tf.concat([seq_output_layer, graph_output_layer], axis=1)
    input_dim = seq_output_layer_dim + graph_output_layer_dim
    with tf.variable_scope("shared_nn") as scope_part:
        layer = K.layers.Dense(52)(layer)
        layer = K.layers.BatchNormalization()(layer)
        layer = tf.nn.relu(layer)

        layer = K.layers.Dense(info.label_dim)(layer)

    prediction = tf.nn.softmax(layer)
    # computing cost and metrics
    cost = mask * tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                          logits=layer)
    cost_opt = tf.reduce_mean(cost)

    metrics = {}
    cost_sum = tf.reduce_sum(cost)

    correct_count = mask * tf.cast(
        tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1)), tf.float32)
    metrics["correct_count"] = tf.reduce_sum(correct_count)
    return layer, prediction, cost_opt, cost_sum, metrics
Example #12
def build_model(placeholders, info, config, batch_size=4):
    adj_channel_num = info.adj_channel_num
    embedding_dim = config["embedding_dim"]
    in_adjs = placeholders["adjs"]
    features = placeholders["features"]
    in_nodes = placeholders["nodes"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    mask_label = placeholders["mask_label"]
    dropout_rate = placeholders["dropout_rate"]
    is_train = placeholders["is_train"]
    mask_node = placeholders["mask_node"]
    enabled_node_nums = placeholders["enabled_node_nums"]

    layer = features
    input_dim = info.feature_dim
    if features is None:
        layer = K.layers.Embedding(info.all_node_num, embedding_dim)(in_nodes)
        input_dim = embedding_dim
    # layer: batch_size x graph_node_num x dim
    layer = layers.GraphConv(256, adj_channel_num)(layer, adj=in_adjs)
    layer = tf.sigmoid(layer)
    layer = layers.GraphConv(256, adj_channel_num)(layer, adj=in_adjs)
    layer = tf.sigmoid(layer)
    layer = layers.GraphDense(256)(layer)
    layer = tf.sigmoid(layer)
    layer = layers.GraphConv(50, adj_channel_num)(layer, adj=in_adjs)
    #layer=layers.GraphMaxPooling(adj_channel_num)(layer,adj=in_adjs)
    layer = layers.GraphBatchNormalization()(
        layer,
        max_node_num=info.graph_node_num,
        enabled_node_nums=enabled_node_nums)
    layer = tf.sigmoid(layer)
    layer = layers.GraphDense(50)(layer)
    layer = tf.sigmoid(layer)
    layer = layers.GraphGather()(layer)
    layer = K.layers.Dense(info.label_dim)(layer)
    ###
    ### multi-task loss
    ###
    prediction = tf.sigmoid(layer)
    # computing cost and metrics
    # cost (batch_size x labels) => batch_size
    if "pos_weight" in info:
        cost = mask * tf.reduce_sum(
            mask_label * tf.nn.weighted_cross_entropy_with_logits(
                targets=labels, logits=layer, pos_weight=info.pos_weight),
            axis=1)
    else:
        cost = mask * tf.reduce_sum(
            mask_label * tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                                 logits=layer),
            axis=1)

    cost_opt = tf.reduce_mean(cost)

    metrics = {}
    cost_sum = tf.reduce_sum(cost)

    def binary_activation(x, thresh):
        cond = tf.less(x, tf.ones(tf.shape(x)) * thresh)
        out = tf.where(cond, tf.zeros(tf.shape(x)), tf.ones(tf.shape(x)))
        return out

    correct_count = mask * tf.cast(
        tf.reduce_all(tf.equal(binary_activation(prediction, 0.5), labels),
                      axis=1), tf.float32)
    metrics["correct_count"] = tf.reduce_sum(correct_count)
    return layer, prediction, cost_opt, cost_sum, metrics
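binary_activation thresholds each predicted probability at 0.5 before the element-wise comparison with the multi-hot labels. A small standalone check of that thresholding (TF1 session style; the input values are chosen only for illustration):

import tensorflow as tf

x = tf.constant([[0.2, 0.7, 0.5]])
cond = tf.less(x, tf.ones(tf.shape(x)) * 0.5)
out = tf.where(cond, tf.zeros(tf.shape(x)), tf.ones(tf.shape(x)))
with tf.Session() as sess:
    print(sess.run(out))  # [[0. 1. 1.]] -- values >= 0.5 map to 1.0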