Example #1
    def call(self, x: IOperator) -> IOperator:
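        # A plain feed-forward classifier: four Tanh hidden layers (784, 784,
        # 392, 128 units) with Dropout before the last one, then a 10-way
        # Softmax output; every Dense layer's variables are collected into
        # self.__var_list.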
        self.__var_list: List[ITrainable] = []

        fc1 = Dense(inputs=x, activation=Tanh(), units=784)
        self.__var_list.extend(fc1.variables)

        fc2 = Dense(inputs=fc1, activation=Tanh(), units=784)
        self.__var_list.extend(fc2.variables)

        fc3 = Dense(inputs=fc2, activation=Tanh(), units=392)
        self.__var_list.extend(fc3.variables)

        dropout = Dropout(inputs=fc3)

        fc4 = Dense(inputs=dropout, activation=Tanh(), units=128)
        self.__var_list.extend(fc4.variables)

        fc5 = Dense(inputs=fc4, activation=Softmax(), units=10)
        self.__var_list.extend(fc5.variables)

        return fc5
Example #2
from nn.loss import cross_entropy
from nn.metrix import accuracy
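# Note: Model, FC, ReLU, Dropout, Softmax, Adam, load_mnist and MakeOneHot are
# assumed to come from the same custom nn package as cross_entropy/accuracy;
# their imports are not shown in this snippet.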


(X_train, y_train), (X_test, y_test) = load_mnist()
X_train = X_train.reshape((X_train.shape[0], -1)) / 255
X_test = X_test.reshape((X_test.shape[0], -1)) / 255

transformer = MakeOneHot()
y_train = transformer.fit_transform(y_train)
y_test = transformer.transform(y_test)

model = Model()
model.add(FC(500, input_shape=784))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(150))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(50))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(10))
model.add(Softmax())

model.compile(Adam(eta=0.01), cross_entropy, accuracy)

model.fit(X_train, y_train, max_iter=10, batch_size=2000)

print("train acc: {:.2f}%".format(model.score(X_train, y_train)))
print("test acc: {:.2f}%".format(model.score(X_test, y_test)))
Example #3
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.x_len = tf.placeholder(tf.int32, [None])
        self.y = tf.placeholder(tf.int32, [None])
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab() + 1,
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)
        dropout = Dropout(self.keep_prob)
        variational_dropout = VariationalDropout(self.keep_prob)
        input_x = dropout(input_x, self.training)

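        # Two stacked BiLSTM encoders, with variational dropout applied to the
        # output of each layer.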
        encoder1 = BiLSTM(self.rnn_hidden_size, name='layer_1')
        input_x, _ = encoder1(input_x, self.x_len)
        input_x = variational_dropout(input_x, self.training)

        encoder2 = BiLSTM(self.rnn_hidden_size, name='layer_2')
        input_x, _ = encoder2(input_x, self.x_len)
        input_x = variational_dropout(input_x, self.training)

        avg_pool = tf.reduce_mean(input_x, axis=1)
        max_pool = tf.reduce_max(input_x, axis=1)

        merge = tf.concat([avg_pool, max_pool], axis=1)
        #
        # dense = tf.keras.layers.Dense(16,activation=tf.nn.relu)
        # merge = dense(merge)
        # merge = dropout(merge, self.training)
        self.logits = tf.keras.layers.Dense(self.num_class,
                                            activation=None)(merge)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                           labels=self.y))
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            "text_len": self.x_len,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict(
            {"predict": tf.argmax(self.logits, axis=1)})

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #4
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.y = tf.placeholder(tf.int32, [None])
        self.pos_feature = tf.placeholder(tf.int32, [None, None])
        self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
        self.in_name_feature = tf.placeholder(tf.int32, [None, None])

        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab() + 1,
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        pos_embedding = Embedding(pretrained_embedding=None,
                                  embedding_shape=(self.pos_vocab_size,
                                                   self.pos_embedding_size))
        #
        # input_x_pos = pos_embedding(self.pos_feature)
        #
        # feature_x = tf.one_hot(self.in_name_feature,depth=2)
        # ask_word_feature = tf.one_hot(self.ask_word_feature,depth=2)
        # input_x = tf.concat([input_x,feature_x],axis=-1)
        # # input_x = tf.concat([input_x,ask_word_feature],axis=-1)
        # input_x = tf.concat([input_x,input_x_pos],axis=-1)
        # # print(input_x.shape)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x, self.training)
        pooled = []
        c4 = None
        c5 = None
        c6 = None
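        # First CNN block: parallel 1-D convolutions over the embedded tokens,
        # one per kernel size in self.filter_sizes1, each followed by max
        # pooling; the outputs for kernel sizes 4, 5 and 6 are kept in
        # c4/c5/c6 so the second block can reuse them.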
        for idx, kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(input_x,
                                     self.filter_nums1[idx],
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='conv1d-%d' % (idx))
            pooled_conv = tf.layers.max_pooling1d(con1d,
                                                  2,
                                                  strides=1,
                                                  padding='same')
            if kernel_size == 4:
                c4 = pooled_conv
            if kernel_size == 5:
                c5 = pooled_conv
            if kernel_size == 6:
                c6 = pooled_conv
            pooled.append(pooled_conv)
        merge = tf.concat(pooled, axis=-1)
        c1_concat = merge

        layer2_pooled = []
        kernel_sizes = [2, 3]
        for idx, kernel_size in enumerate(kernel_sizes):
            con1d = tf.layers.conv1d(c1_concat,
                                     self.filter_nums1[idx],
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='conv1d-layer-2-%d' % (idx))
            pooled_conv = tf.layers.max_pooling1d(con1d,
                                                  2,
                                                  strides=1,
                                                  padding='same')
            layer2_pooled.append(pooled_conv)

        c2_concat = tf.concat([tf.concat(layer2_pooled, axis=-1), c4, c5, c6],
                              axis=-1)
        # print(merge.shape)
        # print(c2_concat.shape)
        merge = tf.concat([c2_concat, merge], axis=-1)

        conv1d = tf.layers.conv1d(merge,
                                  128,
                                  kernel_size=1,
                                  padding='same',
                                  activation=tf.nn.relu,
                                  name='layer_%d' % (3))
        merge = tf.reduce_max(conv1d, axis=1)
        # merge = tf.reduce_max(merge,axis=1)

        merge = dropout(merge, self.training)
        merge = tf.layers.batch_normalization(inputs=merge)
        dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
        merge = dense1(merge)
        merge = tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge, self.training)
        dense2 = tf.keras.layers.Dense(self.num_class,
                                       activation=None,
                                       use_bias=False)
        logits = dense2(merge)

        # self.loss = tf.reduce_mean(focal_loss_softmax(self.y,logits))#tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=self.y))

        # self.loss+=self.loss + lossL2

        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            # "features":self.in_name_feature,
            # "pos_feature":self.pos_feature,
            # 'ask_word_feature':self.ask_word_feature,
            "training": self.training,
        })

        self.output_variable_dict = OrderedDict(
            {"predict": tf.argmax(logits, axis=1)})

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #5
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.x_len = tf.placeholder(tf.int32, [None])
        self.y = tf.placeholder(tf.int32, [None])
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab() + 1,
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x, self.training)

        encoder1 = BiLSTM(self.rnn_hidden_size, name='layer_1')
        input_x, _ = encoder1(input_x, self.x_len)

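        # Attention pooling: score each time step with a Dense(1) projection of
        # tanh(H), softmax the scores over time, and take the attention-weighted
        # sum of the hidden states as the sentence representation H_star.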
        H = tf.nn.tanh(input_x)

        alpha = tf.nn.softmax(tf.squeeze(tf.layers.Dense(1)(H), axis=-1))

        H_star = tf.squeeze(tf.matmul(tf.expand_dims(alpha, axis=1), H),
                            axis=1)

        H_star = dropout(H_star, self.training)

        self.logits = tf.keras.layers.Dense(self.num_class,
                                            activation=None,
                                            use_bias=True)(H_star)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                           labels=self.y))
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            "text_len": self.x_len,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict(
            {"predict": tf.argmax(self.logits, axis=1)})

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #6
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.x_len = tf.placeholder(tf.int32, [None])
        self.pos_feature = tf.placeholder(tf.int32, [None, None])
        print(self.x.name)
        print(self.x_len.name)
        print(self.pos_feature.name)
        #self.in_name_feature = tf.placeholder(tf.int32,[None,None])
        self.y = tf.placeholder(tf.int32, [None])
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab() + 1,
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        pos_embedding = Embedding(pretrained_embedding=None,
                                  embedding_shape=(self.pos_vocab_size,
                                                   self.pos_embedding_size))

        input_x_pos = pos_embedding(self.pos_feature)

        input_x = tf.concat([input_x, input_x_pos], axis=-1)
        input_q = input_x
        # feature_x = tf.one_hot(self.in_name_feature, depth=2)
        # input_x = tf.concat([input_x, feature_x], axis=-1)
        self.filters = 256

        dropout = Dropout(self.keep_prob)
        variational_dropout = VariationalDropout(self.keep_prob)
        input_x = dropout(input_x, self.training)
        mask = create_padding_mask(self.x)

        encoder1 = BiGRU(self.rnn_hidden_size, name='layer_1')
        input_x, _ = encoder1(input_x, self.x_len)
        input_x = variational_dropout(input_x, self.training)

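        # Multi-head self-attention over the BiGRU outputs (batch-normalized
        # first); the attention output is added back as a residual connection.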
        tmp_ma = MultiHeadAttention(self.filters, 8, name='mha1')
        norm_x = tf.layers.batch_normalization(input_x)
        mha_out, _ = tmp_ma(norm_x, norm_x, norm_x, mask)
        input_x += mha_out

        encoder2 = BiGRU(self.rnn_hidden_size, name='layer_2')
        input_x, _ = encoder2(input_x, self.x_len)
        input_x = variational_dropout(input_x, self.training)

        #tmp_ma = MultiHeadAttention(self.filters,8)
        tmp_ma = MultiHeadAttention(self.filters, 8, name='mha2')
        norm_x = tf.layers.batch_normalization(input_x)
        mha_out, _ = tmp_ma(norm_x, norm_x, norm_x, mask)
        input_x = mha_out
        #tmp_ma = MultiHeadAttention(self.filters,8)

        avg_pool = tf.reduce_mean(input_x, axis=1)
        max_pool = tf.reduce_max(input_x, axis=1)

        merge = tf.concat([avg_pool, max_pool], axis=1)
        ''' 
        pooled =[]
        for idx,kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(input_x,self.filter_nums1[idx],kernel_size,padding='same',activation=tf.nn.relu,
                                     name='conv1d-%d'%(idx))
            pooled_conv = tf.reduce_max(con1d,axis=1)
            pooled.append(pooled_conv)
        merge_pooled  = tf.concat(pooled,axis=1)
        merge  = tf.concat([merge_pooled,merge],axis=1)
        '''
        # dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)
        # merge = dense(merge)
        # merge = dropout(merge, self.training)
        self.logits = tf.keras.layers.Dense(self.num_class,
                                            activation=None,
                                            name='final_dense')(merge)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                           labels=self.y))
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            "text_len": self.x_len,
            "training": self.training,
            "pos_feature": self.pos_feature,
            # "features": self.in_name_feature,
        })

        self.output_variable_dict = OrderedDict({
            "prob": tf.nn.softmax(self.logits),
            "predict": tf.argmax(self.logits, axis=1)
        })

        print(self.output_variable_dict)
        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #7
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32,[None,None])
        self.y = tf.placeholder(tf.int32,[None])
        self.domain = tf.placeholder(tf.int32,[None])
        self.x_len = tf.placeholder(tf.int32,[None])
        # self.soft_target = tf.placeholder(tf.float32,[None,None])
        self.pos_feature = tf.placeholder(tf.int32,[None,None])
        # self.ask_word_feature = tf.placeholder(tf.int32,[None,None])
        self.in_name_feature = tf.placeholder(tf.int32,[None,None])

        self.training = tf.placeholder_with_default(False,shape=(),name='is_training')

        word_embedding = Embedding(pretrained_embedding=self.pretrained_word_embedding,
                                   embedding_shape=(self.vocab.get_word_vocab(), self.word_embedding_size),
                                   trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        pos_embedding = Embedding(pretrained_embedding=None,
                                   embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))

        input_x_pos = pos_embedding(self.pos_feature)
        self.filters = 256
        feature_x = tf.one_hot(self.in_name_feature,depth=2)
        # ask_word_feature = tf.one_hot(self.ask_word_feature,depth=2)
        input_x = tf.concat([input_x,feature_x],axis=-1)
        # # input_x = tf.concat([input_x,ask_word_feature],axis=-1)
        input_x = tf.concat([input_x,input_x_pos],axis=-1)

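        # Transformer-style sinusoidal position encodings: sin/cos of the token
        # position divided by 10000^(2i/d). Note that position_repr is computed
        # but never added to input_x in this snippet.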
        divisors = tf.pow(tf.constant([10000.0] * (self.filters // 2), dtype=tf.float32),
                          tf.range(0, self.filters, 2, dtype=tf.float32) / self.filters)
        quotients = tf.cast(tf.expand_dims(tf.range(0, tf.reduce_max(self.x_len)), -1), tf.float32) / tf.expand_dims(divisors, 0)
        position_repr = tf.concat([tf.sin(quotients), tf.cos(quotients)], -1)

        mask = create_padding_mask(self.x)
        tmp_ma = MultiHeadAttention(self.filters,16,name='layer1')
        atten_x,_ = tmp_ma(input_x,input_x,input_x,mask=mask)

        input_x = tf.concat([input_x, atten_x], axis=-1)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x,self.training)
        pooled =[]
        for idx,kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(input_x,self.filter_nums1[idx],kernel_size,padding='same',activation=tf.nn.relu,
                                     name='conv1d-%d'%(idx))
            pooled_conv = tf.reduce_max(con1d,axis=1)
            pooled.append(pooled_conv)
        merge  = tf.concat(pooled,axis=1)
        merge = dropout(merge,self.training)
        # merge = tf.layers.batch_normalization(inputs=merge)
        # dense1 = tf.keras.layers.Dense(128,activation=tf.nn.tanh)
        merge = tf.layers.dense(merge,128,activation=tf.nn.tanh,name='dense1')
        # merge=tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge,self.training)
        logits = tf.layers.dense(merge,self.num_class,activation=None,use_bias=False)
        # logits = dense2(merge,name='dense2')
        self.prob = tf.nn.softmax(logits)

        domain_logits = tf.layers.dense(merge,2,activation=None,use_bias=False)
        self.domain_prob = tf.nn.softmax(domain_logits)
        # print(self.prob)

        from nn.loss import  softmax_with_logits_label_smooth

        from nn.loss import focal_loss_softmax
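        # Focal loss down-weights easy, well-classified examples so training
        # concentrates on hard ones; alpha balances the class contributions.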

        self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y,logits=logits,alpha=0.5))
        #self.loss = tf.reduce_mean(focal_loss_softmax(self.y,logits))#tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))
        # self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))

        # self.domain_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits,labels=self.domain))
        # self.loss+=self.loss + lossL2

        # self.soft_loss = tf.reduce_mean(
        #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits/self.softmax_temperature,labels=self.soft_target)
        # )
        # self.task_balance=1.0
        # self.soft_loss =0.0
        # self.loss *=self.task_balance
        # self.loss += (1-self.task_balance)*self.soft_loss*(self.softmax_temperature**2)
        # self.loss +=self.domain_loss
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels":self.y,
            "text_len":self.x_len,
            # "domain":self.domain,
            # 'soft_target':self.soft_target,
            "features":self.in_name_feature,
            "pos_feature":self.pos_feature,
            # 'ask_word_feature':self.ask_word_feature,
            "training": self.training,
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(logits, axis=1),
            "prob": self.prob
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {
                'loss': tf.metrics.mean(self.loss)
            }

        self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {
                'loss': tf.metrics.mean(self.loss)
            }

        self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #8
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, 20])
        self.y = tf.placeholder(tf.int32, [None])
        self.domain = tf.placeholder(tf.int32, [None])
        # self.soft_target = tf.placeholder(tf.float32,[None,None])
        self.pos_feature = tf.placeholder(tf.int32, [None, None])
        # self.ask_word_feature = tf.placeholder(tf.int32,[None,None])
        # self.in_name_feature = tf.placeholder(tf.int32,[None,None])

        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab(),
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        input_x += self.positional_encoding(input_x, 20, masking=False)
        self.enc = input_x

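        # Stack of Transformer encoder blocks: multi-head self-attention
        # followed by a position-wise feed-forward sub-layer in each block.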
        for i in range(self.num_blocks):
            with tf.variable_scope("num_blocks_{}".format(i)):
                self.enc = self.multihead_attention(queries=self.enc,
                                                    keys=self.enc,
                                                    num_units=256,
                                                    num_heads=8)
                self.enc = self.feedforward(self.enc, num_units=[2 * 256, 256])
        input_x = self.enc
        print(input_x)

        #
        # feature_x = tf.one_hot(self.in_name_feature,depth=2)
        # ask_word_feature = tf.one_hot(self.ask_word_feature,depth=2)
        # input_x = tf.concat([input_x,feature_x],axis=-1)
        # # input_x = tf.concat([input_x,ask_word_feature],axis=-1)
        # input_x = tf.concat([input_x,input_x_pos],axis=-1)
        # print(input_x.shape)
        dropout = Dropout(self.keep_prob)
        merge = tf.reduce_mean(input_x, axis=1)
        # merge = tf.layers.batch_normalization(inputs=merge)
        # dense1 = tf.keras.layers.Dense(128,activation=tf.nn.tanh)
        merge = tf.layers.dense(merge,
                                128,
                                activation=tf.nn.tanh,
                                name='dense1')
        # merge=tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge, self.training)
        logits = tf.layers.dense(merge,
                                 self.num_class,
                                 activation=None,
                                 use_bias=False)
        # logits = dense2(merge,name='dense2')
        self.prob = tf.nn.softmax(logits)

        domain_logits = tf.layers.dense(merge,
                                        2,
                                        activation=None,
                                        use_bias=False)
        self.domain_prob = tf.nn.softmax(domain_logits)
        # print(self.prob)

        from nn.loss import softmax_with_logits_label_smooth

        from nn.loss import focal_loss_softmax

        #self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y,logits=logits,alpha=0.5))
        #self.loss = tf.reduce_mean(focal_loss_softmax(self.y,logits))#tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))
        #self.loss = tf.reduce_mean(softmax_with_logits_label_smooth(logits=logits,labels=self.y))
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=self.y))

        # self.domain_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits,labels=self.domain))
        # self.loss+=self.loss + lossL2

        # self.soft_loss = tf.reduce_mean(
        #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits/self.softmax_temperature,labels=self.soft_target)
        # )
        # self.task_balance=1.0
        # self.soft_loss =0.0
        # self.loss *=self.task_balance
        # self.loss += (1-self.task_balance)*self.soft_loss*(self.softmax_temperature**2)
        # self.loss +=self.domain_loss
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            # "domain":self.domain,
            # 'soft_target':self.soft_target,
            # "features":self.in_name_feature,
            # "pos_feature":self.pos_feature,
            # 'ask_word_feature':self.ask_word_feature,
            "training": self.training,
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(logits, axis=1),
            "prob": self.prob
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #9
    def _build_graph(self):
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')
        self.input_ids = tf.placeholder(shape=[None, None],
                                        dtype=tf.int32,
                                        name='input_ids')
        self.input_mask = tf.placeholder(shape=[None, None],
                                         dtype=tf.int32,
                                         name="input_mask")
        self.segment_ids = tf.placeholder(shape=[None, None],
                                          dtype=tf.int32,
                                          name="segment_ids")
        self.y = tf.placeholder(tf.int32, [None])
        self.bert_embedding = BertEmbedding(self.bert_dir)
        _, output_layer = self.bert_embedding(input_ids=self.input_ids,
                                              input_mask=self.input_mask,
                                              segment_ids=self.segment_ids,
                                              is_training=self.training,
                                              return_pool_output=True,
                                              use_fp16=self.use_fp16)

        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [self.num_class, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [self.num_class],
                                      initializer=tf.zeros_initializer())

        dropout = Dropout(0.9)
        output_layer = dropout(output_layer, self.training)
        # if is_training:
        #     # I.e., 0.1 dropout
        #     output_layer = tf.nn.dropout(output_layer, keep_prob=0.9,)
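        # Linear classification head on the pooled BERT output; the loss below
        # is softmax cross-entropy written out from one-hot labels and
        # log-probabilities.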
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1, name="probs")
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(self.y,
                                    depth=self.num_class,
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        self.loss = tf.reduce_mean(per_example_loss)
        self.probs = probabilities

        self.input_placeholder_dict = OrderedDict({
            "input_ids": self.input_ids,
            "segment_ids": self.segment_ids,
            "labels": self.y,
            "input_mask": self.input_mask,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(self.probs, axis=1),
            "probabilities": probabilities
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #10
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.x_len = tf.placeholder(tf.int32,[None])
        self.y = tf.placeholder(tf.int32, [None])
        self.training = tf.placeholder_with_default(False, shape=(), name='is_training')
        self.kernel_initializer = tf.truncated_normal_initializer(stddev=0.05)

        self.filter_sizes = [5,5,3,3,3,3]
        self.num_filters = 256
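        # Character-level CNN: six stacked convolutions over the character
        # sequence, with max pooling after blocks 1, 2 and 6, followed by a
        # small fully-connected classifier.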

        word_embedding = Embedding(pretrained_embedding=self.pretrained_word_embedding,
                                   embedding_shape=(self.vocab.get_char_vocab_size() + 1, self.word_embedding_size),
                                   trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        # pos_embedding = Embedding(pretrained_embedding=None,
        #                           embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
        #
        # input_x_pos = pos_embedding(self.pos_feature)
        #
        # feature_x = tf.one_hot(self.in_name_feature, depth=2)
        # input_x = tf.concat([input_x, feature_x], axis=-1)
        # input_x = tf.concat([input_x, input_x_pos], axis=-1)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x, self.training)

        input_x = tf.expand_dims(input_x,axis=-1)
        print(input_x.shape)

        # ============= Convolutional Layers =============
        with tf.name_scope("conv-maxpool-1"):
            conv1 = tf.layers.conv2d(
                input_x,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[0], self.word_embedding_size],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(
                conv1,
                pool_size=(3, 1),
                strides=(3, 1))
            pool1 = tf.transpose(pool1, [0, 1, 3, 2])

        with tf.name_scope("conv-maxpool-2"):
            conv2 = tf.layers.conv2d(
                pool1,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[1], self.num_filters],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(
                conv2,
                pool_size=(3, 1),
                strides=(3, 1))
            pool2 = tf.transpose(pool2, [0, 1, 3, 2])

        with tf.name_scope("conv-3"):
            conv3 = tf.layers.conv2d(
                pool2,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[2], self.num_filters],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            conv3 = tf.transpose(conv3, [0, 1, 3, 2])

        with tf.name_scope("conv-4"):
            conv4 = tf.layers.conv2d(
                conv3,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[3], self.num_filters],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            conv4 = tf.transpose(conv4, [0, 1, 3, 2])

        with tf.name_scope("conv-5"):
            conv5 = tf.layers.conv2d(
                conv4,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[4], self.num_filters],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            conv5 = tf.transpose(conv5, [0, 1, 3, 2])

        with tf.name_scope("conv-maxpool-6"):
            conv6 = tf.layers.conv2d(
                conv5,
                filters=self.num_filters,
                kernel_size=[self.filter_sizes[5], self.num_filters],
                kernel_initializer=self.kernel_initializer,
                activation=tf.nn.relu)
            pool6 = tf.layers.max_pooling2d(
                conv6,
                pool_size=(3, 1),
                strides=(3, 1))
            pool6 = tf.transpose(pool6, [0, 2, 1, 3])
            print(pool6.get_shape().as_list())
            h_pool = tf.reshape(pool6, [-1, self.num_filters])
        print(h_pool.shape)



        print(input_x)
        fc1_layer = tf.keras.layers.Dense(128,activation=tf.nn.relu)
        fc1_out = fc1_layer(h_pool)
        # fc1_out = dropout(fc1_out,self.training)
        # fc2_layer = tf.keras.layers.Dense(1024,activation=tf.nn.relu)
        # fc2_out = fc2_layer(fc1_out)


        # encoder1 = BiLSTM(self.rnn_hidden_size,name='layer_1')
        # input_x,_ = encoder1(input_x,self.x_len)
        #
        # encoder2 = BiLSTM(self.rnn_hidden_size,name='layer_2')
        # input_x,_ = encoder2(input_x,self.x_len)
        # print(input_x.shape)
        # merge = tf.reshape(input_x,[tf.shape(input_x)[0],-1])

        # avg_pool = tf.reduce_mean(input_x,axis=1)
        # avg_max =  tf.reduce_max(input_x,axis=1)
        #
        # merge = tf.concat([avg_pool,avg_max],axis=1)
        # print(merge.shape)
        # h_conc_linear1 = tf.keras.layers.Dense(200,use_bias=False,activation=tf.nn.relu)(merge)
        # h_conc_linear2 = tf.keras.layers.Dense(200,use_bias=False,activation=tf.nn.relu)(merge)
        # merge = merge+h_conc_linear1+h_conc_linear2

        #
        # dense = tf.keras.layers.Dense(16,activation=tf.nn.relu)
        # merge = dense(merge)
        # merge = dropout(merge, self.training)
        self.logits = tf.keras.layers.Dense(self.num_class,activation=None)(fc1_out)
        self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,labels=self.y))
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "char_ids": self.x,
            "labels": self.y,
            "text_len":self.x_len,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(self.logits, axis=1)
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {
                'loss': tf.metrics.mean(self.loss)
            }

        self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {
                'loss': tf.metrics.mean(self.loss)
            }

        self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #11
    def _build_graph(self):
        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')
        self.input_ids = tf.placeholder(shape=[None, None],
                                        dtype=tf.int32,
                                        name='input_ids')
        self.input_mask = tf.placeholder(shape=[None, None],
                                         dtype=tf.int32,
                                         name="input_mask")
        self.segment_ids = tf.placeholder(shape=[None, None],
                                          dtype=tf.int32,
                                          name="segment_ids")
        self.y = tf.placeholder(tf.int32, [None])
        self.bert_embedding = BertEmbedding(self.bert_dir)
        _, output_layer = self.bert_embedding(input_ids=self.input_ids,
                                              input_mask=self.input_mask,
                                              segment_ids=self.segment_ids,
                                              is_training=self.training,
                                              return_pool_output=True,
                                              use_fp16=self.use_fp16)

        hidden_size = output_layer.shape[-1].value

        # output_weights = tf.get_variable(
        #     "output_weights", [self.num_class, hidden_size],
        #     initializer=tf.truncated_normal_initializer(stddev=0.02))
        #
        # output_bias = tf.get_variable(
        #     "output_bias", [self.num_class], initializer=tf.zeros_initializer())

        dropout = Dropout(0.9)
        output_layer = dropout(output_layer, self.training)

        #add cnn layer
        pooled = []
        for idx, kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(output_layer,
                                     self.filter_nums1[idx],
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='conv1d-%d' % (idx))
            pooled_conv = tf.reduce_max(con1d, axis=1)
            pooled.append(pooled_conv)
        merge = tf.concat(pooled, axis=1)
        merge = dropout(merge, self.training)
        merge = tf.layers.dense(merge,
                                128,
                                activation=tf.nn.tanh,
                                name='dense1')
        # merge=tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge, self.training)
        logits = tf.layers.dense(merge,
                                 self.num_class,
                                 activation=None,
                                 use_bias=False)
        # if is_training:
        #     # I.e., 0.1 dropout
        #     output_layer = tf.nn.dropout(output_layer, keep_prob=0.9,)
        # logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        # logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1, name="probs")
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(self.y,
                                    depth=self.num_class,
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        self.loss = tf.reduce_mean(per_example_loss)
        self.probs = probabilities

        self.input_placeholder_dict = OrderedDict({
            "input_ids": self.input_ids,
            "segment_ids": self.segment_ids,
            "labels": self.y,
            "input_mask": self.input_mask,
            "training": self.training
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(self.probs, axis=1),
            "probabilities": probabilities
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #12
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None], name='token_ids')
        self.x_char = tf.placeholder(tf.int32, [None, None], name="char_ids")
        # self.x_len = tf.placeholder(tf.int32,[None])
        # self.x_char_len = tf.placeholder(tf.int32,[None])
        self.y = tf.placeholder(tf.int32, [None])
        #self.pos_feature = tf.placeholder(tf.int32,[None,None])
        # self.ask_word_feature = tf.placeholder(tf.int32,[None,None])
        # self.in_name_feature = tf.placeholder(tf.int32,[None,None])

        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        vocab_size = (self.vocab.get_word_vocab() + 1
                      if self.pretrained_word_embedding is not None
                      else self.vocab.get_word_vocab())
        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(vocab_size, self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        char_embedding = Embedding(
            pretrained_embedding=None,
            embedding_shape=(self.vocab.get_char_vocab_size() + 1,
                             self.word_embedding_size),
            trainable=True)
        x_char_embeded = char_embedding(self.x_char)
        print(x_char_embeded)

        input_x = word_embedding(self.x)
        x_w_embeded = input_x

        pos_embedding = Embedding(pretrained_embedding=None,
                                  embedding_shape=(self.pos_vocab_size,
                                                   self.pos_embedding_size))

        #input_x_pos = pos_embedding(self.pos_feature)
        # #
        # feature_x = tf.one_hot(self.in_name_feature,depth=2)
        # # ask_word_feature = tf.one_hot(self.ask_word_feature,depth=2)
        # input_x = tf.concat([input_x,feature_x],axis=-1)
        # # input_x = tf.concat([input_x,ask_word_feature],axis=-1)
        #input_x = tf.concat([input_x,input_x_pos],axis=-1)
        # print(input_x.shape)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x, self.training)
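        # Word-level branch: parallel 1-D convolutions with different kernel
        # sizes over the word embeddings, each max-pooled over time.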
        pooled = []
        for idx, kernel_size in enumerate(self.filter_sizes1):
            con1d = tf.layers.conv1d(input_x,
                                     self.filter_nums1[idx],
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='conv1d-%d' % (idx))
            # con1d = dropout(con1d,self.training)
            similarity_func = ProjectedDotProduct(name='conv1d-%d' % (idx),
                                                  hidden_units=64)
            # attn = UniAttention(similarity_func)
            # attn_c_w = attn(con1d,x_w_embeded,self.x_len)
            # # attn_c_w= dropout(attn_c_w,self.training)
            # attn_c_c= attn(con1d,x_char_embeded,self.x_char_len)
            # attn_c_c = dropout(attn_c_c,self.training)
            pooled_conv = tf.reduce_max(con1d, axis=1)
            pooled.append(pooled_conv)

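        # Character-level branch: the same conv-and-max-pool pattern over the
        # character embeddings; both branches' pooled vectors are concatenated
        # below.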
        char_kernel_size = [2, 3, 5, 7]
        for idx, kernel_size in enumerate(char_kernel_size):
            con1d = tf.layers.conv1d(x_char_embeded,
                                     128,
                                     kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='char_conv1d-%d' % (idx))
            # con1d = dropout(con1d,self.training)

            # similarity_func = ProjectedDotProduct(name='char_cond-%d' % (idx), hidden_units=64)
            # attn = UniAttention(similarity_func)
            # attn_c_w = attn(con1d, x_w_embeded, self.x_len)
            # attn_c_c = attn(con1d, x_char_embeded, self.x_char_len)
            # attn_c_c = dropout(attn_c_c,self.training)
            # attn_c_w = dropout(attn_c_w,self.training)

            pooled_conv = tf.reduce_max(con1d, axis=1)
            pooled.append(pooled_conv)

        # print(merge.shape)
        # print(c2_concat.shape)
        merge = tf.concat(pooled, axis=-1)

        merge = dropout(merge, self.training)
        merge = tf.layers.batch_normalization(inputs=merge)
        dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
        merge = dense1(merge)
        merge = tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge, self.training)
        dense2 = tf.keras.layers.Dense(self.num_class,
                                       activation=None,
                                       use_bias=False)
        logits = dense2(merge)

        self.prob = tf.nn.softmax(logits, name="probs")

        from nn.loss import focal_loss_softmax

        # self.loss = tf.reduce_mean(focal_loss_softmax(self.y,logits))#tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=self.y))
        #self.loss = focal_loss_softmax(labels=self.y,logits = logits)
        # self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y, logits=logits, alpha=0.25))

        # self.loss+=self.loss + lossL2

        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            # "text_len":self.x_len,
            # 'char_lens':self.x_char_len,
            # "features":self.in_name_feature,
            "char_ids": self.x_char,
            #    "pos_feature":self.pos_feature,
            # 'ask_word_feature':self.ask_word_feature,
            "training": self.training,
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(logits, axis=1),
            "probs": self.prob
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
Example #13
    def _build_graph(self):
        self.x = tf.placeholder(tf.int32, [None, None])
        self.y = tf.placeholder(tf.int32, [None])
        self.domain = tf.placeholder(tf.int32, [None])
        print(self.x)
        # self.soft_target = tf.placeholder(tf.float32,[None,None])
        self.pos_feature = tf.placeholder(tf.int32, [None, None])
        print(self.pos_feature)
        # self.ask_word_feature = tf.placeholder(tf.int32,[None,None])
        self.in_name_feature = tf.placeholder(tf.int32, [None, None])
        print(self.in_name_feature)
        # self.ask_word_feature = tf.placeholder(tf.int32,[None,None])

        self.training = tf.placeholder_with_default(False,
                                                    shape=(),
                                                    name='is_training')

        word_embedding = Embedding(
            pretrained_embedding=self.pretrained_word_embedding,
            embedding_shape=(self.vocab.get_word_vocab(),
                             self.word_embedding_size),
            trainable=self.word_embedding_trainable)

        input_x = word_embedding(self.x)

        pos_embedding = Embedding(pretrained_embedding=None,
                                  embedding_shape=(self.pos_vocab_size,
                                                   self.pos_embedding_size))

        input_x_pos = pos_embedding(self.pos_feature)
        #
        feature_x = tf.one_hot(self.in_name_feature, depth=2)
        # ask_word_feature = tf.one_hot(self.ask_word_feature,depth=2)
        # input_x = tf.concat([input_x,feature_x],axis=-1)
        # # input_x = tf.concat([input_x,ask_word_feature],axis=-1)
        input_x = tf.concat([input_x, input_x_pos], axis=-1)
        # print(input_x.shape)
        dropout = Dropout(self.keep_prob)
        input_x = dropout(input_x, self.training)
        pooled = []
        input_x = tf.layers.dense(input_x, 128, activation=None, name='aff1')
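        # Two residual 1-D convolution blocks: each block's conv output is
        # added back onto its input and followed by dropout, then the result is
        # max-pooled over time.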
        for idx in range(2):
            conv_x = tf.layers.conv1d(input_x,
                                      128,
                                      3,
                                      padding='same',
                                      activation=tf.nn.relu,
                                      name='conv1d-%d' % (idx))
            input_x += conv_x
            input_x = dropout(input_x, self.training)
        input_x = tf.reduce_max(input_x, axis=1)
        # merge = dropout(merge,self.training)
        # merge = tf.layers.batch_normalization(inputs=merge)
        # dense1 = tf.keras.layers.Dense(128,activation=tf.nn.tanh)
        merge = tf.layers.dense(input_x,
                                128,
                                activation=tf.nn.tanh,
                                name='dense1')
        # merge=tf.layers.batch_normalization(inputs=merge)
        merge = dropout(merge, self.training)
        logits = tf.layers.dense(merge,
                                 self.num_class,
                                 activation=None,
                                 use_bias=True)
        # logits = dense2(merge,name='dense2')
        self.prob = tf.nn.softmax(logits)

        domain_logits = tf.layers.dense(merge,
                                        2,
                                        activation=None,
                                        use_bias=False)
        self.domain_prob = tf.nn.softmax(domain_logits)
        # print(self.prob)

        from nn.loss import softmax_with_logits_label_smooth

        from nn.loss import focal_loss_softmax

        self.loss = tf.reduce_mean(
            focal_loss_softmax(labels=self.y,
                               logits=logits,
                               alpha=0.5,
                               gamma=2.0))
        #self.loss = tf.reduce_mean(focal_loss_softmax(self.y,logits))#tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))
        # self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y))

        # self.domain_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits,labels=self.domain))
        # self.loss+=self.loss + lossL2

        # self.soft_loss = tf.reduce_mean(
        #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits/self.softmax_temperature,labels=self.soft_target)
        # )
        # self.task_balance=1.0
        # self.soft_loss =0.0
        # self.loss *=self.task_balance
        # self.loss += (1-self.task_balance)*self.soft_loss*(self.softmax_temperature**2)
        # self.loss +=self.domain_loss
        global_step = tf.train.get_or_create_global_step()

        self.input_placeholder_dict = OrderedDict({
            "token_ids": self.x,
            "labels": self.y,
            # "domain":self.domain,
            # 'soft_target':self.soft_target,
            "features": self.in_name_feature,
            "pos_feature": self.pos_feature,
            # 'ask_word_feature':self.ask_word_feature,
            "training": self.training,
        })

        self.output_variable_dict = OrderedDict({
            "predict": tf.argmax(logits, axis=1),
            "prob": self.prob
        })

        # 8. Metrics and summary
        with tf.variable_scope("train_metrics"):
            self.train_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.train_update_metrics = tf.group(
            *[op for _, op in self.train_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="train_metrics")
        self.train_metric_init_op = tf.variables_initializer(metric_variables)

        with tf.variable_scope("eval_metrics"):
            self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}

        self.eval_update_metrics = tf.group(
            *[op for _, op in self.eval_metrics.values()])
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="eval_metrics")
        self.eval_metric_init_op = tf.variables_initializer(metric_variables)

        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()