def call(self, x: IOperator) -> IOperator:
    self.__var_list: List[ITrainable] = []
    fc1 = Dense(inputs=x, activation=Tanh(), units=784)
    self.__var_list.extend(fc1.variables)
    fc2 = Dense(inputs=fc1, activation=Tanh(), units=784)
    self.__var_list.extend(fc2.variables)
    fc3 = Dense(inputs=fc2, activation=Tanh(), units=392)
    self.__var_list.extend(fc3.variables)
    dropout = Dropout(inputs=fc3)
    fc4 = Dense(inputs=dropout, activation=Tanh(), units=128)
    self.__var_list.extend(fc4.variables)
    fc5 = Dense(inputs=fc4, activation=Softmax(), units=10)
    self.__var_list.extend(fc5.variables)
    return fc5
from nn.loss import cross_entropy
from nn.metrix import accuracy

(X_train, y_train), (X_test, y_test) = load_mnist()
X_train = X_train.reshape((X_train.shape[0], -1)) / 255
X_test = X_test.reshape((X_test.shape[0], -1)) / 255

transformer = MakeOneHot()
y_train = transformer.fit_transform(y_train)
y_test = transformer.transform(y_test)

model = Model()
model.add(FC(500, input_shape=784))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(150))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(50))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(10))
model.add(Softmax())

model.compile(Adam(eta=0.01), cross_entropy, accuracy)
model.fit(X_train, y_train, max_iter=10, batch_size=2000)

print("train acc: {:.2f}%".format(model.score(X_train, y_train)))
print("test acc: {:.2f}%".format(model.score(X_test, y_test)))
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.x_len = tf.placeholder(tf.int32, [None])
    self.y = tf.placeholder(tf.int32, [None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab() + 1, self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    dropout = Dropout(self.keep_prob)
    variational_dropout = VariationalDropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    encoder1 = BiLSTM(self.rnn_hidden_size, name='layer_1')
    input_x, _ = encoder1(input_x, self.x_len)
    input_x = variational_dropout(input_x, self.training)

    encoder2 = BiLSTM(self.rnn_hidden_size, name='layer_2')
    input_x, _ = encoder2(input_x, self.x_len)
    input_x = variational_dropout(input_x, self.training)

    avg_pool = tf.reduce_mean(input_x, axis=1)
    avg_max = tf.reduce_max(input_x, axis=1)
    merge = tf.concat([avg_pool, avg_max], axis=1)
    # dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)
    # merge = dense(merge)
    # merge = dropout(merge, self.training)

    self.logits = tf.keras.layers.Dense(self.num_class, activation=None)(merge)
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y))

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        "text_len": self.x_len,
        "training": self.training
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(self.logits, axis=1)
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.y = tf.placeholder(tf.int32, [None])
    self.pos_feature = tf.placeholder(tf.int32, [None, None])
    self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
    self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab() + 1, self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    pos_embedding = Embedding(pretrained_embedding=None,
                              embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    # input_x_pos = pos_embedding(self.pos_feature)
    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # ask_word_feature = tf.one_hot(self.ask_word_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, ask_word_feature], axis=-1)
    # input_x = tf.concat([input_x, input_x_pos], axis=-1)
    # print(input_x.shape)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    # First convolution block; c4/c5/c6 keep the pooled maps for kernel sizes 4, 5 and 6
    # (assumes those sizes appear in self.filter_sizes1) so they can be re-used below.
    pooled = []
    c4 = None
    c5 = None
    c6 = None
    for idx, kernel_size in enumerate(self.filter_sizes1):
        con1d = tf.layers.conv1d(input_x, self.filter_nums1[idx], kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='conv1d-%d' % (idx))
        pooled_conv = tf.layers.max_pooling1d(con1d, 2, strides=1, padding='same')
        if kernel_size == 4:
            c4 = pooled_conv
        if kernel_size == 5:
            c5 = pooled_conv
        if kernel_size == 6:
            c6 = pooled_conv
        pooled.append(pooled_conv)
    merge = tf.concat(pooled, axis=-1)

    # Second convolution block on top of the concatenated first-layer features.
    c1_concat = merge
    layer2_pooled = []
    layer2_kernel_sizes = [2, 3]
    for idx, kernel_size in enumerate(layer2_kernel_sizes):
        con1d = tf.layers.conv1d(c1_concat, self.filter_nums1[idx], kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='conv1d-layer-2-%d' % (idx))
        pooled_conv = tf.layers.max_pooling1d(con1d, 2, strides=1, padding='same')
        layer2_pooled.append(pooled_conv)
    c2_concat = tf.concat([tf.concat(layer2_pooled, axis=-1), c4, c5, c6], axis=-1)
    # print(merge.shape)
    # print(c2_concat.shape)

    merge = tf.concat([c2_concat, merge], axis=-1)
    conv1d = tf.layers.conv1d(merge, 128, kernel_size=1, padding='same',
                              activation=tf.nn.relu, name='layer_%d' % (3))
    merge = tf.reduce_max(conv1d, axis=1)
    # merge = tf.reduce_max(merge, axis=1)
    merge = dropout(merge, self.training)
    merge = tf.layers.batch_normalization(inputs=merge)

    dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
    merge = dense1(merge)
    merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    dense2 = tf.keras.layers.Dense(self.num_class, activation=None, use_bias=False)
    logits = dense2(merge)

    # self.loss = tf.reduce_mean(focal_loss_softmax(self.y, logits))
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.y))
    # self.loss += self.loss + lossL2

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        # "features": self.in_name_feature,
        # "pos_feature": self.pos_feature,
        # 'ask_word_feature': self.ask_word_feature,
        "training": self.training,
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(logits, axis=1)
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.x_len = tf.placeholder(tf.int32, [None])
    self.y = tf.placeholder(tf.int32, [None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab() + 1, self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    encoder1 = BiLSTM(self.rnn_hidden_size, name='layer_1')
    input_x, _ = encoder1(input_x, self.x_len)

    H = tf.nn.tanh(input_x)
    alpha = tf.nn.softmax(tf.squeeze(tf.layers.Dense(1)(H), axis=-1))
    H_star = tf.squeeze(tf.matmul(tf.expand_dims(alpha, axis=1), H), axis=1)
    H_star = dropout(H_star, self.training)

    self.logits = tf.keras.layers.Dense(self.num_class, activation=None, use_bias=True)(H_star)
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y))

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        "text_len": self.x_len,
        "training": self.training
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(self.logits, axis=1)
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
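# Illustrative sketch (not part of the model code above): a minimal NumPy version of the
# soft-attention pooling used in the graph above, alpha = softmax(w^T tanh(H)) over time
# steps and H_star = sum_t alpha_t * H_t. The weight vector `w` stands in for the Dense(1)
# projection; shapes and names here are assumptions for illustration only.
import numpy as np

def attention_pool(H, w):
    # H: [seq_len, hidden] sequence of encoder states, w: [hidden] scoring vector
    scores = np.tanh(H) @ w                  # one scalar score per time step
    alpha = np.exp(scores - scores.max())
    alpha /= alpha.sum()                     # softmax over time steps
    return alpha @ H                         # weighted sum of states -> [hidden]

H = np.random.randn(12, 8)
w = np.random.randn(8)
print(attention_pool(H, w).shape)  # (8,)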
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.x_len = tf.placeholder(tf.int32, [None])
    self.pos_feature = tf.placeholder(tf.int32, [None, None])
    print(self.x.name)
    print(self.x_len.name)
    print(self.pos_feature.name)
    # self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    self.y = tf.placeholder(tf.int32, [None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab() + 1, self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    pos_embedding = Embedding(pretrained_embedding=None,
                              embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    input_x_pos = pos_embedding(self.pos_feature)
    input_x = tf.concat([input_x, input_x_pos], axis=-1)
    input_q = input_x
    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)

    self.filters = 256

    dropout = Dropout(self.keep_prob)
    variational_dropout = VariationalDropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    mask = create_padding_mask(self.x)

    encoder1 = BiGRU(self.rnn_hidden_size, name='layer_1')
    input_x, _ = encoder1(input_x, self.x_len)
    input_x = variational_dropout(input_x, self.training)

    tmp_ma = MultiHeadAttention(self.filters, 8, name='mha1')
    norm_x = tf.layers.batch_normalization(input_x)
    mha_out, _ = tmp_ma(norm_x, norm_x, norm_x, mask)
    input_x += mha_out

    encoder2 = BiGRU(self.rnn_hidden_size, name='layer_2')
    input_x, _ = encoder2(input_x, self.x_len)
    input_x = variational_dropout(input_x, self.training)

    # tmp_ma = MultiHeadAttention(self.filters, 8)
    tmp_ma = MultiHeadAttention(self.filters, 8, name='mha2')
    norm_x = tf.layers.batch_normalization(input_x)
    mha_out, _ = tmp_ma(norm_x, norm_x, norm_x, mask)
    input_x = mha_out

    avg_pool = tf.reduce_mean(input_x, axis=1)
    avg_max = tf.reduce_max(input_x, axis=1)
    merge = tf.concat([avg_pool, avg_max], axis=1)
    '''
    pooled = []
    for idx, kernel_size in enumerate(self.filter_sizes1):
        con1d = tf.layers.conv1d(input_x, self.filter_nums1[idx], kernel_size, padding='same',
                                 activation=tf.nn.relu, name='conv1d-%d' % (idx))
        pooled_conv = tf.reduce_max(con1d, axis=1)
        pooled.append(pooled_conv)
    merge_pooled = tf.concat(pooled, axis=1)
    merge = tf.concat([merge_pooled, merge], axis=1)
    '''
    # dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)
    # merge = dense(merge)
    # merge = dropout(merge, self.training)

    self.logits = tf.keras.layers.Dense(self.num_class, activation=None, name='final_dense')(merge)
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y))

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        "text_len": self.x_len,
        "training": self.training,
        "pos_feature": self.pos_feature,
        # "features": self.in_name_feature,
    })

    self.output_variable_dict = OrderedDict({
        "prob": tf.nn.softmax(self.logits),
        "predict": tf.argmax(self.logits, axis=1)
    })
    print(self.output_variable_dict)

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.y = tf.placeholder(tf.int32, [None])
    self.domain = tf.placeholder(tf.int32, [None])
    self.x_len = tf.placeholder(tf.int32, [None])
    # self.soft_target = tf.placeholder(tf.float32, [None, None])
    self.pos_feature = tf.placeholder(tf.int32, [None, None])
    # self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
    self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab(), self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    pos_embedding = Embedding(pretrained_embedding=None,
                              embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    input_x_pos = pos_embedding(self.pos_feature)

    self.filters = 256

    feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # ask_word_feature = tf.one_hot(self.ask_word_feature, depth=2)
    input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, ask_word_feature], axis=-1)
    input_x = tf.concat([input_x, input_x_pos], axis=-1)

    # Sinusoidal position representation (computed but currently unused below).
    divisors = tf.pow(tf.constant([10000.0] * (self.filters // 2), dtype=tf.float32),
                      tf.range(0, self.filters, 2, dtype=tf.float32) / self.filters)
    quotients = tf.cast(tf.expand_dims(tf.range(0, tf.reduce_max(self.x_len)), -1),
                        tf.float32) / tf.expand_dims(divisors, 0)
    position_repr = tf.concat([tf.sin(quotients), tf.cos(quotients)], -1)

    mask = create_padding_mask(self.x)
    # tmp_ma = MultiHeadAttention(self.filters, 16, name='layer1')
    tmp_ma = MultiHeadAttention(self.filters, 16, name='layer1')
    atten_x, _ = tmp_ma(input_x, input_x, input_x, mask=mask)
    input_x = tf.concat([input_x, atten_x], axis=-1)
    # print(position_repr.shape)
    # print(input_x.shape)
    # sys.exit(1)
    # print(position_repr)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    pooled = []
    for idx, kernel_size in enumerate(self.filter_sizes1):
        con1d = tf.layers.conv1d(input_x, self.filter_nums1[idx], kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='conv1d-%d' % (idx))
        pooled_conv = tf.reduce_max(con1d, axis=1)
        pooled.append(pooled_conv)
    merge = tf.concat(pooled, axis=1)
    merge = dropout(merge, self.training)

    # merge = tf.layers.batch_normalization(inputs=merge)
    # dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
    merge = tf.layers.dense(merge, 128, activation=tf.nn.tanh, name='dense1')
    # merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    logits = tf.layers.dense(merge, self.num_class, activation=None, use_bias=False)
    # logits = dense2(merge, name='dense2')
    self.prob = tf.nn.softmax(logits)

    domain_logits = tf.layers.dense(merge, 2, activation=None, use_bias=False)
    self.domain_prob = tf.nn.softmax(domain_logits)
    # print(self.prob)

    from nn.loss import softmax_with_logits_label_smooth
    from nn.loss import focal_loss_softmax

    self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y, logits=logits, alpha=0.5))
    # self.loss = tf.reduce_mean(focal_loss_softmax(self.y, logits))
    # self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.y))
    # self.domain_loss = tf.reduce_mean(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits, labels=self.domain))
    # self.loss += self.loss + lossL2
    # self.soft_loss = tf.reduce_mean(
    #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits / self.softmax_temperature,
    #                                                labels=self.soft_target))
    # self.task_balance = 1.0
    # self.soft_loss = 0.0
    # self.loss *= self.task_balance
    # self.loss += (1 - self.task_balance) * self.soft_loss * (self.softmax_temperature ** 2)
    # self.loss += self.domain_loss

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        "text_len": self.x_len,
        # "domain": self.domain,
        # 'soft_target': self.soft_target,
        "features": self.in_name_feature,
        "pos_feature": self.pos_feature,
        # 'ask_word_feature': self.ask_word_feature,
        "training": self.training,
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(logits, axis=1),
        "prob": self.prob
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
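# Illustrative sketch (not part of the model code above): a rough NumPy rendering of what
# nn.loss.focal_loss_softmax is assumed to compute, i.e. the focal loss
# FL = -alpha * (1 - p_y)^gamma * log(p_y) with p_y the softmax probability of the true class.
# The actual library implementation may differ in details (reduction, defaults, clipping).
import numpy as np

def focal_loss_softmax(labels, logits, alpha=0.5, gamma=2.0):
    # logits: [batch, num_class] raw scores, labels: [batch] integer class ids
    z = logits - logits.max(axis=1, keepdims=True)            # numerically stable softmax
    probs = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    p_y = probs[np.arange(len(labels)), labels]                # probability of the true class
    return -alpha * (1.0 - p_y) ** gamma * np.log(p_y + 1e-12) # per-example focal loss

print(focal_loss_softmax(np.array([0, 1]),
                         np.array([[2.0, 0.5], [0.1, 0.3]])).shape)  # (2,)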
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, 20])
    self.y = tf.placeholder(tf.int32, [None])
    self.domain = tf.placeholder(tf.int32, [None])
    # self.soft_target = tf.placeholder(tf.float32, [None, None])
    self.pos_feature = tf.placeholder(tf.int32, [None, None])
    # self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
    # self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab(), self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)
    input_x += self.positional_encoding(input_x, 20, masking=False)

    self.enc = input_x
    for i in range(self.num_blocks):
        with tf.variable_scope("num_blocks_{}".format(i)):
            self.enc = self.multihead_attention(queries=self.enc,
                                                keys=self.enc,
                                                num_units=256,
                                                num_heads=8)
            self.enc = self.feedforward(self.enc, num_units=[2 * 256, 256])
    input_x = self.enc
    print(input_x)

    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # ask_word_feature = tf.one_hot(self.ask_word_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, ask_word_feature], axis=-1)
    # input_x = tf.concat([input_x, input_x_pos], axis=-1)
    # print(input_x.shape)

    dropout = Dropout(self.keep_prob)
    merge = tf.reduce_mean(input_x, axis=1)
    # merge = tf.layers.batch_normalization(inputs=merge)
    # dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
    merge = tf.layers.dense(merge, 128, activation=tf.nn.tanh, name='dense1')
    # merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    logits = tf.layers.dense(merge, self.num_class, activation=None, use_bias=False)
    # logits = dense2(merge, name='dense2')
    self.prob = tf.nn.softmax(logits)

    domain_logits = tf.layers.dense(merge, 2, activation=None, use_bias=False)
    self.domain_prob = tf.nn.softmax(domain_logits)
    # print(self.prob)

    from nn.loss import softmax_with_logits_label_smooth
    from nn.loss import focal_loss_softmax

    # self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y, logits=logits, alpha=0.5))
    # self.loss = tf.reduce_mean(focal_loss_softmax(self.y, logits))
    # self.loss = tf.reduce_mean(softmax_with_logits_label_smooth(logits=logits, labels=self.y))
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.y))
    # self.domain_loss = tf.reduce_mean(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits, labels=self.domain))
    # self.loss += self.loss + lossL2
    # self.soft_loss = tf.reduce_mean(
    #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits / self.softmax_temperature,
    #                                                labels=self.soft_target))
    # self.task_balance = 1.0
    # self.soft_loss = 0.0
    # self.loss *= self.task_balance
    # self.loss += (1 - self.task_balance) * self.soft_loss * (self.softmax_temperature ** 2)
    # self.loss += self.domain_loss

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        # "domain": self.domain,
        # 'soft_target': self.soft_target,
        # "features": self.in_name_feature,
        # "pos_feature": self.pos_feature,
        # 'ask_word_feature': self.ask_word_feature,
        "training": self.training,
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(logits, axis=1),
        "prob": self.prob
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
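# Illustrative sketch (not part of the model code above): a minimal NumPy version of the
# sinusoidal positional encoding that self.positional_encoding is assumed to produce, i.e.
# PE[pos, 2i] = sin(pos / 10000^(2i/d)) and PE[pos, 2i+1] = cos(pos / 10000^(2i/d)).
# The helper's actual signature, and whether sin/cos are interleaved or concatenated
# (the earlier variant concatenates them via `divisors`/`quotients`), may differ.
import numpy as np

def sinusoidal_positional_encoding(max_len, d_model):
    pos = np.arange(max_len, dtype=np.float32)[:, None]        # [max_len, 1]
    i = np.arange(0, d_model, 2, dtype=np.float32)             # [d_model / 2]
    angles = pos / np.power(10000.0, i / d_model)              # [max_len, d_model / 2]
    pe = np.zeros((max_len, d_model), dtype=np.float32)
    pe[:, 0::2] = np.sin(angles)                               # even dimensions
    pe[:, 1::2] = np.cos(angles)                               # odd dimensions
    return pe

print(sinusoidal_positional_encoding(20, 256).shape)  # (20, 256)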
def _build_graph(self):
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')
    self.input_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name='input_ids')
    self.input_mask = tf.placeholder(shape=[None, None], dtype=tf.int32, name="input_mask")
    self.segment_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name="segment_ids")
    self.y = tf.placeholder(tf.int32, [None])

    self.bert_embedding = BertEmbedding(self.bert_dir)
    _, output_layer = self.bert_embedding(input_ids=self.input_ids,
                                          input_mask=self.input_mask,
                                          segment_ids=self.segment_ids,
                                          is_training=self.training,
                                          return_pool_output=True,
                                          use_fp16=self.use_fp16)
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [self.num_class, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [self.num_class],
                                  initializer=tf.zeros_initializer())

    dropout = Dropout(0.9)
    output_layer = dropout(output_layer, self.training)
    # if is_training:
    #     # I.e., 0.1 dropout
    #     output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1, name="probs")
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(self.y, depth=self.num_class, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    self.loss = tf.reduce_mean(per_example_loss)
    self.probs = probabilities

    self.input_placeholder_dict = OrderedDict({
        "input_ids": self.input_ids,
        "segment_ids": self.segment_ids,
        "labels": self.y,
        "input_mask": self.input_mask,
        "training": self.training
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(self.probs, axis=1),
        "probabilities": probabilities
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.x_len = tf.placeholder(tf.int32, [None])
    self.y = tf.placeholder(tf.int32, [None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    self.kernel_initializer = tf.truncated_normal_initializer(stddev=0.05)
    self.filter_sizes = [5, 5, 3, 3, 3, 3]
    self.num_filters = 256

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_char_vocab_size() + 1, self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)
    # pos_embedding = Embedding(pretrained_embedding=None,
    #                           embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    # input_x_pos = pos_embedding(self.pos_feature)
    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, input_x_pos], axis=-1)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)
    input_x = tf.expand_dims(input_x, axis=-1)
    print(input_x.shape)

    # ============= Convolutional Layers =============
    with tf.name_scope("conv-maxpool-1"):
        conv1 = tf.layers.conv2d(
            input_x,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[0], self.word_embedding_size],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        pool1 = tf.layers.max_pooling2d(conv1, pool_size=(3, 1), strides=(3, 1))
        pool1 = tf.transpose(pool1, [0, 1, 3, 2])

    with tf.name_scope("conv-maxpool-2"):
        conv2 = tf.layers.conv2d(
            pool1,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[1], self.num_filters],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        pool2 = tf.layers.max_pooling2d(conv2, pool_size=(3, 1), strides=(3, 1))
        pool2 = tf.transpose(pool2, [0, 1, 3, 2])

    with tf.name_scope("conv-3"):
        conv3 = tf.layers.conv2d(
            pool2,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[2], self.num_filters],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        conv3 = tf.transpose(conv3, [0, 1, 3, 2])

    with tf.name_scope("conv-4"):
        conv4 = tf.layers.conv2d(
            conv3,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[3], self.num_filters],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        conv4 = tf.transpose(conv4, [0, 1, 3, 2])

    with tf.name_scope("conv-5"):
        conv5 = tf.layers.conv2d(
            conv4,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[4], self.num_filters],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        conv5 = tf.transpose(conv5, [0, 1, 3, 2])

    with tf.name_scope("conv-maxpool-6"):
        conv6 = tf.layers.conv2d(
            conv5,
            filters=self.num_filters,
            kernel_size=[self.filter_sizes[5], self.num_filters],
            kernel_initializer=self.kernel_initializer,
            activation=tf.nn.relu)
        pool6 = tf.layers.max_pooling2d(conv6, pool_size=(3, 1), strides=(3, 1))
        pool6 = tf.transpose(pool6, [0, 2, 1, 3])
        print(pool6.get_shape().as_list())

    h_pool = tf.reshape(pool6, [-1, self.num_filters])
    print(h_pool.shape)
    print(input_x)

    fc1_layer = tf.keras.layers.Dense(128, activation=tf.nn.relu)
    fc1_out = fc1_layer(h_pool)
    # fc1_out = dropout(fc1_out, self.training)
    # fc2_layer = tf.keras.layers.Dense(1024, activation=tf.nn.relu)
    # fc2_out = fc2_layer(fc1_out)

    # encoder1 = BiLSTM(self.rnn_hidden_size, name='layer_1')
    # input_x, _ = encoder1(input_x, self.x_len)
    # encoder2 = BiLSTM(self.rnn_hidden_size, name='layer_2')
    # input_x, _ = encoder2(input_x, self.x_len)
    # print(input_x.shape)
    # merge = tf.reshape(input_x, [tf.shape(input_x)[0], -1])
    # avg_pool = tf.reduce_mean(input_x, axis=1)
    # avg_max = tf.reduce_max(input_x, axis=1)
    # merge = tf.concat([avg_pool, avg_max], axis=1)
    # print(merge.shape)
    # h_conc_linear1 = tf.keras.layers.Dense(200, use_bias=False, activation=tf.nn.relu)(merge)
    # h_conc_linear2 = tf.keras.layers.Dense(200, use_bias=False, activation=tf.nn.relu)(merge)
    # merge = merge + h_conc_linear1 + h_conc_linear2
    # dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)
    # merge = dense(merge)
    # merge = dropout(merge, self.training)

    self.logits = tf.keras.layers.Dense(self.num_class, activation=None)(fc1_out)
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y))

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "char_ids": self.x,
        "labels": self.y,
        "text_len": self.x_len,
        "training": self.training
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(self.logits, axis=1)
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')
    self.input_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name='input_ids')
    self.input_mask = tf.placeholder(shape=[None, None], dtype=tf.int32, name="input_mask")
    self.segment_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name="segment_ids")
    self.y = tf.placeholder(tf.int32, [None])

    self.bert_embedding = BertEmbedding(self.bert_dir)
    _, output_layer = self.bert_embedding(input_ids=self.input_ids,
                                          input_mask=self.input_mask,
                                          segment_ids=self.segment_ids,
                                          is_training=self.training,
                                          return_pool_output=True,
                                          use_fp16=self.use_fp16)
    hidden_size = output_layer.shape[-1].value
    # output_weights = tf.get_variable(
    #     "output_weights", [self.num_class, hidden_size],
    #     initializer=tf.truncated_normal_initializer(stddev=0.02))
    # output_bias = tf.get_variable(
    #     "output_bias", [self.num_class], initializer=tf.zeros_initializer())

    dropout = Dropout(0.9)
    output_layer = dropout(output_layer, self.training)

    # add cnn layer
    pooled = []
    for idx, kernel_size in enumerate(self.filter_sizes1):
        con1d = tf.layers.conv1d(output_layer, self.filter_nums1[idx], kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='conv1d-%d' % (idx))
        pooled_conv = tf.reduce_max(con1d, axis=1)
        pooled.append(pooled_conv)
    merge = tf.concat(pooled, axis=1)
    merge = dropout(merge, self.training)

    merge = tf.layers.dense(merge, 128, activation=tf.nn.tanh, name='dense1')
    # merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    logits = tf.layers.dense(merge, self.num_class, activation=None, use_bias=False)
    # if is_training:
    #     # I.e., 0.1 dropout
    #     output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
    # logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    # logits = tf.nn.bias_add(logits, output_bias)

    probabilities = tf.nn.softmax(logits, axis=-1, name="probs")
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(self.y, depth=self.num_class, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    self.loss = tf.reduce_mean(per_example_loss)
    self.probs = probabilities

    self.input_placeholder_dict = OrderedDict({
        "input_ids": self.input_ids,
        "segment_ids": self.segment_ids,
        "labels": self.y,
        "input_mask": self.input_mask,
        "training": self.training
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(self.probs, axis=1),
        "probabilities": probabilities
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None], name='token_ids')
    self.x_char = tf.placeholder(tf.int32, [None, None], name="char_ids")
    # self.x_len = tf.placeholder(tf.int32, [None])
    # self.x_char_len = tf.placeholder(tf.int32, [None])
    self.y = tf.placeholder(tf.int32, [None])
    # self.pos_feature = tf.placeholder(tf.int32, [None, None])
    # self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
    # self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab() + 1
                         if self.pretrained_word_embedding is not None
                         else self.vocab.get_word_vocab(),
                         self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    char_embedding = Embedding(
        pretrained_embedding=None,
        embedding_shape=(self.vocab.get_char_vocab_size() + 1, self.word_embedding_size),
        trainable=True)
    x_char_embeded = char_embedding(self.x_char)
    print(x_char_embeded)
    input_x = word_embedding(self.x)
    x_w_embeded = input_x

    pos_embedding = Embedding(pretrained_embedding=None,
                              embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    # input_x_pos = pos_embedding(self.pos_feature)
    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # ask_word_feature = tf.one_hot(self.ask_word_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, ask_word_feature], axis=-1)
    # input_x = tf.concat([input_x, input_x_pos], axis=-1)
    # print(input_x.shape)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    pooled = []
    for idx, kernel_size in enumerate(self.filter_sizes1):
        con1d = tf.layers.conv1d(input_x, self.filter_nums1[idx], kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='conv1d-%d' % (idx))
        # con1d = dropout(con1d, self.training)
        similarity_func = ProjectedDotProduct(name='conv1d-%d' % (idx), hidden_units=64)
        # attn = UniAttention(similarity_func)
        # attn_c_w = attn(con1d, x_w_embeded, self.x_len)
        # attn_c_w = dropout(attn_c_w, self.training)
        # attn_c_c = attn(con1d, x_char_embeded, self.x_char_len)
        # attn_c_c = dropout(attn_c_c, self.training)
        pooled_conv = tf.reduce_max(con1d, axis=1)
        # pooled_conv = tf.reduce_max(tf.concat([con1d, attn_c_w, attn_c_c], axis=-1), axis=1)
        pooled.append(pooled_conv)

    char_kernel_size = [2, 3, 5, 7]
    for idx, kernel_size in enumerate(char_kernel_size):
        con1d = tf.layers.conv1d(x_char_embeded, 128, kernel_size,
                                 padding='same', activation=tf.nn.relu,
                                 name='char_conv1d-%d' % (idx))
        # con1d = dropout(con1d, self.training)
        # similarity_func = ProjectedDotProduct(name='char_cond-%d' % (idx), hidden_units=64)
        # attn = UniAttention(similarity_func)
        # attn_c_w = attn(con1d, x_w_embeded, self.x_len)
        # attn_c_c = attn(con1d, x_char_embeded, self.x_char_len)
        # attn_c_c = dropout(attn_c_c, self.training)
        # attn_c_w = dropout(attn_c_w, self.training)
        pooled_conv = tf.reduce_max(con1d, axis=1)
        # pooled_conv = tf.reduce_max(tf.concat([con1d, attn_c_c, attn_c_w], axis=-1), axis=1)
        pooled.append(pooled_conv)
    # print(merge.shape)
    # print(c2_concat.shape)

    merge = tf.concat(pooled, axis=-1)
    merge = dropout(merge, self.training)
    merge = tf.layers.batch_normalization(inputs=merge)

    dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
    merge = dense1(merge)
    merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    dense2 = tf.keras.layers.Dense(self.num_class, activation=None, use_bias=False)
    logits = dense2(merge)
    self.prob = tf.nn.softmax(logits, name="probs")

    from nn.loss import focal_loss_softmax
    # self.loss = tf.reduce_mean(focal_loss_softmax(self.y, logits))
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.y))
    # self.loss = focal_loss_softmax(labels=self.y, logits=logits)
    # self.loss = tf.reduce_mean(focal_loss_softmax(labels=self.y, logits=logits, alpha=0.25))
    # self.loss += self.loss + lossL2

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        # "text_len": self.x_len,
        # 'char_lens': self.x_char_len,
        # "features": self.in_name_feature,
        "char_ids": self.x_char,
        # "pos_feature": self.pos_feature,
        # 'ask_word_feature': self.ask_word_feature,
        "training": self.training,
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(logits, axis=1),
        "probs": self.prob
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
def _build_graph(self):
    self.x = tf.placeholder(tf.int32, [None, None])
    self.y = tf.placeholder(tf.int32, [None])
    self.domain = tf.placeholder(tf.int32, [None])
    print(self.x)
    # self.soft_target = tf.placeholder(tf.float32, [None, None])
    self.pos_feature = tf.placeholder(tf.int32, [None, None])
    print(self.pos_feature)
    self.in_name_feature = tf.placeholder(tf.int32, [None, None])
    print(self.in_name_feature)
    # self.ask_word_feature = tf.placeholder(tf.int32, [None, None])
    self.training = tf.placeholder_with_default(False, shape=(), name='is_training')

    word_embedding = Embedding(
        pretrained_embedding=self.pretrained_word_embedding,
        embedding_shape=(self.vocab.get_word_vocab(), self.word_embedding_size),
        trainable=self.word_embedding_trainable)
    input_x = word_embedding(self.x)

    pos_embedding = Embedding(pretrained_embedding=None,
                              embedding_shape=(self.pos_vocab_size, self.pos_embedding_size))
    input_x_pos = pos_embedding(self.pos_feature)
    # feature_x = tf.one_hot(self.in_name_feature, depth=2)
    # ask_word_feature = tf.one_hot(self.ask_word_feature, depth=2)
    # input_x = tf.concat([input_x, feature_x], axis=-1)
    # input_x = tf.concat([input_x, ask_word_feature], axis=-1)
    input_x = tf.concat([input_x, input_x_pos], axis=-1)
    # print(input_x.shape)

    dropout = Dropout(self.keep_prob)
    input_x = dropout(input_x, self.training)

    pooled = []
    input_x = tf.layers.dense(input_x, 128, activation=None, name='aff1')
    for idx in range(2):
        conv_x = tf.layers.conv1d(input_x, 128, 3, padding='same',
                                  activation=tf.nn.relu, name='conv1d-%d' % (idx))
        input_x += conv_x
        input_x = dropout(input_x, self.training)
    input_x = tf.reduce_max(input_x, axis=1)

    # merge = dropout(merge, self.training)
    # merge = tf.layers.batch_normalization(inputs=merge)
    # dense1 = tf.keras.layers.Dense(128, activation=tf.nn.tanh)
    merge = tf.layers.dense(input_x, 128, activation=tf.nn.tanh, name='dense1')
    # merge = tf.layers.batch_normalization(inputs=merge)
    merge = dropout(merge, self.training)

    logits = tf.layers.dense(merge, self.num_class, activation=None, use_bias=True)
    # logits = dense2(merge, name='dense2')
    self.prob = tf.nn.softmax(logits)

    domain_logits = tf.layers.dense(merge, 2, activation=None, use_bias=False)
    self.domain_prob = tf.nn.softmax(domain_logits)
    # print(self.prob)

    from nn.loss import softmax_with_logits_label_smooth
    from nn.loss import focal_loss_softmax

    self.loss = tf.reduce_mean(
        focal_loss_softmax(labels=self.y, logits=logits, alpha=0.5, gamma=2.0))
    # self.loss = tf.reduce_mean(focal_loss_softmax(self.y, logits))
    # self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.y))
    # self.domain_loss = tf.reduce_mean(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(logits=domain_logits, labels=self.domain))
    # self.loss += self.loss + lossL2
    # self.soft_loss = tf.reduce_mean(
    #     tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits / self.softmax_temperature,
    #                                                labels=self.soft_target))
    # self.task_balance = 1.0
    # self.soft_loss = 0.0
    # self.loss *= self.task_balance
    # self.loss += (1 - self.task_balance) * self.soft_loss * (self.softmax_temperature ** 2)
    # self.loss += self.domain_loss

    global_step = tf.train.get_or_create_global_step()

    self.input_placeholder_dict = OrderedDict({
        "token_ids": self.x,
        "labels": self.y,
        # "domain": self.domain,
        # 'soft_target': self.soft_target,
        "features": self.in_name_feature,
        "pos_feature": self.pos_feature,
        # 'ask_word_feature': self.ask_word_feature,
        "training": self.training,
    })

    self.output_variable_dict = OrderedDict({
        "predict": tf.argmax(logits, axis=1),
        "prob": self.prob
    })

    # 8. Metrics and summary
    with tf.variable_scope("train_metrics"):
        self.train_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.train_update_metrics = tf.group(*[op for _, op in self.train_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics")
    self.train_metric_init_op = tf.variables_initializer(metric_variables)

    with tf.variable_scope("eval_metrics"):
        self.eval_metrics = {'loss': tf.metrics.mean(self.loss)}
    self.eval_update_metrics = tf.group(*[op for _, op in self.eval_metrics.values()])
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics")
    self.eval_metric_init_op = tf.variables_initializer(metric_variables)

    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()