def logits_cnn_1d(self):
    logging.info("##########logit is cnn##########")
    embedded_words = self.embedding(self.sequence)
    embedded_words = tf.layers.dropout(embedded_words,
                                       self.fc_drop,
                                       training=self.is_train)
    self.global_fake = None
    self.represent_size = self.emb_size
    if self._config.attention_type == "same_init":
        # encoder() returns (global_encode, global_encode_units, last_state);
        # last_state is unused here.
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        if self._config.encode_type == "attend_rnn":
            global_encode = tf.reduce_sum(global_encode, axis=1)
        else:
            global_encode = tf.reduce_max(global_encode, axis=1)
        global_encode_mlp = tf.layers.batch_normalization(
            global_encode, training=self.is_train)
        global_encode_mlp = layer.fc_fun(
            global_encode_mlp,
            self.emb_size,
            initial_type=self._config.initial_type,
            activation="relu")
        global_encode_mlp = tf.tile(global_encode_mlp, [1, self.max_seq_len])
        global_encode_mlp = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
        self.global_fake = global_encode_mlp
    elif self._config.attention_type == "attend_init":
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        # global_encode_mlp = layer.fc_fun(
        #     global_encode, self.emb_size,
        #     initial_type=self._config.initial_type)
        # self.global_fake = tf.reshape(
        #     global_encode_mlp,
        #     [self.batch_size, self.max_seq_len, self.emb_size])
        self.global_fake = global_encode
    else:
        if self._config.attention_type is not None:
            raise NotImplementedError
    outputs = self.cnn(embedded_words, global_infor=self.global_fake)
    with tf.variable_scope("output"):
        outputs = tf.nn.leaky_relu(outputs)
        outputs = tf.layers.batch_normalization(outputs,
                                                training=self.is_train)
        fcl_output = layer.fc_fun(outputs,
                                  2000,
                                  initial_type='xavier',
                                  activation=self._config.fc_activation_1)
        fcl_output = tf.layers.dropout(fcl_output,
                                       rate=self.fc_drop,
                                       training=self.is_train)
        fcl_output = layer.fc_fun(fcl_output,
                                  self.n_classes,
                                  initial_type='xavier')
    return fcl_output
def encoder(self, sequence, embedded_words):
    # returns: (global_encode, global_encode_units, last_state), where
    # global_encode has shape (batch_size, max_seq_len, global_encode_units)
    last_state = None
    global_encode = None
    with tf.variable_scope('encode_module'):
        if self._config.encode_type == "transformer":
            global_encode = self.encoder_func(sequence, embedded_words)
            global_encode_units = self.emb_size
        elif self._config.encode_type == "other_transformer":
            global_encode = layer.attention_fun(
                embedded_words,
                dropout_rate=self.attention_drop,
                is_training=self.is_train,
                config=self._config,
                scope="attention_encode")
            global_encode_units = self.emb_size
        elif self._config.encode_type == "disan":
            sequence_length = tf.squeeze(self.sequence_length)
            rep_mask = tf.sequence_mask(sequence_length,
                                        self._config.max_sequence_length)
            global_encode = utils_fast_disa.fast_directional_self_attention(
                embedded_words,
                rep_mask,
                hn=self._config.disan_units,
                head_num=self._config.attention_head,
                msl=self.max_seq_len,
                is_train=self.is_train)
            global_encode_units = self._config.disan_units
        elif self._config.encode_type == "cnn":
            global_encode = self.cnn_enc(embedded_words)
            global_encode_units = 128
            logging.info("encode is cnn")
        elif self._config.encode_type == "rnn":
            rnn_encode_cell = tf.nn.rnn_cell.GRUCell(
                name="encode_gru",
                num_units=self.rnn_units,
                kernel_initializer=tf.initializers.orthogonal())
            # self.tmp2 = tf.get_variable("rnn/encode_gru/gates/bias:0")
            x = tf.unstack(embedded_words, self.max_seq_len, 1)
            # static_rnn returns a Python list of per-step outputs plus the
            # final state.
            global_encode, last_state = tf.nn.static_rnn(
                cell=rnn_encode_cell, dtype=tf.float32, inputs=x)
            # global_encode, last_state = tf.nn.dynamic_rnn(
            #     cell=rnn_encode_cell, dtype=tf.float32,
            #     inputs=embedded_words)
            # global_encode = tf.reduce_max(outputs, 1)
            # last_state = tf.get_variable(
            #     "encode_v", [self.batch_size, self.rnn_units])
            global_encode_units = self.rnn_units
            logging.info("*****encode is rnn*******")
        elif self._config.encode_type == "w":
            global_encode = layer.fc_fun(
                embedded_words,
                self.emb_size,
                initial_type=self._config.initial_type,
                factor=self._config.xavier_factor,
                activation="leaky_relu")
            global_encode_units = self.emb_size
            logging.info("encode is w")
        else:
            raise NotImplementedError
    self.global_encode = global_encode
    return global_encode, global_encode_units, last_state
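# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It shows how
# the "disan" branch above builds rep_mask: tf.sequence_mask turns per-example
# lengths into a boolean [batch, max_len] grid, False beyond each length.
def _demo_sequence_mask():
    import tensorflow as tf
    lengths = tf.constant([2, 4])                # true lengths of two sequences
    mask = tf.sequence_mask(lengths, maxlen=5)   # shape (2, 5), dtype bool
    with tf.Session() as sess:
        print(sess.run(mask))
        # [[ True  True False False False]
        #  [ True  True  True  True False]]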
def logits(self):
    embedded_words = self.embedding(self.sequence)
    sequence_length = tf.squeeze(self.sequence_length)
    rep_mask = tf.sequence_mask(sequence_length,
                                self._config.max_sequence_length)
    if self._config.disan_type == 'origin':
        outputs = utils_disan.disan(embedded_words,
                                    rep_mask,
                                    self._config,
                                    is_train=self.is_train,
                                    keep_prob=self.disan_keep)
    elif self._config.disan_type == 'fast':
        outputs = utils_fast_disa.fast_directional_self_attention(
            embedded_words,
            rep_mask,
            hn=self._config.disan_units,
            head_num=self._config.attention_head,
            msl=self.max_seq_len,
            is_train=self.is_train)
        outputs = tf.reduce_max(outputs, 1)
    else:
        raise NotImplementedError
    fcl_output = layer.fc_fun(outputs,
                              self.mlp_units,
                              initial_type='xavier',
                              activation=self._config.fc_activation_1)
    fcl_output = tf.layers.dropout(fcl_output,
                                   rate=self.fc_drop,
                                   training=self.is_train)
    fcl_output = layer.fc_fun(fcl_output,
                              self.n_classes,
                              initial_type='xavier')
    return fcl_output
def logits(self):
    embedded_words = self.embedding(self.sequence)
    outputs = self.transformer(self.sequence, embedded_words)
    # self.train_symbol_show = train_show
    outputs = tf.reduce_max(outputs, axis=1)
    fcl_output = layer.fc_fun(outputs, self.n_classes,
                              initial_type='xavier')
    return fcl_output
def pre_pad(self, x):
    '''
    Pre-pad the input for d_rnn.
    :param x: A 3-D tensor with shape [batch_size, sequence_length, emb_size]
    :return: A 4-D tensor for d_rnn with shape
             [batch_size, block, window_size, emb_size]
    '''
    pad_input = tf.pad(x, [[0, 0], [self.window_size - 1, 0], [0, 0]],
                       mode="CONSTANT")
    # pad_input: (batch_size, seq_max_len + window_size - 1, embed_size)
    rnn_inputs = []
    for i in range(self.max_seq_len):
        rnn_inputs.append(
            tf.slice(pad_input, [0, i, 0], [-1, self.window_size, -1],
                     name='rnn_input'))
    rnn_input_tensor = tf.stack(
        rnn_inputs, 1)  # (batch_size, seq_max_len, window_size, embed_size)
    if self._config.attention_type in ["same_init", "diff_init"]:
        # self.global_fake: [batch_size, max_seq_len, emb_size]
        fake_input = tf.reshape(
            self.global_fake,
            [self.batch_size, self.max_seq_len, 1, self.emb_size])
        rnn_input_tensor = tf.concat([fake_input, rnn_input_tensor], 2)
        self.real_window_size = self.window_size + 1
    elif self._config.attention_type in ["attend_init"]:
        block_rep = tf.reduce_mean(
            rnn_input_tensor, 2)  # (batch_size, seq_max_len, embed_size)
        fake_input = layer.basic_attention(block_rep, self.global_fake,
                                           "rnn")
        fake_input = tf.layers.batch_normalization(fake_input,
                                                   training=self.is_train)
        fake_input = layer.fc_fun(fake_input,
                                  self.emb_size,
                                  initial_type=self._config.initial_type,
                                  activation="relu")
        fake_input = tf.reshape(
            fake_input,
            [self.batch_size, self.max_seq_len, 1, self.emb_size])
        rnn_input_tensor = tf.concat([fake_input, rnn_input_tensor], 2)
        self.real_window_size = self.window_size + 1
    return rnn_input_tensor
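# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It replays the
# pad-slice-stack trick from pre_pad on toy shapes: left-pad by
# (window_size - 1), then cut one window per position and stack them.
def _demo_pre_pad():
    import tensorflow as tf
    window_size, seq_len = 3, 4
    x = tf.reshape(tf.range(8, dtype=tf.float32),
                   [1, 4, 2])                          # [batch=1, seq=4, emb=2]
    padded = tf.pad(x, [[0, 0], [window_size - 1, 0], [0, 0]])  # zeros on left
    windows = tf.stack(
        [tf.slice(padded, [0, i, 0], [-1, window_size, -1])
         for i in range(seq_len)],
        axis=1)                                        # one window per position
    print(windows.get_shape())                         # (1, 4, 3, 2)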
def loss_encoder(self):
    if self._config.attention_type is None \
            or (not self._config.encoder_fixed_epoch):
        return None
    label = self.label
    global_logits = tf.reduce_max(self.global_encode, axis=1)
    logits = layer.fc_fun(global_logits, self.n_classes)
    if self._config.type == 'single_label':
        global_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    elif self._config.type == 'multi_label':
        global_loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    else:
        raise NotImplementedError
    return global_loss
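# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It contrasts
# the two loss branches in loss_encoder: softmax cross-entropy assumes exactly
# one true class per example, while sigmoid cross-entropy scores each class as
# an independent binary decision (multi-label).
def _demo_label_losses():
    import tensorflow as tf
    logits = tf.constant([[2.0, -1.0, 0.5]])
    single = tf.constant([[1.0, 0.0, 0.0]])   # one-hot: single_label
    multi = tf.constant([[1.0, 0.0, 1.0]])    # several positives: multi_label
    single_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=single)
    multi_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                         labels=multi)
    with tf.Session() as sess:
        print(sess.run([single_loss, multi_loss]))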
def logits_cnn_dp(self):
    logging.info("DPCNN")
    with tf.name_scope("embedding"):
        self.filter_num = 250
        self.kernel_size = 3
        embedding = self.embedding(self.sequence)
        self.embedding_dim = self.emb_size
        # region embedding: [batch, seq - 3 + 1, 1, 250]
        # embedding_inputs = tf.expand_dims(embedding, axis=-1)  # [None, seq, embedding, 1]
        # region_embedding = tf.layers.conv2d(
        #     embedding_inputs, self.num_filters,
        #     [self.kernel_size, self.embedding_dim])
        region_embedding = self.conv1d(embedding, 3, self.emb_size,
                                       no_act=True)
        region_embedding = tf.expand_dims(
            region_embedding, axis=2)  # (4, 254, 1, 250), max_sl: 256
    pre_activation = tf.nn.relu(region_embedding, name='preactivation')
    with tf.name_scope("conv3_0"):
        conv3 = tf.layers.conv2d(pre_activation,
                                 self.filter_num,
                                 self.kernel_size,
                                 padding="same")
        conv3 = tf.layers.batch_normalization(conv3,
                                              training=self.is_train)
    with tf.name_scope("conv3_1"):
        conv3 = tf.layers.conv2d(conv3,
                                 self.filter_num,
                                 self.kernel_size,
                                 padding="same")
        conv3 = tf.layers.batch_normalization(conv3,
                                              training=self.is_train)
    # residual connection; conv3: (4, 254, 1, 250)
    conv3 = conv3 + region_embedding
    for block in range(6):
        with tf.name_scope("block_{}".format(block)):
            with tf.name_scope("pool_1"):
                pool = tf.pad(conv3,
                              paddings=[[0, 0], [0, 1], [0, 0], [0, 0]])
                # pool: (4, 255, 1, 250)
                pool = tf.nn.max_pool(pool, [1, 3, 1, 1],
                                      strides=[1, 2, 1, 1],
                                      padding='VALID')
                # pool: (4, 127, 1, 250)
            with tf.name_scope("conv3_2"):
                conv3 = tf.layers.conv2d(pool,
                                         self.filter_num,
                                         self.kernel_size,
                                         padding="same",
                                         activation=tf.nn.relu)
                conv3 = tf.layers.batch_normalization(
                    conv3, training=self.is_train)
            with tf.name_scope("conv3_3"):
                conv3 = tf.layers.conv2d(conv3,
                                         self.filter_num,
                                         self.kernel_size,
                                         padding="same",
                                         activation=tf.nn.relu)
                conv3 = tf.layers.batch_normalization(
                    conv3, training=self.is_train)
            # residual connection
            conv3 = conv3 + pool
    # pool_size = int((self.max_seq_len - 3 + 1) / 2)
    # conv3 = tf.layers.max_pooling1d(tf.squeeze(conv3, [2]), pool_size, 1)
    conv3 = tf.reduce_max(conv3, 1)
    conv3 = tf.squeeze(conv3)  # [batch, 250]
    conv3 = tf.layers.dropout(conv3, self.fc_drop, training=self.is_train)
    fcl_output = layer.fc_fun(conv3, self.n_classes, initial_type='xavier')
    return fcl_output
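# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It traces the
# pad-then-pool step of each DPCNN block above: padding one row lets a
# size-3 / stride-2 max-pool halve the sequence length exactly.
def _demo_dpcnn_pool():
    import tensorflow as tf
    x = tf.zeros([4, 254, 1, 250])                    # [batch, seq, 1, filters]
    x = tf.pad(x, [[0, 0], [0, 1], [0, 0], [0, 0]])   # seq: 254 -> 255
    x = tf.nn.max_pool(x, [1, 3, 1, 1], strides=[1, 2, 1, 1],
                       padding='VALID')               # seq: 255 -> 127
    print(x.get_shape())                              # (4, 127, 1, 250)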
def cnn(self, x, global_infor):
    logging.info("##########cnn#########")
    self.conv_out = []
    with tf.variable_scope("extractor_cnn"):
        concat_vec = []
        if self._config.attention_type == "attend_init":
            global_infor_abstract = tf.reduce_max(global_infor, axis=1)
            global_infor_abstract = tf.layers.batch_normalization(
                global_infor_abstract, training=self.is_train)
            global_infor_abstract = layer.fc_fun(global_infor_abstract,
                                                 self.emb_size,
                                                 activation="relu")
            global_infor_abstract = tf.reshape(
                tf.tile(global_infor_abstract, [1, self.max_seq_len]),
                [self.batch_size, self.max_seq_len, -1])
        for filter_i in self.filter_size:
            # filter_shape = [filter_i, self.represent_size, self.filter_num]
            with tf.variable_scope("cnn_filter_{}".format(filter_i)):
                # conv_W = layer.conv_weight_variable(filter_shape, name=filter_i)
                # conv_b = layer.bias_variable([self.filter_num], name=filter_i)
                if self._config.attention_type == "attend_init":
                    pad_input = tf.pad(x,
                                       [[0, 0], [filter_i - 1, 0], [0, 0]],
                                       mode="CONSTANT")
                    # pad_input: (batch_size, seq_max_len + filter_i - 1, embed_size)
                    cnn_blocks = []
                    for tmp_i in range(self.max_seq_len):
                        cnn_blocks.append(
                            tf.slice(pad_input, [0, tmp_i, 0],
                                     [-1, filter_i, -1],
                                     name='cnn_block'))
                    cnn_blocks = tf.stack(
                        cnn_blocks,
                        1)  # (batch_size, seq_max_len, filter_i, embed_size)
                    cnn_blocks = tf.reduce_mean(cnn_blocks, 2)
                    global_infor_attend = layer.basic_attention(
                        cnn_blocks, global_infor, name="cnn")
                    global_infor_attend = tf.layers.batch_normalization(
                        global_infor_attend, training=self.is_train)
                    global_infor_attend = layer.fc_fun(global_infor_attend,
                                                       self.emb_size,
                                                       activation="relu")
                    global_infor = tf.concat(
                        [global_infor_attend, global_infor_abstract], 2)
                    global_infor = tf.layers.batch_normalization(
                        global_infor, training=self.is_train)
                    global_infor = layer.fc_fun(
                        global_infor,
                        self.emb_size,
                        initial_type=self._config.initial_type,
                        activation="relu")
                conved = self.conv1d(x,
                                     filter_i,
                                     self.represent_size,
                                     global_infor=global_infor)
                # conved = self.conv1d(x, filter_i, self.represent_size,
                #                      global_infor=global_infor,
                #                      conv_W=conv_W, conv_b=conv_b)
                self.conv_out.append(conved)
                conved = tf.reduce_max(conved, axis=1)
                concat_vec.append(conved)
    self.conv_out = tf.stack(self.conv_out)
    self.conv_out = tf.transpose(self.conv_out, [1, 0, 2, 3])
    return tf.concat(concat_vec, -1)
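# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model).
# layer.basic_attention is not defined in this file; one plausible reading,
# consistent with how it is called above, is dot-product attention where each
# block representation queries the per-token global encoding. Treat this as
# an assumption, not the actual implementation.
def _demo_block_attention():
    import tensorflow as tf
    blocks = tf.random_normal([2, 4, 8])    # [batch, seq, units], e.g. cnn_blocks
    memory = tf.random_normal([2, 4, 8])    # [batch, seq, units], e.g. global_infor
    scores = tf.matmul(blocks, memory, transpose_b=True)  # [2, 4, 4]
    weights = tf.nn.softmax(scores)                       # over the last axis
    attended = tf.matmul(weights, memory)   # [2, 4, 8]: one vector per block
    print(attended.get_shape())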
def logits_drnn(self):
    logging.info("##########logit is rnn##########")
    self.represent_size = self.emb_size
    # self.initial_state = self.rnn_cell.zero_state(
    #     self.batch_size * self.max_seq_len, dtype=tf.float32)
    # self.initial_state = tf.cast(self.initial_state, tf.float32)
    embedded_words = self.embedding(self.sequence)
    embedded_words = tf.layers.dropout(embedded_words,
                                       self.fc_drop,
                                       training=self.is_train)
    if self._config.attention_type == "same_init":
        # encoder() returns a 3-tuple; last_state is unused here
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        if self._config.encode_type == "attend_rnn":
            global_encode = tf.reduce_sum(global_encode, axis=1)
        else:
            global_encode = tf.reduce_max(global_encode, axis=1)
        global_encode_mlp = tf.layers.batch_normalization(
            global_encode, training=self.is_train)
        global_encode_mlp = layer.fc_fun(
            global_encode_mlp,
            self.emb_size,
            initial_type=self._config.initial_type,
            activation="relu")
        global_encode_mlp = tf.tile(global_encode_mlp,
                                    [1, self.max_seq_len])
        global_encode_mlp = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
        self.global_fake = global_encode_mlp
    elif self._config.attention_type == "attend_init":
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            self.emb_size,
            initial_type=self._config.initial_type)
        self.global_fake = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
        # Even if self.initial_state is created as a variable, it still
        # doesn't update, because no gradient flows into the variable:
        # init_state = tf.get_variable(
        #     name='initial_state',
        #     shape=[self.batch_size * self.max_seq_len, self.rnn_units])
    else:
        if self._config.attention_type is not None:
            raise NotImplementedError
    input_pad = self.pre_pad(
        embedded_words)  # [batch_size, block, window_size, emb_size]
    drnn_output = self.d_rnn(input_pad)  # [batch_size, block, rnn_units]
    drnn_output = tf.reshape(drnn_output, [-1, self.mlp_units])
    drnn_output = tf.matmul(drnn_output, self.WC)
    drnn_output = tf.reshape(drnn_output,
                             [self.batch_size, -1, self.mlp_units])
    mask = tf.sequence_mask(
        self.sequence_length, self.max_seq_len,
        dtype=drnn_output.dtype)  # [batch_size, max_seq_len]
    mask = tf.reshape(mask, [self.batch_size, self.max_seq_len, 1])
    drnn_output = drnn_output * mask
    hs = tf.reduce_max(drnn_output, axis=1)
    hs = tf.layers.dropout(hs, self.fc_drop, training=self.is_train)
    mlp = tf.matmul(hs, self.W)
    mlp = tf.layers.batch_normalization(mlp, training=self.is_train)
    mlp = tf.nn.relu(mlp)
    fcl_output = tf.matmul(mlp, self.u)
    return fcl_output
def logits_cnn_1d(self):
    embedded_words = self.embedding(self.sequence)
    embedded_words = tf.layers.dropout(embedded_words,
                                       self.fc_drop,
                                       training=self.is_train)
    if self._config.global_size:
        global_size = self._config.global_size
    else:
        global_size = self.rnn_units
    self.global_fake = None
    self.represent_size = self.emb_size
    if self._config.attention_type == "pre_attention":
        # encoder() returns a 3-tuple; last_state is unused here
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        embedded_words = global_encode
    elif self._config.attention_type == "diff_concat":
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            global_size,
            initial_type=self._config.initial_type,
            factor=self._config.xavier_factor)
        self.represent_size += global_size
        print("represent_size: {}".format(self.represent_size))
        embedded_words = tf.concat([embedded_words, global_encode_mlp],
                                   axis=-1)
    elif self._config.attention_type == "same_concat":
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            global_size,
            initial_type=self._config.initial_type,
            factor=self._config.xavier_factor)
        global_encode_mlp = tf.reduce_max(global_encode_mlp, axis=1)
        global_encode_mlp = tf.reshape(
            tf.tile(global_encode_mlp, [1, self.max_seq_len]),
            [self.batch_size, self.max_seq_len, -1])
        self.represent_size += global_size
        print("represent_size: {}".format(self.represent_size))
        embedded_words = tf.concat([embedded_words, global_encode_mlp],
                                   axis=-1)
    elif self._config.attention_type == "same_init":
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        global_encode = tf.reduce_max(global_encode, axis=1)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            self.rnn_units,
            initial_type=self._config.initial_type)
        global_encode_mlp = tf.tile(global_encode_mlp,
                                    [1, self.max_seq_len])
        # note: this reshape assumes rnn_units == emb_size
        global_encode_mlp = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
        self.global_fake = global_encode_mlp
    elif self._config.attention_type in ["diff_init", "attend_init"]:
        global_encode, global_encode_units, _ = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            self.rnn_units,
            initial_type=self._config.initial_type)
        # note: this reshape assumes rnn_units == emb_size
        self.global_fake = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
    else:
        if self._config.attention_type is not None:
            raise NotImplementedError
    outputs = self.cnn(embedded_words, global_infor=self.global_fake)
    outputs = tf.nn.leaky_relu(outputs)
    outputs = tf.layers.batch_normalization(outputs,
                                            training=self.is_train)
    fcl_output = layer.fc_fun(outputs,
                              2000,
                              initial_type='xavier',
                              activation=self._config.fc_activation_1)
    fcl_output = tf.layers.dropout(fcl_output,
                                   rate=self.fc_drop,
                                   training=self.is_train)
    fcl_output = layer.fc_fun(fcl_output,
                              self.n_classes,
                              initial_type='xavier')
    return fcl_output
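# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It isolates the
# tile + reshape pattern used by the "same_concat" / "same_init" branches
# above to broadcast one global vector onto every timestep.
def _demo_broadcast_global():
    import tensorflow as tf
    batch, seq_len, units = 2, 4, 3
    g = tf.ones([batch, units])                 # one global vector per example
    g = tf.tile(g, [1, seq_len])                # [batch, seq_len * units]
    g = tf.reshape(g, [batch, seq_len, units])  # [batch, seq_len, units]
    print(g.get_shape())                        # (2, 4, 3)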
def logits_rnn(self):
    self.represent_size = self.emb_size
    self.initial_state = None
    # self.initial_state = self.rnn_cell.zero_state(
    #     self.batch_size * self.max_seq_len, dtype=tf.float32)
    # self.initial_state = tf.cast(self.initial_state, tf.float32)
    embedded_words = self.embedding(self.sequence)
    # embedded_words = tf.layers.dropout(
    #     embedded_words, self.fc_drop, training=self.is_train)
    if self._config.global_size:
        global_size = self._config.global_size
    else:
        global_size = self.rnn_units
    if self._config.attention_type == "pre_attention":
        global_encode, global_encode_units, last_state = self.encoder(
            self.sequence, embedded_words)
        embedded_words = global_encode
    elif self._config.attention_type == "diff_concat":
        global_encode, global_encode_units, last_state = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            global_size,
            initial_type=self._config.initial_type)
        self.represent_size += global_size
        embedded_words = tf.concat([embedded_words, global_encode_mlp],
                                   axis=-1)
    elif self._config.attention_type == "same_concat":
        global_encode, global_encode_units, last_state = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            global_size,
            initial_type=self._config.initial_type)
        global_encode_mlp = tf.reduce_max(global_encode_mlp, axis=1)
        global_encode_mlp = tf.reshape(
            tf.tile(global_encode_mlp, [1, self.max_seq_len]),
            [self.batch_size, self.max_seq_len, -1])
        self.represent_size += global_size
        print("represent_size: {}".format(self.represent_size))
        embedded_words = tf.concat([embedded_words, global_encode_mlp],
                                   axis=-1)
    elif self._config.attention_type == "same_init":
        global_encode, global_encode_units, last_state = self.encoder(
            self.sequence, embedded_words)
        if self._config.encode_type == "rnn":
            # use the GRU's final state directly
            # last_state = layer.fc_fun(
            #     last_state, self.rnn_units,
            #     initial_type=self._config.initial_type,
            #     factor=self._config.xavier_factor, activation="relu")
            pass
        elif self._config.encode_type == "cnn":
            last_state = tf.reduce_max(global_encode, 1)
            last_state = tf.layers.batch_normalization(
                last_state, training=self.is_train)
            last_state = layer.fc_fun(
                last_state,
                self.rnn_units,
                initial_type=self._config.initial_type,
                activation="relu")
        print("********initial state*********")
        last_state = tf.tile(last_state, [1, self.max_seq_len])
        # last_state = tf.reshape(
        #     last_state,
        #     [self.batch_size, self.rnn_units, self.max_seq_len])
        last_state = tf.reshape(
            last_state,
            [self.batch_size * self.max_seq_len, self.rnn_units])
        print("initial_state.shape {}".format(last_state.shape))
        self.initial_state = last_state
        # self.initial_state = tf.get_variable(
        #     name='initial_state',
        #     shape=[self.batch_size * self.max_seq_len, self.rnn_units])
    elif self._config.attention_type in ["diff_init", "attend_init"]:
        global_encode, global_encode_units, last_state = self.encoder(
            self.sequence, embedded_words)
        global_encode_mlp = layer.fc_fun(
            global_encode,
            self.rnn_units,
            initial_type=self._config.initial_type)
        # note: this reshape assumes rnn_units == emb_size
        self.global_fake = tf.reshape(
            global_encode_mlp,
            [self.batch_size, self.max_seq_len, self.emb_size])
        # Even if self.initial_state is created as a variable, it still
        # doesn't update, because no gradient flows into the variable:
        # init_state = tf.get_variable(
        #     name='initial_state',
        #     shape=[self.batch_size * self.max_seq_len, self.rnn_units])
    else:
        if self._config.attention_type is not None:
            raise NotImplementedError
    input_pad = self.pre_pad(
        embedded_words)  # [batch_size, block, window_size, emb_size]
    drnn_output = self.d_rnn(input_pad)  # [batch_size, block, rnn_units]
    drnn_output = tf.reshape(drnn_output, [-1, self.mlp_units])
    drnn_output = tf.matmul(drnn_output, self.WC)
    drnn_output = tf.reshape(drnn_output,
                             [self.batch_size, -1, self.mlp_units])
    mask = tf.sequence_mask(
        self.sequence_length, self.max_seq_len,
        dtype=drnn_output.dtype)  # [batch_size, max_seq_len]
    mask = tf.reshape(mask, [self.batch_size, self.max_seq_len, 1])
    drnn_output = drnn_output * mask
    hs = tf.reduce_max(drnn_output, axis=1)
    # hs = tf.layers.dropout(hs, self.fc_drop, training=self.is_train)
    mlp = tf.matmul(hs, self.W)
    mlp = tf.layers.batch_normalization(mlp, training=self.is_train)
    mlp = tf.nn.relu(mlp)
    # mlp = tf.layers.dropout(mlp, self.fc_drop, training=self.is_train)
    fcl_output = tf.matmul(mlp, self.u)
    return fcl_output
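# ---------------------------------------------------------------------------
# Illustrative sketch (added; not part of the original model). It shows why
# drnn_output is multiplied by the sequence mask before tf.reduce_max:
# zeroing padded positions keeps them from winning the max (assuming the real
# activations of interest are non-negative, as after a ReLU).
def _demo_mask_then_max():
    import tensorflow as tf
    out = tf.constant([[[1.0], [5.0], [9.0]]])          # [batch=1, seq=3, 1]
    mask = tf.sequence_mask([2], 3, dtype=tf.float32)   # only 2 real steps
    masked = out * tf.reshape(mask, [1, 3, 1])          # zero the padded step
    with tf.Session() as sess:
        print(sess.run(tf.reduce_max(masked, axis=1)))  # [[5.]], not [[9.]]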