def _init_body(self, scope):
    with tf.variable_scope(scope):
        word_level_inputs = tf.reshape(self.inputs_embedded, [
            self.document_size * self.sentence_size,
            self.word_size,
            self.embedding_size
        ])
        word_level_lengths = tf.reshape(
            self.word_lengths, [self.document_size * self.sentence_size])

        with tf.variable_scope('word') as scope:
            word_encoder_output, _ = bidirectional_rnn(
                self.fw_word_cell, self.bw_word_cell,
                word_level_inputs, word_level_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                word_level_output = task_specific_attention(
                    word_encoder_output,
                    self.word_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        # sentence_level
        sentence_inputs = tf.reshape(
            word_level_output,
            [self.document_size, self.sentence_size, self.word_output_size])

        with tf.variable_scope('sentence') as scope:
            sentence_encoder_output, _ = bidirectional_rnn(
                self.fw_sentence_cell, self.bw_sentence_cell,
                sentence_inputs, self.sentence_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                sentence_level_output = task_specific_attention(
                    sentence_encoder_output,
                    self.sentence_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        with tf.variable_scope('classifier'):
            self.logits = layers.fully_connected(
                sentence_level_output, self.classes, activation_fn=None)
            self.prediction = tf.argmax(self.logits, axis=-1)
def _init_body(self, scope):
    with tf.variable_scope(scope):
        word_level_inputs = tf.reshape(self.inputs_embedded, [
            self.document_size * self.sentence_size,
            self.word_size,
            self.embedding_size
        ])
        word_level_lengths = tf.reshape(
            self.word_lengths, [self.document_size * self.sentence_size])

        with tf.variable_scope('word') as scope:
            word_encoder_output, _ = bidirectional_rnn(
                self.word_cell, self.word_cell,
                word_level_inputs, word_level_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                word_level_output = task_specific_attention(
                    word_encoder_output,
                    self.word_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        # sentence_level
        sentence_inputs = tf.reshape(
            word_level_output,
            [self.document_size, self.sentence_size, self.word_output_size])

        with tf.variable_scope('sentence') as scope:
            sentence_encoder_output, _ = bidirectional_rnn(
                self.sentence_cell, self.sentence_cell,
                sentence_inputs, self.sentence_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                sentence_level_output = task_specific_attention(
                    sentence_encoder_output,
                    self.sentence_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        with tf.variable_scope('classifier'):
            self.logits = layers.fully_connected(
                sentence_level_output, self.classes, activation_fn=None)
            self.prediction = tf.argmax(self.logits, axis=-1)
def word_level_output(self):
    with tf.name_scope("word_level"):
        word_level_inputs = tf.reshape(self.inputs_embedded, [
            self.document_size * self.sentence_size,
            self.word_size,
            self.embedding_size
        ])
        word_level_lengths = tf.reshape(
            self.word_lengths, [self.document_size * self.sentence_size])

        with tf.variable_scope('word') as scope:
            word_encoder_output, _ = bidirectional_rnn(
                self.word_cell, self.word_cell,
                word_level_inputs, word_level_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                word_level_output = task_specific_attention(
                    word_encoder_output,
                    self.word_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        return word_level_output
def sentence_level_output(self):
    with tf.name_scope("sentence_level"):
        sentence_inputs = tf.reshape(self.word_level_output, [
            self.document_size,
            self.sentence_size,
            self.word_output_size
        ])

        with tf.variable_scope('sentence') as scope:
            sentence_encoder_output, _ = bidirectional_rnn(
                self.sentence_cell, self.sentence_cell,
                sentence_inputs, self.sentence_lengths,
                scope=scope)

            with tf.variable_scope('attention') as scope:
                sentence_level_output = task_specific_attention(
                    sentence_encoder_output,
                    self.sentence_output_size,
                    scope=scope)

            with tf.variable_scope('dropout'):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        return sentence_level_output
def _init_bidirectional_encoder(self):
    from model_components import bidirectional_rnn

    with tf.variable_scope("encoder") as scope:
        self.encoder_outputs, self.encoder_state = bidirectional_rnn(
            cell_fw=self.encoder_cell,
            cell_bw=self.encoder_cell,
            inputs_embedded=self.encoder_inputs_embedded,
            input_lengths=self.encoder_inputs_length,
            time_major=True,
        )
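# The snippets above and below call a bidirectional_rnn helper from model_components
# that is not shown in this collection. The function below is only a minimal sketch of
# what such a wrapper might look like, assuming it wraps tf.nn.bidirectional_dynamic_rnn
# and concatenates the forward/backward outputs; the name bidirectional_rnn_sketch and
# the exact signature are hypothetical, not the original implementation.
import tensorflow as tf


def bidirectional_rnn_sketch(cell_fw, cell_bw, inputs_embedded, input_lengths,
                             time_major=False, scope=None):
    with tf.variable_scope(scope or "birnn") as scope:
        ((fw_outputs, bw_outputs),
         (fw_state, bw_state)) = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=cell_fw,
             cell_bw=cell_bw,
             inputs=inputs_embedded,
             sequence_length=input_lengths,
             dtype=tf.float32,
             time_major=time_major,
             scope=scope)
        # concatenate forward and backward outputs along the feature axis
        outputs = tf.concat((fw_outputs, bw_outputs), 2)
        # final states are returned as a tuple here; the original helper may merge
        # them instead (e.g. concatenating the components of an LSTMStateTuple)
        state = (fw_state, bw_state)
        return outputs, state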
def HAN_model(input_x, input_ys, word_lengths, sentence_lengths, is_training,
              dropout_keep_prob, embedding_numpy):
    word_embeddings = tf.get_variable(
        name="word_embedding",
        dtype=tf.float32,
        shape=embedding_numpy.shape,
        initializer=tf.constant_initializer(embedding_numpy),
        trainable=False)
    input_x = tf.nn.embedding_lookup(word_embeddings, input_x)
    # shape: [n_batch, n_sent, n_word, embed_size]

    # ============================== word_level AN ==============================#
    word_level_inputs = tf.reshape(
        input_x,
        [-1, parameters.max_sentence_length, parameters.embedding_size])  # reshape to 3D
    # shape of word_lengths: 2D [n_batch, n_sent]
    word_level_lengths = tf.reshape(word_lengths, [-1])  # reshape to 1D

    with tf.variable_scope("word") as scope:
        word_fw_cell = BNLSTMCell(100, is_training)
        word_bw_cell = BNLSTMCell(100, is_training)
        word_encoder_output, _ = bidirectional_rnn(
            word_fw_cell, word_bw_cell,
            word_level_inputs, word_level_lengths,
            scope=scope)

        with tf.variable_scope('attention') as scope:
            word_level_output = task_specific_attention(
                word_encoder_output, parameters.word_output_size, scope=scope)

        with tf.name_scope("dropout"):
            word_level_output = tf.nn.dropout(word_level_output, dropout_keep_prob)
    # shape of word_level_output: 2D [n_batch * n_sent, word_output_size]

    # ============================== sent_level HAN ==============================#
    sentence_level_inputs = tf.reshape(
        word_level_output,
        [-1, parameters.max_document_length, parameters.word_output_size])  # reshape to 3D
    # sentence_lengths: n_batch

    with tf.variable_scope('sentence') as scope:
        sentence_fw_cell = BNLSTMCell(100, is_training)
        sentence_bw_cell = BNLSTMCell(100, is_training)
        sentence_encoder_output, _ = bidirectional_rnn(
            sentence_fw_cell, sentence_bw_cell,
            sentence_level_inputs, sentence_lengths,
            scope=scope)

        with tf.variable_scope('attention') as scope:
            sentence_level_output = task_specific_attention(
                sentence_encoder_output, parameters.sentence_output_size, scope=scope)

        patient_vector = sentence_level_output

        with tf.name_scope("dropout"):
            sentence_level_output = tf.nn.dropout(sentence_level_output, dropout_keep_prob)
    # shape: [n_batch, sentence_output_size]

    total_loss = 0
    scores_soft_max_list = []
    for M, input_y in enumerate(input_ys):
        with tf.name_scope("task" + str(M)):
            W = tf.Variable(
                tf.truncated_normal(
                    [parameters.sentence_output_size, parameters.num_classes],
                    stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[parameters.num_classes]), name="b")
            scores = tf.nn.xw_plus_b(sentence_level_output, W, b)
            # scores has shape: [n_batch, num_classes]
            scores_soft_max = tf.nn.softmax(scores)
            scores_soft_max_list.append(scores_soft_max)
            # scores_soft_max_list shape: [multi_size, n_batch, num_classes]
            # predictions = tf.argmax(scores, axis=1, name="predictions")
            # predictions has shape: [None, ]. A shape of [x, ] means a vector of size x.
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=input_y)
            # losses has shape: [None, ]
            # include target replication
            # total_loss += losses
            loss_avg = tf.reduce_mean(losses)
            total_loss += loss_avg

    # avg_loss = tf.reduce_mean(total_loss)
    # optimize function
    optimizer = tf.train.AdamOptimizer(learning_rate=parameters.learning_rate)
    optimize = optimizer.minimize(total_loss)
    scores_soft_max_list = tf.stack(scores_soft_max_list, axis=0)

    # correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    # accuracy = tf.reduce_sum(tf.cast(correct_predictions, "float"), name="accuracy")
    return optimize, scores_soft_max_list, patient_vector
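# task_specific_attention is called throughout these snippets but never defined in this
# collection. Below is a minimal sketch, assuming the HAN-style attention of Yang et al.
# (2016): project each timestep through a tanh layer, score it against a trainable
# context vector, softmax the scores over time, and take the weighted sum. The name
# task_specific_attention_sketch and the use of tf.contrib.layers are assumptions,
# not the original code.
import tensorflow as tf
from tensorflow.contrib import layers


def task_specific_attention_sketch(inputs, output_size, scope=None):
    # inputs: [batch, time, input_size]
    with tf.variable_scope(scope or "attention"):
        attention_context_vector = tf.get_variable(
            name="attention_context_vector",
            shape=[output_size],
            initializer=layers.xavier_initializer(),
            dtype=tf.float32)
        # per-timestep projection with tanh
        input_projection = layers.fully_connected(
            inputs, output_size, activation_fn=tf.tanh)
        # unnormalized attention scores: dot product with the context vector
        vector_attn = tf.reduce_sum(
            tf.multiply(input_projection, attention_context_vector),
            axis=2, keep_dims=True)
        # normalize over the time dimension
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        # weighted sum over time -> [batch, output_size]
        outputs = tf.reduce_sum(
            tf.multiply(input_projection, attention_weights), axis=1)
        return outputs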
def __init__(self, sequence_length, num_classes, embedding_size, filter_sizes,
             num_filters, num_hidden):
    # PLACEHOLDERS
    self.input_x = tf.placeholder(
        tf.float32, [None, sequence_length, embedding_size], name="input_x")  # X - The Data
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")  # Y - The Labels
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")  # Dropout
    self.h_drop_input = tf.nn.dropout(self.input_x, 0.8)
    self.training = tf.placeholder(tf.bool)
    print(self.h_drop_input)

    def length(sequence):
        used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
        length = tf.reduce_sum(used, 1)
        length = tf.cast(length, tf.int32)
        return length

    def batchnorm(Ylogits, is_test, offset, convolutional=True):
        # Y2l = tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME')
        # Y2bn, update_ema2 = batchnorm(Y2l, tst, iter, B2, convolutional=True)
        # Y2r = tf.nn.relu(Y2bn)
        exp_moving_avg = tf.train.ExponentialMovingAverage(
            0.999, 10000)  # adding the iteration count prevents averaging across non-existing iterations
        bnepsilon = 1e-5
        if convolutional:
            mean, variance = tf.nn.moments(Ylogits, [0, 1])
        else:
            mean, variance = tf.nn.moments(Ylogits, [0])
        update_moving_averages = exp_moving_avg.apply([mean, variance])
        m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
        v = tf.cond(is_test, lambda: exp_moving_avg.average(variance), lambda: variance)
        Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
        return Ybn, update_moving_averages

    # l2_loss = tf.constant(0.0)  # Keeping track of l2 regularization loss

    # 1. EMBEDDING LAYER ################################################################
    # with tf.device('/cpu:0'), tf.name_scope("embedding"):
    #     self.W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W")
    #     self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
    #     self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

    # 2. CONVOLUTION LAYER + MAXPOOLING LAYER (per filter) ##############################
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # CONVOLUTION LAYER
            filter_shape = [filter_size, embedding_size, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            print(W)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv1d(self.h_drop_input, W, stride=1, padding="SAME", name="conv")
            print(conv)
            conv, _ = batchnorm(conv, self.training, b)
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # h = batch_norm_layer(h, train_phase=self.training, scope_bn='bn')
            # MAXPOOLING
            # pooled = tf.nn.max_pool(h, ksize=[1, sequence_length - filter_size + 1, 1, 1],
            #                         strides=[1, 1, 1, 1], padding='SAME', name="pool")
            # print(pooled)
            pooled_outputs.append(h)

    # COMBINING POOLED FEATURES
    h = tf.concat(pooled_outputs, 2)
    print(h)
    filter_shape = [3, 120, 128]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W2")
    b = tf.Variable(tf.constant(0.1, shape=[128]), name="b2")
    conv2 = tf.nn.conv1d(h, W, stride=1, padding="SAME", name="conv2")
    conv2, _ = batchnorm(conv2, self.training, b)
    y = tf.nn.relu(tf.nn.bias_add(conv2, b), name="relu")
    # y = batch_norm_layer(y, train_phase=self.training, scope_bn='bn')

    # 3. DROPOUT LAYER ###################################################################
    with tf.name_scope("dropout_hid"):
        # self.h_drop = tf.layers.batch_normalization(self.h_pool)
        self.h_drop = tf.nn.dropout(y, self.dropout_keep_prob)
        print(self.h_drop)

    # 4. LSTM LAYER ######################################################################
    # cell_fw = BNLSTMCell(num_hidden, self.training)  # LSTMCell(hidden_size)
    # cell_bw = BNLSTMCell(num_hidden, self.training)
    cell_fw = BasicLSTMCell_LayerNorm(num_hidden)
    cell_bw = BasicLSTMCell_LayerNorm(num_hidden)
    val_, state = bidirectional_rnn(cell_fw, cell_bw, self.h_drop,
                                    input_lengths=length(self.input_x))
    # _, final_hidden = state
    val = tf.concat(val_, 2)
    print(val)
    self.semantic = tf.nn.dropout(val, self.dropout_keep_prob)
    # embed()

    # Attention layer
    with tf.variable_scope("self_attention"):
        self.val = task_specific_attention(self.semantic, num_hidden * 2)
        print(self.val)
    # with tf.name_scope('self_attention'):
    #     val = Self_Attention(val, val, val, 2, num_hidden / 2)
    #     print(val)
    # with tf.name_scope('Attention_layer'):
    #     attention_output, alphas = attention(val, num_hidden, return_alphas=True)
    #     tf.summary.histogram('alphas', alphas)

    denses = tf.layers.dense(
        inputs=tf.reshape(self.val, shape=[-1, num_hidden * 2]),
        units=num_hidden,
        activation=tf.nn.relu,
        trainable=True)
    denses = tf.nn.dropout(denses, self.dropout_keep_prob)
    print(denses)
    # val2 = tf.transpose(val, [1, 0, 2])
    # last = tf.gather(val2, int(val2.get_shape()[0]) - 1)
    # print(last)

    out_weight = tf.Variable(tf.random_normal([num_hidden, num_classes]))
    out_bias = tf.Variable(tf.random_normal([num_classes]))

    with tf.name_scope("output"):
        # lstm_final_output = val[-1]
        # embed()
        self.scores = tf.nn.xw_plus_b(denses, out_weight, out_bias, name="scores")
        self.predictions = tf.nn.softmax(self.scores, name="predictions")

    with tf.name_scope("loss"):
        self.losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(self.losses, name="loss")

    with tf.name_scope("accuracy"):
        self.correct_pred = tf.equal(tf.argmax(self.predictions, 1),
                                     tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, "float"), name="accuracy")

    print("(!) LOADED CNN-LSTM! :)")
    # embed()
    total_parameters = np.sum(
        [np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
    print("Total number of trainable parameters: %d" % total_parameters)
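# Hypothetical usage sketch for the CNN-LSTM __init__ above, assuming it belongs to a
# class (here called TextCNNLSTM, a made-up name) and that its helper dependencies
# (bidirectional_rnn, BasicLSTMCell_LayerNorm, task_specific_attention) are importable.
# The hyper-parameter values are assumptions chosen so that len(filter_sizes) *
# num_filters = 120 matches the hardcoded second-stage filter_shape; the feed_dict
# keys follow the placeholders defined in __init__.
import numpy as np
import tensorflow as tf

model = TextCNNLSTM(sequence_length=200, num_classes=2, embedding_size=100,
                    filter_sizes=[3, 4, 5], num_filters=40, num_hidden=64)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model.loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.random.rand(32, 200, 100).astype(np.float32)  # pre-embedded inputs
    y_batch = np.eye(2)[np.random.randint(0, 2, 32)].astype(np.float32)  # one-hot labels
    _, loss, acc = sess.run(
        [train_op, model.loss, model.accuracy],
        feed_dict={model.input_x: x_batch,
                   model.input_y: y_batch,
                   model.dropout_keep_prob: 0.5,
                   # the `training` placeholder is passed to batchnorm as is_test;
                   # False means batch statistics are used, as during training
                   model.training: False})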
def _init_body(self, scope):
    with tf.variable_scope(scope):
        # word layer
        word_level_inputs = tf.reshape(self.inputs_embedded, [
            self.document_size * self.sentence_size,
            self.word_size,
            self.embedding_size
        ])
        word_level_lengths = tf.reshape(
            self.word_lengths,
            [self.document_size * self.sentence_size
             ])  # 2D (self.word_lengths) to 1D (word_level_lengths)

        with tf.variable_scope('word') as scope:
            # word_encoder_output[i] = [fw_outputs[i], bw_outputs[i]]
            # shape(word_encoder_output): [self.document_size * self.sentence_size,
            #                              self.word_size,
            #                              rnnCell.output_size() * 2]
            word_encoder_output, _ = bidirectional_rnn(
                self.word_cell, self.word_cell,
                word_level_inputs, word_level_lengths, scope)

            with tf.variable_scope('attention') as scope:
                word_level_output, penalization = word_attention(
                    word_encoder_output,
                    aspect_size=self.aspect_size,
                    scope=scope)
                self.penalization = penalization

            with tf.variable_scope('dropout'):
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        # sentence layer
        sentence_level_inputs = tf.reshape(
            word_level_output,
            shape=[
                self.document_size,
                self.sentence_size * self.aspect_size,
                self.word_cell.output_size * 2
            ])

        with tf.variable_scope('sentence') as scope:
            # sentence_encoder_output[i] = [fw_outputs[i], bw_outputs[i]]
            # shape(sentence_encoder_output): [self.document_size, self.sentence_size,
            #                                  self.aspect_size, sentence_cell.output_size() * 2]
            sentence_encoder_output, _ = bidirectional_rnn(
                self.sentence_cell, self.sentence_cell,
                sentence_level_inputs,
                scope=scope)  # shape(self.sentence_lengths): self.document_size
            sentence_encoder_output = tf.reshape(
                sentence_encoder_output,
                shape=[
                    self.document_size, self.sentence_size, self.aspect_size,
                    self.sentence_cell.output_size * 2
                ])

            with tf.variable_scope('attention') as scope:
                # shape(sentence_level_output): [self.document_size, aspect_size,
                #                                sentence_cell.output_size() * 2]
                sentence_level_output = sentence_attention(
                    sentence_encoder_output, scope=scope)

            with tf.variable_scope('dropout'):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        # if self.aspect_size > 1:
        with tf.variable_scope('aspect') as scope:
            aspect_encoder_output, _ = bidirectional_rnn(
                self.aspect_cell, self.aspect_cell,
                sentence_level_output, scope=scope)

            with tf.variable_scope('attention') as scope:
                # shape(aspect_level_output): [self.document_size, aspect_cell.output_size() * 2]
                aspect_level_output = aspect_attention(
                    aspect_encoder_output, scope=scope)

            with tf.variable_scope('dropout'):
                aspect_level_output = layers.dropout(
                    aspect_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training,
                )

        output_for_classifier = aspect_level_output
        # else:
        #     output_for_classifier = sentence_level_output

        with tf.variable_scope('classifier'):
            self.logits = layers.fully_connected(
                output_for_classifier, self.classes, activation_fn=None)
            self.prediction = tf.argmax(self.logits, axis=-1)