import numpy as np
import torch
import torch.nn.functional as F

# load_dataset, output_handler, RNN and TrainingHandler are project-local
# modules from the surrounding repository.


class RecurrentNN():
    def __init__(self, embedding, batch_size):
        TEXT, vocab_size, word_embeddings, self.train_iter, self.valid_iter, self.test_iter = load_dataset.load(
            embedding=embedding, batch_size=batch_size)
        self.embedding = embedding

        output_size = 10
        hidden_size = 256
        embedding_length = 300

        self.model = RNN(batch_size, output_size, hidden_size, vocab_size,
                         embedding_length, word_embeddings)
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            weight_decay=0.0005, lr=0.0001)
        loss_fn = F.cross_entropy
        self.training_handler = TrainingHandler(optimizer, loss_fn, batch_size)

    def train(self, numberOfEpochs):
        patience_threshold = 3
        patience = patience_threshold
        min_valid_loss = np.inf

        for epoch in range(numberOfEpochs):
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            train_loss, train_acc = self.training_handler.train_model(
                self.model, self.train_iter, epoch)
            val_loss, val_acc = self.training_handler.eval_model(
                self.model, self.valid_iter)

            log_line = (f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, '
                        f'Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, '
                        f'Val. Acc: {val_acc:.2f}%')
            print(log_line)
            output_handler.outputFileHandler.write(log_line)

            # Early stopping: reset patience only on a meaningful improvement.
            patience -= 1
            if val_loss < min_valid_loss and abs(min_valid_loss - val_loss) > 0.005:
                patience = patience_threshold
                torch.save(self.model, "./saved_models/rnn-" + self.embedding)
                min_valid_loss = val_loss
            if patience == 0:
                break

    def test(self):
        self.model = torch.load("./saved_models/rnn-" + self.embedding)
        test_loss, test_acc = self.training_handler.eval_model(
            self.model, self.test_iter)
        return test_loss, test_acc
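# Usage sketch (not from the repository): the embedding name, batch size and
# epoch count below are assumed placeholder values.
def run_rnn_experiment():
    classifier = RecurrentNN(embedding="glove", batch_size=32)
    classifier.train(numberOfEpochs=20)
    test_loss, test_acc = classifier.test()
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')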
import tensorflow as tf

# The RNN and MLP helper classes and the `params` dict used below are
# project-local and assumed to come from the surrounding repository.


def __init__(self, sequence_length, num_classes, embedding_size, vocab_size,
             static, rnn_hidden_size, num_layers, dynamic, use_attention,
             attention_size):
    """
    The transfer model contains an embedding layer, an RNN layer and a
    fully-connected layer, all initialized from the corresponding parameters
    of the adversarial network; the RNN parameters are initialized from the
    shared RNN model of the adversarial network.
    """
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="y")
    self.input_keep_prob = tf.placeholder(tf.float32, name="keep_prob_in")
    self.output_keep_prob = tf.placeholder(tf.float32, name="keep_prob_out")

    self.rnn_model = RNN(sequence_length, rnn_hidden_size, num_layers,
                         dynamic=True, use_attention=True,
                         attention_size=attention_size)
    self.W = tf.Variable(
        tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
        name="transfer-W")

    with tf.name_scope("embedding-layer"):
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

    with tf.name_scope("sequence-length"):
        # Non-padding positions are marked 1; multiplying by the position index
        # and taking the max recovers each example's true sequence length.
        mask = tf.sign(self.input_x)
        range_ = tf.range(start=1, limit=sequence_length + 1, dtype=tf.int32)
        mask = tf.multiply(mask, range_, name="mask")  # element-wise
        seq_len = tf.reduce_max(mask, axis=1)

    with tf.name_scope("rnn-processing"):
        # Initialize the RNN using the pre-trained adversarial (shared) model.
        s = self.rnn_model.process(
            self.embedded_chars,
            seq_len,
            self.input_keep_prob,
            self.output_keep_prob,
            scope="transfer-shared",
        )

    with tf.name_scope("transfer-fully-connected-layer"):
        w = tf.Variable(
            tf.truncated_normal([rnn_hidden_size * 2, num_classes], stddev=0.1),
            name="w")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        scores = tf.nn.xw_plus_b(s, w, b)

    with tf.name_scope("loss"):
        # logits and labels must have the same shape
        task_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.input_y, logits=scores)
        self.task_loss = tf.reduce_mean(task_losses)

    with tf.name_scope("task-accuracy"):
        self.predictions = tf.argmax(scores, 1, name="predictions")
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.task_accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
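# Hypothetical helper (not from the repository): shows how the transfer model's
# placeholders above would be fed in a TF1 session. Keep probabilities of 1.0
# disable dropout at evaluation time; `sess`, `model` and the batches are
# assumed to be provided by the caller.
def run_transfer_eval_step(sess, model, x_batch, y_batch):
    feed = {model.input_x: x_batch,
            model.input_y: y_batch,
            model.input_keep_prob: 1.0,
            model.output_keep_prob: 1.0}
    return sess.run([model.task_loss, model.task_accuracy], feed_dict=feed)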
def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             embedding_matrix, static, hidden_size, num_layers, dynamic,
             use_attention, attention_size):
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="y")

    with tf.name_scope("embedding-layer"):
        self.W = tf.get_variable(
            shape=[vocab_size, embedding_size],
            initializer=tf.constant_initializer(embedding_matrix),
            name='W',
            trainable=not static)
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

    with tf.name_scope("calculate-sequence-length"):
        mask = tf.sign(self.input_x)
        range_ = tf.range(start=1, limit=sequence_length + 1, dtype=tf.int32)
        mask = tf.multiply(mask, range_, name="mask")  # element wise
        seq_len = tf.reduce_max(mask, axis=1)

    with tf.name_scope("rnn-processing"):
        self.rnn_model = RNN(sequence_length, hidden_size, num_layers,
                             dynamic=False, use_attention=True,
                             attention_size=attention_size)
        output, alpha = self.rnn_model.process(self.embedded_chars, seq_len,
                                               "rnn-model")

    """
    with tf.name_scope("forward-cell"):
        if num_layers != 1:
            cells = []
            for i in range(num_layers):
                rnn_cell = DropoutWrapper(
                    GRUCell(hidden_size),
                    input_keep_prob=1.0,
                    output_keep_prob=1.0
                )
                cells.append(rnn_cell)
            self.cell_fw = MultiRNNCell(cells)
        else:
            self.cell_fw = DropoutWrapper(
                GRUCell(hidden_size),
                input_keep_prob=1.0,
                output_keep_prob=1.0
            )

    with tf.name_scope("backward-cell"):
        if num_layers != 1:
            cells = []
            for i in range(num_layers):
                rnn_cell = DropoutWrapper(
                    GRUCell(hidden_size),
                    input_keep_prob=1.0,
                    output_keep_prob=1.0
                )
                cells.append(rnn_cell)
            self.cell_bw = MultiRNNCell(cells)
        else:
            self.cell_bw = DropoutWrapper(
                GRUCell(hidden_size),
                input_keep_prob=1.0,
                output_keep_prob=1.0
            )

    if dynamic:
        with tf.name_scope("dynamic-rnn-with-{}-layers".format(num_layers)):
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                inputs=self.embedded_chars,
                cell_fw=self.cell_fw,
                cell_bw=self.cell_bw,
                sequence_length=seq_len,
                dtype=tf.float32
            )
            # If no initial_state is provided, dtype must be specified
            # outputs -> type list(tensor) shape: sequence_length, batch_size, hidden_size * 2
            output_fw, output_bw = outputs
            outputs = tf.concat([output_fw, output_bw], axis=2)
            # shape: batch_size, sequence_length, hidden_size * 2
            batch_size = tf.shape(outputs)[0]
            index = tf.range(0, batch_size) * sequence_length + (seq_len - 1)
            output = tf.gather(tf.reshape(outputs, [-1, hidden_size * 2]),
                               index)
            # shape: batch_size, hidden_size * 2
    """

    with tf.name_scope("fully-connected-layer"):
        w = tf.Variable(
            tf.truncated_normal([hidden_size * 2, num_classes], stddev=0.1),
            name="w")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        scores = tf.nn.xw_plus_b(output, w, b)

    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.input_y, logits=scores)
        self.loss = tf.reduce_mean(losses)

    with tf.name_scope("accuracy"):
        predictions = tf.argmax(scores, 1, name="predictions")
        correct_predictions = tf.equal(predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
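# Hypothetical training-step sketch (not from the repository): wires the
# single-task classifier above to an Adam optimizer and runs one minibatch
# update. The learning rate and helper names are assumptions.
def build_classifier_train_op(model, lr=1e-3):
    return tf.train.AdamOptimizer(lr).minimize(model.loss)


def run_classifier_train_step(sess, model, train_op, x_batch, y_batch):
    feed = {model.input_x: x_batch, model.input_y: y_batch}
    _, loss, acc = sess.run([train_op, model.loss, model.accuracy],
                            feed_dict=feed)
    return loss, acc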
def __init__(self, sequence_length, num_classes, embedding_size, vocab_size,
             embedding_matrix, static, rnn_hidden_size, shared_num_layers,
             private_num_layers, dynamic, use_attention, attention_size,
             mlp_hidden_size):
    """
    Args:
        input_keep_prob (float): dropout keep probability in the rnn model (fed via placeholder)
        output_keep_prob (float): dropout keep probability in the rnn model (fed via placeholder)
    """
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="y")
    self.task = tf.placeholder(tf.int32, name="task")
    self.input_keep_prob = tf.placeholder(tf.float32, name="keep_prob_in")
    self.output_keep_prob = tf.placeholder(tf.float32, name="keep_prob_out")

    self.rnn_model = RNN(sequence_length, rnn_hidden_size, private_num_layers,
                         dynamic=True, use_attention=True,
                         attention_size=attention_size)

    # "Attempting to use uninitialized value beta2_power_2" is raised when this
    # block is wrapped in tf.variable_scope("shared"); that is caused by the
    # Adam optimizer. Without the scope it complains there are no variables to
    # optimize.
    if embedding_matrix is not None:  # explicit None check: a NumPy array has no single truth value
        self.W = tf.get_variable(
            shape=[vocab_size, embedding_size],
            initializer=tf.constant_initializer(embedding_matrix),
            name='W',
            trainable=not static)
    else:
        self.W = tf.Variable(
            tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
            name="W")
    print("embedding matrix complete!")

    with tf.variable_scope("discriminator"):
        self.discriminator = MLP(sequence_length=rnn_hidden_size * 2,
                                 hidden_size=mlp_hidden_size,
                                 num_classes=len(params["task"]))

    # Broadcast the scalar task id into a one-hot label for every example in the batch.
    task_label = tf.one_hot(self.task, len(params["task"]))
    task_label = tf.expand_dims(task_label, 0)
    batch_size = tf.shape(self.input_x)[0]
    task_label = tf.tile(task_label, multiples=[batch_size, 1])
    task_label = tf.cast(task_label, tf.float32)  # batch_size, num_tasks

    with tf.name_scope("embedding-layer"):
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

    with tf.name_scope("sequence-length"):
        mask = tf.sign(self.input_x)
        range_ = tf.range(start=1, limit=sequence_length + 1, dtype=tf.int32)
        mask = tf.multiply(mask, range_, name="mask")  # element wise
        seq_len = tf.reduce_max(mask, axis=1)

    with tf.name_scope("shared-model-processing"):
        s = self.rnn_model.process(self.embedded_chars, seq_len,
                                   self.input_keep_prob,
                                   self.output_keep_prob,
                                   scope="shared")
        # batch_size, rnn_hidden_size * 2

    # with tf.name_scope("private-model-processing"):
    #     # selected_model = tf.gather(self.private_model, self.task)  # didn't work
    #     private_outputs = []
    #     for model in self.private_model:
    #         output = model.process(self.embedded_chars, seq_len)
    #         # TODO ValueError: Variable bidirectional_rnn/fw/gru_cell/gates/kernel already exists, disallowed.
    #         # Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
    #         private_outputs.append(output)
    #     # p = selected_model.process(self.embedded_chars, seq_len)
    #     p = tf.gather(private_outputs, task)
    #     # batch_size, rnn_hidden_size * 2

    with tf.name_scope("private-model-processing"):
        useless = tf.constant([0] * 2 * rnn_hidden_size, dtype=tf.float32)
        useless = tf.expand_dims(useless, 0)
        useless = tf.tile(useless, multiples=[batch_size, 1])
        # shape of all inputs of op gather must match

        def fn(i):
            output = self.rnn_model.process(
                self.embedded_chars, seq_len, self.input_keep_prob,
                self.output_keep_prob, "private-{}".format(params["task"][i]))
            return output

        l = []
        for i in range(len(params["task"])):
            temp = tf.cond(tf.equal(self.task, i), lambda: fn(i),
                           lambda: useless)
            # set reuse=True or reuse=tf.AUTO_REUSE
            l.append(temp)
        p = tf.gather(l, self.task)
        # batch_size, rnn_hidden_size * 2

    with tf.name_scope("discriminator-processing"):
        ds = self.discriminator.process(s)
        dp = self.discriminator.process(p)
        # batch_size, num_tasks

    with tf.name_scope("fully-connected-layer"):
        sp = tf.concat([s, p], axis=1)  # batch_size, rnn_hidden_size * 4
        w = tf.Variable(
            tf.truncated_normal([rnn_hidden_size * 4, num_classes], stddev=0.1),
            name="w")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        scores = tf.nn.xw_plus_b(sp, w, b)

    with tf.name_scope("loss"):
        # adv_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
        #     labels=task_label, logits=d)
        # self.adv_loss = tf.reduce_mean(adv_losses)
        disc_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=task_label, logits=dp)
        gen_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=task_label, logits=ds)
        self.disc_loss = tf.reduce_mean(disc_losses + gen_losses)
        self.gen_loss = tf.reduce_mean(gen_losses)

        # diff_losses = tf.norm(tf.multiply(s, p), ord=2, axis=1)
        # TODO still need to be tested
        # diff_losses = tf.multiply(s, p)
        # diff_losses = tf.nn.relu(diff_losses)
        # diff_losses = tf.norm(diff_losses, ord=2, axis=1)
        diff_losses = tf.multiply(s, p)
        diff_losses = tf.reduce_sum(diff_losses, axis=1)
        diff_losses = tf.norm(diff_losses, ord=2, axis=0)
        # setting all negative values of a tensor to zero:
        # https://stackoverflow.com/questions/41043894/setting-all-negative-values-of-a-tensor-to-zero-in-tensorflow
        self.diff_loss = tf.reduce_mean(diff_losses)

        task_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.input_y, logits=scores)
        # logits and labels must be same size
        self.task_loss = tf.reduce_mean(task_losses)

    with tf.name_scope("task-accuracy"):
        predictions = tf.argmax(scores, 1, name="predictions")
        correct_predictions = tf.equal(predictions, tf.argmax(self.input_y, 1))
        self.task_accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")

    with tf.name_scope("discriminator-accuracy"):
        predictions_shared = tf.argmax(ds, 1, name="predictions-shared")
        correct_predictions_shared = tf.equal(predictions_shared,
                                              tf.argmax(task_label, 1))
        predictions_private = tf.argmax(dp, 1, name="predictions-private")
        correct_predictions_private = tf.equal(predictions_private,
                                               tf.argmax(task_label, 1))
        print(correct_predictions_shared)
        print(correct_predictions_private)
        correct_predictions = tf.concat(
            [correct_predictions_shared, correct_predictions_private], axis=0)
        self.discriminator_accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
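# Hypothetical loss-combination sketch (not from the repository): the
# adversarial model above exposes task_loss, gen_loss, disc_loss and diff_loss
# separately. A common shared-private setup minimizes a weighted sum for the
# task/generator variables and disc_loss for the discriminator variables only.
# The adv_weight and diff_weight values below are assumptions, not values from
# the source code.
def build_adversarial_train_ops(model, adv_weight=0.05, diff_weight=0.01,
                                lr=1e-3):
    total_loss = (model.task_loss
                  + adv_weight * model.gen_loss
                  + diff_weight * model.diff_loss)
    # Separate variable lists so the discriminator and the rest of the network
    # are updated by their own optimizers.
    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope="discriminator")
    other_vars = [v for v in tf.trainable_variables() if v not in disc_vars]
    task_train_op = tf.train.AdamOptimizer(lr).minimize(total_loss,
                                                        var_list=other_vars)
    disc_train_op = tf.train.AdamOptimizer(lr).minimize(model.disc_loss,
                                                        var_list=disc_vars)
    return task_train_op, disc_train_op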