Example no. 1
0
    def __init__(self, embedding, batch_size):
        TEXT, vocab_size, word_embeddings, self.train_iter, self.valid_iter, self.test_iter = load_dataset.load(
            embedding=embedding, batch_size=batch_size)
        self.embedding = embedding

        output_size = 10
        hidden_size = 256
        embedding_length = 300

        self.model = RNN(batch_size, output_size, hidden_size, vocab_size,
                         embedding_length, word_embeddings)

        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                                     weight_decay=0.0005,
                                     lr=0.0001)
        loss_fn = F.cross_entropy
        self.training_handler = TrainingHandler(optimizer, loss_fn, batch_size)
Example no. 2
0
class RecurrentNN():
    def __init__(self, embedding, batch_size):
        TEXT, vocab_size, word_embeddings, self.train_iter, self.valid_iter, self.test_iter = load_dataset.load(
            embedding=embedding, batch_size=batch_size)
        self.embedding = embedding

        output_size = 10
        hidden_size = 256
        embedding_length = 300

        self.model = RNN(batch_size, output_size, hidden_size, vocab_size,
                         embedding_length, word_embeddings)

        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                                     weight_decay=0.0005,
                                     lr=0.0001)
        loss_fn = F.cross_entropy
        self.training_handler = TrainingHandler(optimizer, loss_fn, batch_size)

    def train(self, numberOfEpochs):
        patience_threshold = 3
        patience = patience_threshold
        min_valid_loss = np.inf
        for epoch in range(numberOfEpochs):
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            train_loss, train_acc = self.training_handler.train_model(
                self.model, self.train_iter, epoch)
            val_loss, val_acc = self.training_handler.eval_model(
                self.model, self.valid_iter)
            log_line = (f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, '
                        f'Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, '
                        f'Val. Acc: {val_acc:.2f}%')
            print(log_line)
            output_handler.outputFileHandler.write(log_line)

            patience -= 1
            if val_loss < min_valid_loss and abs(min_valid_loss -
                                                 val_loss) > 0.005:
                patience = patience_threshold
                torch.save(self.model, "./saved_models/rnn-" + self.embedding)
                min_valid_loss = val_loss

            if patience == 0:
                break

    def test(self):
        self.model = torch.load("./saved_models/rnn-" + self.embedding)
        test_loss, test_acc = self.training_handler.eval_model(
            self.model, self.test_iter)
        return test_loss, test_acc
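
# Hypothetical usage sketch (not part of the original snippet): it assumes the
# repository's load_dataset, RNN, TrainingHandler and output_handler modules are
# importable and that "glove" is a supported embedding name.
#
#     classifier = RecurrentNN(embedding="glove", batch_size=32)
#     classifier.train(numberOfEpochs=20)
#     test_loss, test_acc = classifier.test()
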
    def __init__(self, sequence_length, num_classes, embedding_size,
                 vocab_size, static, rnn_hidden_size, num_layers, dynamic,
                 use_attention, attention_size):
        """
        transfer model contains embedding layer, rnn layer and fully-connected layer
        and will all be initialized by the corresponding params of adversarial network
        the rnn params will be initialized by the shared rnn model in adversarial network           
        """
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y")
        self.input_keep_prob = tf.placeholder(tf.float32, name="keep_prob_in")
        self.output_keep_prob = tf.placeholder(tf.float32,
                                               name="keep_prob_out")

        self.rnn_model = RNN(sequence_length,
                             rnn_hidden_size,
                             num_layers,
                             dynamic=True,
                             use_attention=True,
                             attention_size=attention_size)
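        # Note: the `dynamic` and `use_attention` constructor arguments are not
        # forwarded here; both are hard-coded to True in this call.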

        self.W = tf.Variable(tf.random_uniform([vocab_size, embedding_size],
                                               -1.0, 1.0),
                             name="transfer-W")

        with tf.name_scope("embedding-layer"):
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

        with tf.name_scope("sequence-length"):
            mask = tf.sign(self.input_x)
            range_ = tf.range(start=1,
                              limit=sequence_length + 1,
                              dtype=tf.int32)
            mask = tf.multiply(mask, range_, name="mask")  # element wise
            seq_len = tf.reduce_max(mask, axis=1)
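            # The mask trick above recovers each example's true (unpadded) length:
            # tf.sign maps padding ids (0) to 0 and real token ids to 1; multiplying by
            # the 1-based position index and taking the row-wise max gives the position
            # of the last non-padding token, i.e. the sequence length.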

        with tf.name_scope("rnn-processing"):
            """            
            initialize the rnn model using pre-trained adversarial model 
            """
            s = self.rnn_model.process(
                self.embedded_chars,
                seq_len,
                self.input_keep_prob,
                self.output_keep_prob,
                scope="transfer-shared",
            )

        with tf.name_scope("transfer-fully-connected-layer"):
            w = tf.Variable(tf.truncated_normal(
                [rnn_hidden_size * 2, num_classes], stddev=0.1),
                            name="w")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            scores = tf.nn.xw_plus_b(s, w, b)

        with tf.name_scope("loss"):
            task_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.input_y,
                logits=scores)  # logits and labels must be same size
            self.task_loss = tf.reduce_mean(task_losses)

        with tf.name_scope("task-accuracy"):
            self.predictions = tf.argmax(scores, 1, name="predictions")
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.task_accuracy = tf.reduce_mean(tf.cast(
                correct_predictions, "float"),
                                                name="accuracy")
    def __init__(self, sequence_length, num_classes, vocab_size,
                 embedding_size, embedding_matrix, static, hidden_size,
                 num_layers, dynamic, use_attention, attention_size):
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y")

        with tf.name_scope("embedding-layer"):
            self.W = tf.get_variable(
                shape=[vocab_size, embedding_size],
                initializer=tf.constant_initializer(embedding_matrix),
                name='W',
                trainable=not static)
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

        with tf.name_scope("calculate-sequence-length"):
            mask = tf.sign(self.input_x)
            range_ = tf.range(start=1,
                              limit=sequence_length + 1,
                              dtype=tf.int32)
            mask = tf.multiply(mask, range_, name="mask")  # element wise
            seq_len = tf.reduce_max(mask, axis=1)

        with tf.name_scope("rnn-processing"):
            self.rnn_model = RNN(sequence_length,
                                 hidden_size,
                                 num_layers,
                                 dynamic=False,
                                 use_attention=True,
                                 attention_size=attention_size)
            output, alpha = self.rnn_model.process(self.embedded_chars,
                                                   seq_len, "rnn-model")
        """

        with tf.name_scope("forward-cell"):
            if num_layers != 1:
                cells = []
                for i in range(num_layers):
                    rnn_cell = DropoutWrapper(
                        GRUCell(hidden_size),
                        input_keep_prob=1.0,
                        output_keep_prob=1.0
                    )
                    cells.append(rnn_cell)
                self.cell_fw = MultiRNNCell(cells)
            else:
                self.cell_fw = DropoutWrapper(
                    GRUCell(hidden_size),
                    input_keep_prob=1.0,
                    output_keep_prob=1.0
                )

        with tf.name_scope("backward-cell"):
            if num_layers != 1:
                cells = []
                for i in range(num_layers):
                    rnn_cell = DropoutWrapper(
                        GRUCell(hidden_size),
                        input_keep_prob=1.0,
                        output_keep_prob=1.0
                    )
                    cells.append(rnn_cell)
                self.cell_bw = MultiRNNCell(cells)
            else:
                self.cell_bw = DropoutWrapper(
                    GRUCell(hidden_size),
                    input_keep_prob=1.0,
                    output_keep_prob=1.0
                )

        if dynamic:
            with tf.name_scope("dynamic-rnn-with-{}-layers".format(num_layers)):
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    inputs=self.embedded_chars,
                    cell_fw=self.cell_fw,
                    cell_bw=self.cell_bw,
                    sequence_length=seq_len,
                    dtype=tf.float32
                )
                # If no initial_state is provided, dtype must be specified
                # outputs -> type list(tensor) shape: sequence_length, batch_size, hidden_size * 2
             
                output_fw, output_bw = outputs
                outputs = tf.concat([output_fw, output_bw], axis=2)
                # shape: batch_size, sequence_length, hidden_size * 2
                batch_size = tf.shape(outputs)[0]
                index = tf.range(0, batch_size) * \
                    sequence_length + (seq_len - 1)
                output = tf.gather(tf.reshape(
                    outputs, [-1, hidden_size * 2]), index)
                # shape: batch_size, hidden_size * 2
        """
        with tf.name_scope("fully-connected-layer"):
            w = tf.Variable(tf.truncated_normal([hidden_size * 2, num_classes],
                                                stddev=0.1),
                            name="w")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            scores = tf.nn.xw_plus_b(output, w, b)

        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.input_y, logits=scores)
            self.loss = tf.reduce_mean(losses)

        with tf.name_scope("accuracy"):
            predictions = tf.argmax(scores, 1, name="predictions")
            correct_predictions = tf.equal(predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
    def __init__(self, sequence_length, num_classes, embedding_size,
                 vocab_size, embedding_matrix, static, rnn_hidden_size,
                 shared_num_layers, private_num_layers, dynamic, use_attention,
                 attention_size, mlp_hidden_size):
        """
        Args:
            input_keep_prob (float): dropout rate in rnn model
            output_keep_prob (float): dropout rate in rnn model
        """
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y")
        self.task = tf.placeholder(tf.int32, name="task")

        self.input_keep_prob = tf.placeholder(tf.float32, name="keep_prob_in")
        self.output_keep_prob = tf.placeholder(tf.float32,
                                               name="keep_prob_out")

        self.rnn_model = RNN(sequence_length,
                             rnn_hidden_size,
                             private_num_layers,
                             dynamic=True,
                             use_attention=True,
                             attention_size=attention_size)

        # "Attempting to use uninitialized value beta2_power_2" is raised if this is
        # wrapped in `with tf.variable_scope("shared")`; the error is caused by the Adam
        # optimizer. Outside that scope, it instead complains that there are no variables to optimize.
        if embedding_matrix is not None:
            self.W = tf.get_variable(
                shape=[vocab_size, embedding_size],
                initializer=tf.constant_initializer(embedding_matrix),
                name='W',
                trainable=not static)
        else:
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 name="W")
        print("embedding matrix complete!")
        with tf.variable_scope("discriminator"):
            self.discriminator = MLP(sequence_length=rnn_hidden_size * 2,
                                     hidden_size=mlp_hidden_size,
                                     num_classes=len(params["task"]))

        task_label = tf.one_hot(self.task, len(params["task"]))
        task_label = tf.expand_dims(task_label, 0)
        batch_size = tf.shape(self.input_x)[0]
        task_label = tf.tile(task_label, multiples=[batch_size, 1])
        task_label = tf.cast(task_label, tf.float32)
        # batch_size, num_tasks

        with tf.name_scope("embedding-layer"):
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

        with tf.name_scope("sequence-length"):
            mask = tf.sign(self.input_x)
            range_ = tf.range(start=1,
                              limit=sequence_length + 1,
                              dtype=tf.int32)
            mask = tf.multiply(mask, range_, name="mask")  # element wise
            seq_len = tf.reduce_max(mask, axis=1)

        with tf.name_scope("shared-model-processing"):
            s = self.rnn_model.process(self.embedded_chars,
                                       seq_len,
                                       self.input_keep_prob,
                                       self.output_keep_prob,
                                       scope="shared")
            # batch_size, rnn_hidden_size * 2

        # with tf.name_scope("private-model-processing"):
        #     # selected_model = tf.gather(self.private_model, self.task) # didn't work
        #     private_outputs = []
        #     for model in self.private_model:
        #         output = model.process(self.embedded_chars, seq_len)
        #         # TODO ValueError: Variable bidirectional_rnn/fw/gru_cell/gates/kernel already exists, disallowed. \
        #         # Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
        #         private_outputs.append(output)
        #     # p = selected_model.process(self.embedded_chars, seq_len)
        #     p = tf.gather(private_outputs, task)
        #     # batch_size, rnn_hidden_size * 2

        with tf.name_scope("private-model-processing"):
            useless = tf.constant([0] * 2 * rnn_hidden_size, dtype=tf.float32)
            useless = tf.expand_dims(useless, 0)
            useless = tf.tile(useless, multiples=[batch_size, 1])

            # shape of all inputs of op gather must match

            def fn(i):
                output = self.rnn_model.process(
                    self.embedded_chars, seq_len, self.input_keep_prob,
                    self.output_keep_prob,
                    "private-{}".format(params["task"][i]))
                return output

            task_outputs = []
            for i in range(len(params["task"])):
                temp = tf.cond(tf.equal(self.task, i), lambda: fn(i),
                               lambda: useless)
                # set reuse=True or reuse=tf.AUTO_REUSE
                task_outputs.append(temp)
            p = tf.gather(task_outputs, self.task)
            # batch_size, rnn_hidden_size * 2
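            # tf.gather requires all of its inputs to share the same shape, so each
            # non-matching task contributes the zero-filled `useless` tensor through
            # tf.cond; only the branch whose index equals self.task evaluates its
            # private RNN, and the gather then selects that task's representation.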

        with tf.name_scope("discriminator-processing"):
            ds = self.discriminator.process(s)
            dp = self.discriminator.process(p)
            # batch_size, num_tasks

        with tf.name_scope("fully-connected-layer"):
            sp = tf.concat([s, p], axis=1)
            # batch_size, rnn_hidden_size * 4
            w = tf.Variable(tf.truncated_normal(
                [rnn_hidden_size * 4, num_classes], stddev=0.1),
                            name="w")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            scores = tf.nn.xw_plus_b(sp, w, b)

        with tf.name_scope("loss"):
            # adv_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            #     labels=task_label, logits=d)
            # self.adv_loss = tf.reduce_mean(adv_losses)
            disc_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=task_label, logits=dp)
            gen_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=task_label, logits=ds)
            self.disc_loss = tf.reduce_mean(disc_losses + gen_losses)
            self.gen_loss = tf.reduce_mean(gen_losses)
            # diff_losses = tf.norm(
            #     tf.multiply(s, p), ord=2, axis=1)  # TODO still need to be tested
            # diff_losses = tf.multiply(s, p)
            # diff_losses = tf.nn.relu(diff_losses)
            # diff_losses = tf.norm(diff_losses, ord=2, axis=1)

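            # Difference loss: the element-wise product summed over features is the
            # per-example dot product between the shared representation s and the
            # private representation p; penalising its norm pushes the two towards
            # orthogonality.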
            diff_losses = tf.multiply(s, p)
            diff_losses = tf.reduce_sum(diff_losses, axis=1)
            diff_losses = tf.norm(diff_losses, ord=2, axis=0)
            # setting all negative values of a tensor to zero
            # https://stackoverflow.com/questions/41043894/setting-all-negative-values-of-a-tensor-to-zero-in-tensorflow
            self.diff_loss = tf.reduce_mean(diff_losses)
            task_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.input_y,
                logits=scores)  # logits and labels must be same size
            self.task_loss = tf.reduce_mean(task_losses)

        with tf.name_scope("task-accuracy"):
            predictions = tf.argmax(scores, 1, name="predictions")
            correct_predictions = tf.equal(predictions,
                                           tf.argmax(self.input_y, 1))
            self.task_accuracy = tf.reduce_mean(tf.cast(
                correct_predictions, "float"),
                                                name="accuracy")

        with tf.name_scope("discriminator-accuracy"):
            predictions_shared = tf.argmax(ds, 1, name="predictions-shared")
            correct_predictions_shared = tf.equal(predictions_shared,
                                                  tf.argmax(task_label, 1))

            predictions_private = tf.argmax(dp, 1, name="predictions-private")
            correct_predictions_private = tf.equal(predictions_private,
                                                   tf.argmax(task_label, 1))

            print(correct_predictions_shared)
            print(correct_predictions_private)
            correct_predictions = tf.concat(
                [correct_predictions_shared, correct_predictions_private],
                axis=0)

            self.discriminator_accuracy = tf.reduce_mean(tf.cast(
                correct_predictions, "float"),
                                                         name="accuracy")