Example #1
    def train_cls_step(inputs, labels):
        """Training step for simple classification.

        Args:
            inputs: list or tuple
                a list of tensors
                multiple input tensors can be passed to given model
            labels: tensor
        Returns:
            loss: batch loss
            acc: train accuracy
        """
        nonlocal model
        nonlocal optimizer
        nonlocal loss_func

        with tf.GradientTape() as tape:
            out = model(*inputs)
            if isinstance(out, tuple):
                out = out[0]
            cost = loss_func(labels, out, from_logits=True)
            loss = tf.reduce_mean(cost)

        trainables = model.trainable_variables
        gradients = tape.gradient(loss, trainables)
        optimizer.apply_gradients(zip(gradients, trainables))

        acc = calculate_accuracy(out, labels)

        return loss, acc
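
The nonlocal statements show that train_cls_step is defined inside a factory; Examples #4 and #5 call that factory as make_training_cls_model(model, optimizer, loss). A minimal sketch of such a factory, assuming the loss argument names a function in tf.keras.losses (the getattr lookup is an assumption, not confirmed by the source):

import tensorflow as tf

def make_training_cls_model(model, optimizer, loss):
    # Assumption: `loss` is the name of a loss function in tf.keras.losses,
    # e.g. 'sparse_categorical_crossentropy'.
    loss_func = getattr(tf.keras.losses, loss)

    def train_cls_step(inputs, labels):
        # one gradient step, as in Example #1 above
        with tf.GradientTape() as tape:
            out = model(*inputs)
            if isinstance(out, tuple):
                out = out[0]
            loss_value = tf.reduce_mean(loss_func(labels, out, from_logits=True))
        trainables = model.trainable_variables
        optimizer.apply_gradients(zip(tape.gradient(loss_value, trainables),
                                      trainables))
        # calculate_accuracy is the helper exercised in Examples #2 and #3
        return loss_value, calculate_accuracy(out, labels)

    return train_cls_step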
Example #2
    def test_calculate_accuracy_works(self):
        batch_size = 10
        num_classes = 5
        # class-score predictions: argmax has to be applied first
        input1 = tf.Variable(np.random.rand(batch_size, num_classes + 1))

        _labels = np.zeros((batch_size, num_classes + 1), dtype=np.int32)
        for i in range(batch_size):
            _labels[i, random.randint(0, num_classes)] = 1
        labels1 = tf.Variable(_labels)

        score = calculate_accuracy(input1, labels1)

        self.assertGreater(score, 0)
        self.assertLess(score, 1)

        # integer labels: no argmax needed
        labels2 = tf.Variable(np.arange(1, batch_size + 1, dtype=np.int32))

        score = calculate_accuracy(input1, labels2)

        self.assertGreater(score, 0)
        self.assertLess(score, 1)
Example #3
    def test_calculate_accuracy_with_numpy(self):
        # check numpy compatibility
        batch_size = 10
        num_classes = 5
        # class-score predictions: argmax has to be applied first
        input1 = np.random.rand(batch_size, num_classes + 1)

        labels1 = np.zeros((batch_size, num_classes + 1))
        for i in range(batch_size):
            labels1[i, random.randint(0, num_classes)] = 1

        score = calculate_accuracy(input1, labels1)

        self.assertGreater(score, 0)
        self.assertLess(score, 1)

        # integer labels: no argmax needed
        labels2 = np.arange(1, batch_size + 1)

        score = calculate_accuracy(input1, labels2)

        self.assertGreater(score, 0)
        self.assertLess(score, 1)
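
calculate_accuracy itself is not shown in these examples. A minimal sketch consistent with both tests above, assuming 2-D score matrices and one-hot labels are reduced with argmax while integer label vectors are compared directly (the repository's actual implementation may differ):

import numpy as np

def calculate_accuracy(predictions, labels):
    # accept eager tensors, tf.Variable, or plain numpy arrays
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    pred_ids = np.argmax(predictions, axis=-1)   # class scores -> class ids
    if labels.ndim > 1:
        labels = np.argmax(labels, axis=-1)      # one-hot labels -> class ids
    return float(np.mean(pred_ids == labels.ravel()))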
Example #4
def main(train, val, *, save=False):
    max_acc = 0

    # set up training model
    model = ClassificationModel(units, pad_max_len, processor.vocab_size,
                                embedding_dim, 3)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    train_cls_step = make_training_cls_model(
        model, optimizer, 'sparse_categorical_crossentropy')

    for epoch in range(1, epochs + 1):
        epoch_start = time.time()
        print('=====' * 10)
        print('  Epoch:', epoch)
        print('=====' * 10)

        batch_st = time.time()

        random.shuffle(train)

        for batch, (inputs, labels) \
                in enumerate(data_generator(train, batch_size=batch_size)):
            st = time.time()

            loss, acc = train_cls_step(inputs, labels)

            if batch % display_step == 0:
                if DEBUG:
                    pred, weights = model(*inputs)
                    print('[DEBUG] Batch: {}'.format(batch))
                    #print('[DEBUG] Average weights :'.format(batch))
                    #for layer in model.layers:
                    #    print('  Layer:', model.name + ':' + layer.name)
                    #    print('  Weights:')
                    #    print('    mean:', np.mean(layer.get_weights()[0]))
                    #    print('     std:', np.std(layer.get_weights()[0]))
                    #    print()
                    print('[DEBUG] Prediction:')
                    #print('   values:\n', pred.reshape(-1, 3))
                    print('   Pred:\n    ', np.argmax(pred, axis=-1))
                    print('  Label:\n    ', labels.reshape(-1))
                    print('[DEBUG] Weights/Question:')
                    print(weights[0].numpy().reshape(-1))
                    print(*[
                        processor.index_word[q] for q in questions[0] if q > 0
                    ])
                    print()

                end = time.time()
                batch_end = time.time()
                print('    Batch -', batch)
                print('      Train:  Loss - {:.4f}  Acc - {:.4f}  '
                      'Time(calc) - {:.4f}s/batch  '
                      'Time(total) - {:.4f}s/batch'.format(
                          loss, acc, end - st, batch_end - batch_st))

            batch_st = time.time()

        loss_val = 0
        acc_val = 0
        count = 0

        val_st = time.time()

        # calculate validation data
        for in_val, l_val in data_generator(val, batch_size=batch_size):
            out_val = model(*in_val)
            if isinstance(out_val, tuple):
                out_val = out_val[0]
            cost = tf.keras.losses.sparse_categorical_crossentropy(
                l_val, out_val, from_logits=True)
            loss_val += tf.reduce_mean(cost)
            acc_val += calculate_accuracy(out_val, l_val)
            count += 1

        # calculate average
        loss_val /= count
        acc_val /= count

        val_end = time.time()

        print()
        print('      Validation(approx.): Loss - {:.4f}  Acc - {:.4f}  '
              'Time - {:.4f}s'.format(loss_val, acc_val, val_end - val_st))
        print('  Total time per epoch: {:.4f}s'.format(time.time() -
                                                       epoch_start))
        print()

        # save when validation accuracy reaches a new maximum
        if save and acc_val > max_acc:
            max_acc = acc_val
            print('Saving model weights')
            model.save_weights(
                os.path.join(Config.MODELS.get('Y/N'), 'weights'))
            print('Saved!')
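
data_generator is also not shown. A hypothetical sketch of the variant used in this example, assuming train is a list of (inputs, labels) items (suggested by random.shuffle(train) above) and that batching simply groups consecutive items; Example #5 calls a two-argument variant instead:

import numpy as np

def data_generator(dataset, *, batch_size):
    # Hypothetical: each item is (inputs, labels), where `inputs` is a tuple
    # of per-example input arrays matching the model(*inputs) call above.
    for start in range(0, len(dataset), batch_size):
        chunk = dataset[start:start + batch_size]
        inputs = [np.stack(col) for col in zip(*(item[0] for item in chunk))]
        labels = np.stack([item[1] for item in chunk])
        yield inputs, labels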
Example #5
def main(*, training=True, save_to=None, load_from=None, val=0.2):
    global data_size
    global num_classes
    global processor

    vqa = VQA()
    vqa.load_data(num_data=data_size)
    questions, question_types, _, _ = next(vqa.data_generator())
    labels = [
        q2id[q] if q in q2id else q2id['none of the above']
        for q in question_types
    ]

    # build processor based on training dataset
    # if processor is not reused
    if training:
        # preprocessing dataset
        # split train and test set
        train_size = int(data_size * (1 - val))

        # inputs
        inputs_train = questions[:train_size]
        inputs_val = questions[train_size:]

        # process inputs
        # if tokenizer is not loaded, create new one
        if processor is None:
            processor = text_processor(inputs_train)

    # initialize the model
    model = QuestionTypeClassification(
        embedding_dim=embedding_dim,
        units=hidden_units,
        vocab_size=vocab_size,  # +1 needed because of the Embedding implementation
        num_classes=num_classes)

    # set initial weights to the model
    if load_from is not None:
        print('Loading weights...')
        model.load_weights(load_from)

    # TRAINING STEP
    if training:
        min_loss_val = 1.0

        print('Start training')

        inputs_train = processor(inputs_train)
        inputs_val = [processor(inputs_val)]

        # labels
        labels = np.array(labels, dtype=np.int32)

        labels_train = labels[:train_size]
        labels_val = labels[train_size:]

        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        train_cls_step = make_training_cls_model(
            model, optimizer, loss='sparse_categorical_crossentropy')

        # execute training
        for epoch in range(epochs):
            print('=====' * 10)
            print('    Epoch {}'.format(epoch + 1))
            print('=====' * 10)

            dataset = data_generator(inputs_train, labels_train, batch_size)

            for batch, (ins, outs) in enumerate(dataset):
                st = time.time()
                ins = [ins]
                batch_loss, accuracy = train_cls_step(ins, outs)

                end = time.time()

                if batch % 100 == 0:
                    out_val = model(*inputs_val)
                    cost_val = tf.keras.losses.sparse_categorical_crossentropy(
                        labels_val, out_val, from_logits=True)
                    loss_val = tf.reduce_mean(cost_val)
                    acc_val = calculate_accuracy(out_val, labels_val)

                    if DEBUG:
                        print('[DEBUG] Batch:', batch)
                        for layer in model.layers:
                            print('  Layer:', model.name + ':' + layer.name)
                            print('  Weights:')
                            print('    mean:', np.mean(layer.get_weights()[0]))
                            print('     std:', np.std(layer.get_weights()[0]))
                            print()

                    batch_loss = batch_loss.numpy()
                    print('  Batch:', batch)
                    # TODO: add accuracy
                    print(
                        '    Loss: {:.4f}  Accuracy(Train): {:.4f}  Loss(Val): {:.4f}  Accuracy(Val): {:.4f}  Time(batch): {:.4f}s'
                        .format(batch_loss, accuracy, loss_val, acc_val,
                                end - st))

            if loss_val < min_loss_val:
                min_loss_val = loss_val
                print('Saving models...')
                # save tokenizer info for reuse
                processor.to_json('./.env/tokenizer_config.json')
                model.save_weights(save_to)
                print('Saved!!')

        print()
        print('Training completed')

    else:
        # if not training mode test with all given data
        st = time.time()
        inputs = processor(questions)
        out = model(inputs)
        labels = tf.Variable(labels, dtype=tf.int32)
        accuracy = calculate_accuracy(out, labels)
        end = time.time()
        print('Evaluated score: Accuracy: {:.4f} Time: {:.4f}s'.format(
            accuracy, end - st))

    return model
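
The processor built by text_processor must be callable on raw questions and expose vocab_size, index_word, and to_json(path), as used above. A plausible sketch as a thin wrapper around tf.keras.preprocessing.text.Tokenizer; the class name, padding behavior, and OOV handling are assumptions:

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class TextProcessor:
    # Hypothetical wrapper; the repository's text_processor may differ.
    def __init__(self, texts, pad_max_len=None):
        self.tokenizer = Tokenizer(oov_token='<unk>')
        self.tokenizer.fit_on_texts(texts)
        self.pad_max_len = pad_max_len

    def __call__(self, texts):
        seqs = self.tokenizer.texts_to_sequences(texts)
        return pad_sequences(seqs, maxlen=self.pad_max_len, padding='post')

    @property
    def vocab_size(self):
        return len(self.tokenizer.word_index)

    @property
    def index_word(self):
        return self.tokenizer.index_word

    def to_json(self, path):
        # Tokenizer.to_json() returns a JSON string; write it to the given path
        with open(path, 'w') as f:
            f.write(self.tokenizer.to_json())

def text_processor(texts, pad_max_len=None):
    return TextProcessor(texts, pad_max_len)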
Example #6
def main(train, val):
    global graph

    # not necessary when running this as a script,
    # but needed when running the code repeatedly, e.g. in a Jupyter notebook
    tf.compat.v1.reset_default_graph()

    graph = tf.Graph()

    with graph.as_default():

        with tf.name_scope('cls'):
            LABELS = tf.compat.v1.placeholder(dtype=tf.float32,
                                              shape=(None, ),
                                              name='labels')

        with tf.name_scope('questions'):
            embedding = tf.keras.layers.Embedding(processor.vocab_size + 1,
                                                  embedding_dim)
            q_gru = tf.keras.layers.GRU(units,
                                        return_state=True,
                                        return_sequences=True,
                                        recurrent_initializer='glorot_uniform')
            attention_q = Attention(units)
            QS = tf.compat.v1.placeholder(dtype=tf.float32,
                                          shape=(None, pad_max_len),
                                          name='encoded_questions')

        with tf.name_scope('images'):
            attention_img = Attention(units)
            IMGS = tf.compat.v1.placeholder(dtype=tf.float32,
                                            shape=(None, image_seq, 1024),
                                            name='imgs')

        # images
        img_encoded = tf.keras.layers.Dense(embedding_dim)(IMGS)

        # use last state from question encoding for attention input
        # (batch_size, seq_length, embedding_dim)
        q_encoded = embedding(QS)
        q_outputs, q_state = q_gru(q_encoded)

        # image attention
        context2, _ = attention_img(img_encoded, q_state)

        # questions
        context1, WEIGHTS = attention_q(q_outputs, context2)

        # classification
        x = tf.concat([context1, context2], axis=-1)
        x = tf.keras.layers.Dense(1024)(x)
        x = tf.keras.layers.Dense(1024)(x)
        PRED = tf.keras.layers.Dense(3)(x)

        COST = tf.keras.losses.sparse_categorical_crossentropy(
            LABELS, PRED, from_logits=True, axis=-1)
        LOSS = tf.reduce_mean(COST)

        OPT = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(LOSS)

    with tf.compat.v1.Session(graph=graph) as sess:

        sess.run(tf.compat.v1.global_variables_initializer())
        set_session(sess)

        if DEBUG:
            trainables = sess.run(tf.compat.v1.trainable_variables())
            print('Total trainables:', len(trainables))

        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            print('=====' * 10)
            print('  Epoch:', epoch)
            print('=====' * 10)

            batch_start = time.time()

            random.shuffle(train)

            for batch, (questions, labels, img_features) \
                    in enumerate(data_generator(train, batch_size=batch_size)):
                st = time.time()

                _, loss, cost, pred, weights = sess.run(
                    [OPT, LOSS, COST, PRED, WEIGHTS],
                    feed_dict={
                        QS: questions,
                        LABELS: labels,
                        IMGS: img_features
                    })

                end_calc = time.time()

                if DEBUG:
                    if batch % display_step == 0:
                        print('[DEBUG] Batch: {}'.format(batch))
                        #print('[DEBUG] Average weights :'.format(batch))
                        #for layer in model.layers:
                        #    print('Layer:', model.name + ':' + layer.name)
                        #    print('Weights:')
                        #    print('  mean:', np.mean(layer.get_weights()[0]))
                        #    print('   std:', np.std(layer.get_weights()[0]))
                        #    print()
                        print('[DEBUG] Prediction:')
                        #print('   values:\n', pred.reshape(-1, 3))
                        print('   Pred:\n    ', np.argmax(pred, axis=-1))
                        print('  Label:\n    ', labels.reshape(-1))
                        print('[DEBUG] Weights/Question:')
                        print(weights[0].reshape(-1))
                        print(*[
                            processor.index_word[q] for q in questions[0]
                            if q > 0
                        ])
                        print()

                acc = calculate_accuracy(pred, labels)

                if batch % display_step == 0:
                    print('    Batch -', batch)
                    print(
                        '      Train:  Loss - {:.4f}  Acc - {:.4f}  Time(calc) - {:.4f}s/batch  Time(total) - {:.4f}s/batch'
                        .format(loss, acc, end_calc - st,
                                time.time() - batch_start))

                batch_start = time.time()

            # after training finishes in each epoch,
            # evaluate the model on the validation dataset
            loss_val = 0
            acc_val = 0
            st_val = time.time()
            # TODO: test
            for q_val, l_val, i_val in data_generator(val,
                                                      batch_size=batch_size):
                _loss_val, pred_val = sess.run([LOSS, PRED],
                                               feed_dict={
                                                   QS: q_val,
                                                   LABELS: l_val,
                                                   IMGS: i_val
                                               })
                l_val = l_val.ravel()
                loss_val += _loss_val
                acc_val += calculate_accuracy(pred_val, l_val)

            loss_val /= step_per_val
            acc_val /= step_per_val

            end_val = time.time()

            print()
            print(
                '      Validation(approx.): Loss - {:.4f}  Acc - {:.4f}  Time - {:.4f}s'
                .format(loss_val, acc_val, end_val - st_val))
            print('  Total time per epoch: {:.4f}s'.format(time.time() -
                                                           epoch_start))
            print()
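
The Attention layer used above is called as attention(values, query) and returns a (context, weights) pair. A sketch of Bahdanau-style additive attention that matches this interface; the repository's implementation may differ:

import tensorflow as tf

class Attention(tf.keras.layers.Layer):
    # Hypothetical additive (Bahdanau-style) attention returning the
    # context vector and the attention weights, as consumed above.
    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, values, query):
        # values: (batch, seq_len, dim), query: (batch, dim)
        query = tf.expand_dims(query, 1)                   # (batch, 1, dim)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query)))
        weights = tf.nn.softmax(score, axis=1)             # (batch, seq_len, 1)
        context = tf.reduce_sum(weights * values, axis=1)  # (batch, dim)
        return context, weights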