Example #1
# Imports assumed by Examples #1-#4, matching torchMoji's own test suite
# (ROOT_PATH and VOCAB_PATH live in torchmoji.global_variables):
import json

from torchmoji.finetuning import load_benchmark, finetune
from torchmoji.global_variables import PRETRAINED_PATH, ROOT_PATH, VOCAB_PATH
from torchmoji.model_def import torchmoji_transfer


def test_finetune_last():
    """ finetuning using 'last'.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='last',
                          nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
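Under the hood, the 'last' method amounts to freezing every pretrained parameter and training only the final softmax layer. A minimal PyTorch sketch of that idea (illustrative only; freeze_all_but_last and the layer-name prefix are hypothetical, not torchMoji's actual finetuning code):

def freeze_all_but_last(model, last_layer_prefix='output_layer'):
    """Freeze every parameter except those of the final classification layer."""
    for name, param in model.named_parameters():
        param.requires_grad = name.startswith(last_layer_prefix)

# Only the still-trainable parameters are then handed to the optimizer, e.g.:
# optimizer = torch.optim.Adam(
#     (p for p in model.parameters() if p.requires_grad), lr=0.001)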
Example #2
def test_finetune_full():
    """ finetuning using 'full'.
    """
    DATASET_PATH = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # Keras and pyTorch implementation of the Adam optimizer are slightly different and change a bit the results
    # We reduce the min accuracy needed here to pass the test
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
    print('Loading PyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes,
                               PRETRAINED_PATH,
                               extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='full',
                          nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
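The extend_with=10000 / extend_embedding=data['added'] pairing matters here: load_benchmark may grow the vocabulary with up to 10000 tokens found in the benchmark data, data['added'] reports how many were actually added, and the pretrained embedding matrix then needs one freshly initialised row per added token. A rough sketch of that resizing step (an illustration under assumed shapes; extend_embedding_rows is a hypothetical helper, not torchMoji's implementation):

import torch

def extend_embedding_rows(weight, added):
    """Append `added` randomly initialised rows to an embedding weight matrix."""
    new_rows = torch.empty(added, weight.size(1)).uniform_(-0.1, 0.1)
    return torch.cat([weight, new_rows], dim=0)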
Example #3
def test_finetune_last():
    """ finetuning using 'last'.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='last', nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
Example #4
def test_finetune_full():
    """ finetuning using 'full'.
    """
    DATASET_PATH = ROOT_PATH+'/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # Keras and pyTorch implementation of the Adam optimizer are slightly different and change a bit the results
    # We reduce the min accuracy needed here to pass the test
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
    print('Loading PyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH, extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='full', nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
Example #5
        model = torchmoji_transfer(
                    nb_model_classes,
                    weight_path,
                    extend_embedding=data['added'])
        print(model)

        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model, data['texts'],
                                               data['labels'],
                                               nb_classes, data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
        else:
            model, result = finetune(model, data['texts'], data['labels'],
                                     nb_classes, data['batch_size'],
                                     FINETUNE_METHOD, metric='acc',
                                     verbose=VERBOSE)

        # Write results
        if use_f1_score:
            print('Overall F1 score (dset = {}): {}'.format(dset, result))
            with open('{}/{}_{}_{}_results.txt'.
                      format(RESULTS_DIR, dset, FINETUNE_METHOD, rerun_iter),
                      "w") as f:
                f.write("F1: {}\n".format(result))
        else:
            print('Test accuracy (dset = {}): {}'.format(dset, result))
            with open('{}/{}_{}_{}_results.txt'.
                      format(RESULTS_DIR, dset, FINETUNE_METHOD, rerun_iter),
                      "w") as f:
                f.write("Acc: {}\n".format(result))
Example #6
4) Unfreeze all layers and train entire model.
"""

from __future__ import print_function
import example_helper
import json
from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH
from torchmoji.finetuning import (
     load_benchmark,
     finetune)


DATASET_PATH = '../data/kaggle-insults/raw.pickle'
nb_classes = 2

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH, extend_embedding=data['added'])
print(model)
model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                      data['batch_size'], method='chain-thaw')
print('Acc: {}'.format(acc))
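The docstring fragment at the top of this example is the tail of a description of 'chain-thaw': the method trains individual layers one at a time while the rest of the model stays frozen, then unfreezes everything for a final pass over the whole model. A compressed sketch of that schedule (layers and train_epoch are hypothetical placeholders; the real loop is torchMoji's finetune with method='chain-thaw'):

def chain_thaw_sketch(model, layers, train_epoch):
    """Train one layer at a time, then unfreeze and train the whole model."""
    for layer in layers:
        for p in model.parameters():
            p.requires_grad = False   # freeze everything...
        for p in layer.parameters():
            p.requires_grad = True    # ...except the current layer
        train_epoch(model)
    for p in model.parameters():
        p.requires_grad = True        # final pass over the full model
    train_epoch(model)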
Example #7
        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model,
                                               data['texts'],
                                               data['labels'],
                                               nb_classes,
                                               data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
        else:
            model, result = finetune(model,
                                     data['texts'],
                                     data['labels'],
                                     nb_classes,
                                     data['batch_size'],
                                     FINETUNE_METHOD,
                                     metric='acc',
                                     verbose=VERBOSE)

        # Write results
        if use_f1_score:
            print('Overall F1 score (dset = {}): {}'.format(dset, result))
            with open(
                    '{}/{}_{}_{}_results.txt'.format(RESULTS_DIR, dset,
                                                     FINETUNE_METHOD,
                                                     rerun_iter), "w") as f:
                f.write("F1: {}\n".format(result))
        else:
            print('Test accuracy (dset = {}): {}'.format(dset, result))
            with open(
                    '{}/{}_{}_{}_results.txt'.format(RESULTS_DIR, dset,
                                                     FINETUNE_METHOD,
                                                     rerun_iter), "w") as f:
                f.write("Acc: {}\n".format(result))