Example #1
# Shared imports for the test snippets in Examples #1-#5; the module paths
# follow the layout shown in Examples #6 and #7 below.
import json

from torchMoji.torchmoji.model_def import torchmoji_transfer
from torchMoji.torchmoji.global_variables import (PRETRAINED_PATH, VOCAB_PATH,
                                                  ROOT_PATH, NB_TOKENS)
from torchMoji.torchmoji.finetuning import (load_benchmark, finetune,
                                            change_trainable, freeze_layers)

def test_finetune_last():
    """ Finetuning with the 'last' method reaches the expected accuracy.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='last',
                          nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
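In plain PyTorch terms, the 'last' method amounts to freezing every parameter
except those of the classification head before training. A minimal standalone
sketch with toy modules (not torchMoji's internals):

import torch
from torch import nn

# Toy stand-in: 'body' plays the pretrained layers, 'output_layer' the
# classification head that the 'last' method leaves trainable.
model = nn.Sequential()
model.add_module('body', nn.Linear(32, 16))
model.add_module('output_layer', nn.Linear(16, 2))

for param in model.parameters():               # freeze everything...
    param.requires_grad = False
for param in model.output_layer.parameters():  # ...except the output layer
    param.requires_grad = True

# Only the still-trainable parameters are handed to the optimizer.
optimizer = torch.optim.Adam(
    [p for p in model.parameters() if p.requires_grad], lr=1e-3)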
Example #2
def test_finetune_full():
    """ finetuning using 'full'.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # The Keras and PyTorch implementations of the Adam optimizer differ
    # slightly, which changes the results a bit, so we lower the minimum
    # accuracy needed to pass the test.
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab, extend_with=10000)
    print('Loading PyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes,
                               PRETRAINED_PATH,
                               extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='full',
                          nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
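The texts and labels returned by load_benchmark() are three-element lists
holding the train/validation/test splits; Example #8 below unpacks them the
same way:

# Assuming data = load_benchmark(dataset_path, vocab) as above:
(X_train, y_train) = (data['texts'][0], data['labels'][0])
(X_val, y_val) = (data['texts'][1], data['labels'][1])
(X_test, y_test) = (data['texts'][2], data['labels'][2])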
Example #3
def test_change_trainable():
    """ change_trainable() changes trainability of layers.
    """
    model = torchmoji_transfer(5)
    change_trainable(model.embed, False)
    assert not any(p.requires_grad for p in model.embed.parameters())
    change_trainable(model.embed, True)
    assert all(p.requires_grad for p in model.embed.parameters())
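A plausible minimal equivalent of change_trainable() (a sketch of the
behaviour the test asserts, not the library source) simply toggles
requires_grad on every parameter of the given module:

from torch import nn

def change_trainable_sketch(module: nn.Module, trainable: bool) -> None:
    """Toggle gradient tracking for all parameters of the module."""
    for param in module.parameters():
        param.requires_grad = trainable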
Example #4
def test_torchmoji_transfer_extend_embedding():
    """ Defining torchmoji with extension.
    """
    extend_with = 50
    model = torchmoji_transfer(5,
                               weight_path=PRETRAINED_PATH,
                               extend_embedding=extend_with)
    embedding_layer = model.embed
    assert embedding_layer.weight.size()[0] == NB_TOKENS + extend_with
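Extending the embedding layer means keeping the NB_TOKENS pretrained rows and
appending freshly initialised rows for the added tokens. A rough sketch of the
idea (hypothetical helper, not torchMoji's implementation):

import torch
from torch import nn

def extend_embedding_sketch(embed, extend_with):
    """Return a larger embedding whose first rows are the pretrained weights."""
    extended = nn.Embedding(embed.num_embeddings + extend_with,
                            embed.embedding_dim)
    with torch.no_grad():
        extended.weight[:embed.num_embeddings] = embed.weight  # pretrained rows
        nn.init.uniform_(extended.weight[embed.num_embeddings:], -0.1, 0.1)
    return extended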
Example #5
def test_freeze_layers():
    """ Correct layers are frozen.
    """
    model = torchmoji_transfer(5)
    keyword = 'output_layer'

    model = freeze_layers(model, unfrozen_keyword=keyword)

    for name, module in model.named_children():
        trainable = keyword.lower() in name.lower()
        assert all(p.requires_grad == trainable for p in module.parameters())
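Judging from the assertion loop, freeze_layers() leaves only the top-level
children whose name contains the keyword trainable; a minimal sketch of that
behaviour (an assumption, not the library source):

from torch import nn

def freeze_layers_sketch(model: nn.Module, unfrozen_keyword: str) -> nn.Module:
    """Keep only children whose name contains the keyword trainable."""
    for name, module in model.named_children():
        trainable = unfrozen_keyword.lower() in name.lower()
        for param in module.parameters():
            param.requires_grad = trainable
    return model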
Example #6
from __future__ import print_function
import json
from torchMoji.torchmoji.model_def import torchmoji_transfer
from torchMoji.torchmoji.global_variables import PRETRAINED_PATH
from torchMoji.torchmoji.finetuning import (load_benchmark, finetune)

DATASET_PATH = '../data/kaggle-insults/raw.pickle'
nb_classes = 2

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
model = torchmoji_transfer(nb_classes,
                           PRETRAINED_PATH,
                           extend_embedding=data['added'])
print(model)
model, acc = finetune(model,
                      data['texts'],
                      data['labels'],
                      nb_classes,
                      data['batch_size'],
                      method='chain-thaw')
print('Acc: {}'.format(acc))
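The 'chain-thaw' method fine-tunes one layer at a time while the rest stay
frozen, finishing with a pass over the whole model. A schematic sketch of the
idea (train_one_pass is a hypothetical stand-in for a training loop, not
torchMoji's internals):

from torch import nn

def chain_thaw_sketch(model: nn.Module, train_one_pass) -> None:
    for target in model.children():        # thaw and train layers one by one
        for param in model.parameters():
            param.requires_grad = False
        for param in target.parameters():
            param.requires_grad = True
        train_one_pass(model)
    for param in model.parameters():       # final pass over the full model
        param.requires_grad = True
    train_one_pass(model)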
Example #7
"""Finetuning example. Trains the torchMoji model on the SS-Youtube dataset,
using the 'last' finetuning method and the accuracy metric.

The 'last' method does the following:
0) Load all weights except for the softmax layer. Do not add tokens to the
   vocabulary and do not extend the embedding layer.
1) Freeze all layers except for the softmax layer.
2) Train.
"""

from __future__ import print_function
import json
from torchMoji.torchmoji.model_def import torchmoji_transfer
from torchMoji.torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH, ROOT_PATH
from torchMoji.torchmoji.finetuning import (load_benchmark, finetune)

DATASET_PATH = '{}/data/SS-Youtube/raw.pickle'.format(ROOT_PATH)
nb_classes = 2

with open(VOCAB_PATH, 'r') as f:
    vocab = json.load(f)

# Load dataset.
data = load_benchmark(DATASET_PATH, vocab)

# Set up model and finetune
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
print(model)
model, acc = finetune(model,
                      data['texts'],
                      data['labels'],
                      nb_classes,
                      data['batch_size'],
                      method='last')
print('Acc: {}'.format(acc))
Example #8
        if FINETUNE_METHOD == 'last':
            extend_with = 0
        elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']:
            extend_with = 10000
            extend_with = 10000
        else:
            raise ValueError('Finetuning method not recognised!')

        # Load dataset.
        data = load_benchmark(path, vocab, extend_with=extend_with)

        (X_train, y_train) = (data['texts'][0], data['labels'][0])
        (X_val, y_val) = (data['texts'][1], data['labels'][1])
        (X_test, y_test) = (data['texts'][2], data['labels'][2])

        weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None
        nb_model_classes = 2 if use_f1_score else nb_classes
        model = torchmoji_transfer(nb_model_classes,
                                   weight_path,
                                   extend_embedding=data['added'])
        print(model)

        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model,
                                               data['texts'],
                                               data['labels'],
                                               nb_classes,
                                               data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
        else:
            model, result = finetune(model,
                                     data['texts'],
                                     data['labels'],
                                     nb_classes,
                                     data['batch_size'],
                                     FINETUNE_METHOD,
                                     verbose=VERBOSE)