Example #1
def test_finetune_full():
    """ finetuning using 'full'.
    """
    DATASET_PATH = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # The Keras and PyTorch implementations of the Adam optimizer differ slightly,
    # which changes the results a bit, so we lower the minimum accuracy required
    # to pass the test.
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
    print('Loading pyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes,
                               PRETRAINED_PATH,
                               extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='full',
                          nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
Example #2
def test_finetune_last():
    """ finetuning using 'last'.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model,
                          data['texts'],
                          data['labels'],
                          nb_classes,
                          data['batch_size'],
                          method='last',
                          nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
Example #3
def test_change_trainable():
    """ change_trainable() changes trainability of layers.
    """
    model = torchmoji_transfer(5)
    change_trainable(model.embed, False)
    assert not any(p.requires_grad for p in model.embed.parameters())
    change_trainable(model.embed, True)
    assert all(p.requires_grad for p in model.embed.parameters())
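change_trainable() is only expected to toggle gradient computation on a module. A minimal sketch of such a helper, assuming it does nothing more than flip requires_grad on the module's parameters (an illustration consistent with the test above, not necessarily torchmoji's exact implementation):

def change_trainable_sketch(module, trainable):
    # Toggle gradient computation for every parameter of the given module.
    for param in module.parameters():
        param.requires_grad = trainable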
Example #4
def test_torchmoji_transfer_extend_embedding():
    """ Defining torchmoji with extension.
    """
    extend_with = 50
    model = torchmoji_transfer(5, weight_path=PRETRAINED_PATH,
                               extend_embedding=extend_with)
    embedding_layer = model.embed
    assert embedding_layer.weight.size()[0] == NB_TOKENS + extend_with
Example #5
def test_torchmoji_transfer_extend_embedding():
    """ Defining torchmoji with extension.
    """
    extend_with = 50
    model = torchmoji_transfer(5,
                               weight_path=PRETRAINED_PATH,
                               extend_embedding=extend_with)
    embedding_layer = model.embed
    assert embedding_layer.weight.size()[0] == NB_TOKENS + extend_with
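Extending the embedding layer conceptually means appending freshly initialised rows for the new tokens while keeping the pretrained rows untouched. A rough PyTorch sketch of that idea (hypothetical helper, not torchmoji's actual code):

import torch
from torch import nn

def extend_embedding_sketch(embedding, extend_with):
    # Append `extend_with` randomly initialised rows to a pretrained embedding.
    old_weight = embedding.weight.data
    new_rows = torch.randn(extend_with, old_weight.size(1)) * 0.01
    extended = nn.Embedding(old_weight.size(0) + extend_with, old_weight.size(1))
    extended.weight.data = torch.cat([old_weight, new_rows], dim=0)
    return extended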
Example #6
def test_freeze_layers():
    """ Correct layers are frozen.
    """
    model = torchmoji_transfer(5)
    keyword = 'output_layer'

    model = freeze_layers(model, unfrozen_keyword=keyword)

    for name, module in model.named_children():
        trainable = keyword.lower() in name.lower()
        assert all(p.requires_grad == trainable for p in module.parameters())
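The test only checks that children whose name contains the keyword stay trainable. A minimal sketch of a freeze_layers-style helper that would satisfy it, assuming it matches child modules by name (an illustration, not the library's exact code):

def freeze_layers_sketch(model, unfrozen_keyword='output_layer'):
    # Keep gradients only for child modules whose name contains the keyword.
    for name, module in model.named_children():
        trainable = unfrozen_keyword.lower() in name.lower()
        for param in module.parameters():
            param.requires_grad = trainable
    return model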
Example #7
def test_finetune_last():
    """ finetuning using 'last'.
    """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='last', nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
Example #8
def test_finetune_full():
    """ finetuning using 'full'.
    """
    DATASET_PATH = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # The Keras and PyTorch implementations of the Adam optimizer differ slightly,
    # which changes the results a bit, so we lower the minimum accuracy required
    # to pass the test.
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
    print('Loading pyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH, extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='full', nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
Example #9
        elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']:
            extend_with = 10000
        else:
            raise ValueError('Finetuning method not recognised!')

        # Load dataset.
        data = load_benchmark(path, vocab, extend_with=extend_with)

        (X_train, y_train) = (data['texts'][0], data['labels'][0])
        (X_val, y_val) = (data['texts'][1], data['labels'][1])
        (X_test, y_test) = (data['texts'][2], data['labels'][2])

        weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None
        nb_model_classes = 2 if use_f1_score else nb_classes
        model = torchmoji_transfer(
                    nb_model_classes,
                    weight_path,
                    extend_embedding=data['added'])
        print(model)

        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model, data['texts'],
                                               data['labels'],
                                               nb_classes, data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
        else:
            model, result = finetune(model, data['texts'], data['labels'],
                                     nb_classes, data['batch_size'],
                                     FINETUNE_METHOD, metric='acc',
                                     verbose=VERBOSE)
Example #10
import json
from torchmoji.model_def import torchmoji_transfer
from torchmoji.class_avg_finetuning import class_avg_finetune
from torchmoji.finetuning import load_benchmark
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH, ROOT_PATH

DATASET_PATH = '{}/data/Sentence/sentenceSentiment_small.pickle'.format(
    ROOT_PATH)
nb_classes = 3

with open(VOCAB_PATH, 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
#
# Also note that when using class average F1 to evaluate, the model has to be
# defined with two classes, since the model will be trained for each class
# separately.
model = torchmoji_transfer(2, PRETRAINED_PATH, extend_embedding=data['added'])
print(model)

# For finetuning however, pass in the actual number of classes.
model, f1 = class_avg_finetune(model,
                               data['texts'],
                               data['labels'],
                               nb_classes,
                               data['batch_size'],
                               method='last')
print('F1: {}'.format(f1))
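Class average F1 here is an average of per-class F1 scores (which is why the model above is defined with two classes and trained per class). For a quick sanity check outside torchmoji, a macro-averaged F1 can be computed with scikit-learn, e.g. on hypothetical label arrays:

from sklearn.metrics import f1_score

# Macro averaging computes F1 per class and then takes the unweighted mean.
y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]
print(f1_score(y_true, y_pred, average='macro'))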
Example #11
4) Unfreeze all layers and train entire model.
"""

from __future__ import print_function
import example_helper
import json
from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH
from torchmoji.finetuning import (
     load_benchmark,
     finetune)


DATASET_PATH = '../data/kaggle-insults/raw.pickle'
nb_classes = 2

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH, extend_embedding=data['added'])
print(model)
model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                      data['batch_size'], method='chain-thaw')
print('Acc: {}'.format(acc))
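The 'chain-thaw' method used above finetunes one layer at a time before finally training the whole model. A rough sketch of that loop (the exact layer ordering and training code in torchmoji differ; train_one_epoch below is a hypothetical callable that trains whatever parameters currently have requires_grad=True):

def chain_thaw_sketch(model, train_one_epoch):
    layers = list(model.children())
    for unfrozen in layers:
        # Freeze everything, then unfreeze a single layer and train it.
        for layer in layers:
            for param in layer.parameters():
                param.requires_grad = layer is unfrozen
        train_one_epoch(model)
    # Finally unfreeze all layers and train the entire model.
    for param in model.parameters():
        param.requires_grad = True
    train_one_epoch(model)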
Example #12
The 'last' method does the following:
0) Load all weights except for the softmax layer. Do not add tokens to the
   vocabulary and do not extend the embedding layer.
1) Freeze all layers except for the softmax layer.
2) Train.
"""

from __future__ import print_function
import example_helper
import json
from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH, ROOT_PATH
from torchmoji.finetuning import (
     load_benchmark,
     finetune)

DATASET_PATH = '{}/data/SS-Youtube/raw.pickle'.format(ROOT_PATH)
nb_classes = 2

with open(VOCAB_PATH, 'r') as f:
    vocab = json.load(f)

# Load dataset.
data = load_benchmark(DATASET_PATH, vocab)

# Set up model and finetune
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
print(model)
model, acc = finetune(model, data['texts'], data['labels'], nb_classes, data['batch_size'], method='last')
print('Acc: {}'.format(acc))
Example #13
        elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']:
            extend_with = 10000
        else:
            raise ValueError('Finetuning method not recognised!')

        # Load dataset.
        data = load_benchmark(path, vocab, extend_with=extend_with)

        (X_train, y_train) = (data['texts'][0], data['labels'][0])
        (X_val, y_val) = (data['texts'][1], data['labels'][1])
        (X_test, y_test) = (data['texts'][2], data['labels'][2])

        weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None
        nb_model_classes = 2 if use_f1_score else nb_classes
        model = torchmoji_transfer(nb_model_classes,
                                   weight_path,
                                   extend_embedding=data['added'])
        print(model)

        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model,
                                               data['texts'],
                                               data['labels'],
                                               nb_classes,
                                               data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
        else:
            model, result = finetune(model,
                                     data['texts'],
                                     data['labels'],
                                     nb_classes,
                                     data['batch_size'],
                                     FINETUNE_METHOD,
                                     metric='acc',
                                     verbose=VERBOSE)
Example #14
1) Freeze all layers except for the softmax layer.
2) Train.
"""

from __future__ import print_function
import example_helper
import json
from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH, ROOT_PATH
from torchmoji.finetuning import (load_benchmark, finetune)

DATASET_PATH = '{}/data/SS-Youtube/raw.pickle'.format(ROOT_PATH)
nb_classes = 2

with open(VOCAB_PATH, 'r') as f:
    vocab = json.load(f)

# Load dataset.
data = load_benchmark(DATASET_PATH, vocab)

# Set up model and finetune
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
print(model)
model, acc = finetune(model,
                      data['texts'],
                      data['labels'],
                      nb_classes,
                      data['batch_size'],
                      method='last')
print('Acc: {}'.format(acc))