Example #1
def test_encoder_graph_dimensions(dataset, mod_attr, graph_attr):
    color_seqs, word_seqs, vocab = dataset
    mod = ContextualColorDescriber(vocab, hidden_dim=5, max_iter=1)
    mod.fit(color_seqs, word_seqs)
    mod_attr_val = getattr(mod, mod_attr)
    graph_attr_val = getattr(mod.model.encoder.rnn, graph_attr)
    assert mod_attr_val == graph_attr_val
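The `dataset` fixture and the `mod_attr`/`graph_attr` pair are supplied by pytest and are not part of this excerpt. A minimal sketch of what they might look like, assuming the fixture is built with `create_example_dataset` (the group size and the attribute pairing below are assumptions):

import pytest
from torch_color_describer import create_example_dataset

@pytest.fixture
def dataset():
    # Small synthetic dataset of color contexts, token sequences, and a vocab.
    color_seqs, word_seqs, vocab = create_example_dataset(
        group_size=50, vec_dim=2)
    return color_seqs, word_seqs, vocab

# The test above would additionally carry a decoration along these lines,
# pairing a model keyword argument with the matching torch RNN attribute:
#
#     @pytest.mark.parametrize("mod_attr, graph_attr",
#                              [("hidden_dim", "hidden_size")])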
Example #2
def test_predict_functions_honor_device(dataset, func):
    color_seqs, word_seqs, vocab = dataset
    mod = ContextualColorDescriber(vocab, max_iter=2)
    mod.fit(color_seqs, word_seqs)
    prediction_func = getattr(mod, func)
    with pytest.raises(RuntimeError):
        if func == "predict":
            prediction_func(color_seqs, device="FAKE_DEVICE")
        else:
            prediction_func(color_seqs, word_seqs, device="FAKE_DEVICE")
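Here `func` names one of the model's prediction methods and is presumably supplied by a parametrize decoration that is not shown in the excerpt, roughly:

import pytest

@pytest.mark.parametrize("func", ["predict", "predict_proba"])
def test_predict_functions_honor_device(dataset, func):
    ...  # body as shown above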
Example #3
def test_embedding_update_control(dataset, freeze, outcome):
    color_seqs, word_seqs, vocab = dataset
    embed_dim = 5
    embedding = np.ones((len(vocab), embed_dim))
    mod = ContextualColorDescriber(vocab,
                                   max_iter=10,
                                   embedding=embedding,
                                   freeze_embedding=freeze)
    mod.fit(color_seqs, word_seqs)
    graph_emb = mod.model.decoder.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(embedding, graph_emb) == outcome
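The `freeze`/`outcome` pair ties `freeze_embedding` to whether the trained weights should still equal the initial matrix; presumably the test is parametrized along these lines (an assumption, not shown in the excerpt):

import pytest

@pytest.mark.parametrize("freeze, outcome", [
    (True, True),    # frozen embedding: weights stay equal to the initial matrix
    (False, False),  # trainable embedding: weights should change during fitting
])
def test_embedding_update_control(dataset, freeze, outcome):
    ...  # body as shown above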
Example #4
def test_pretrained_embedding(dataset):
    color_seqs, word_seqs, vocab = dataset
    embed_dim = 5
    embedding = np.ones((len(vocab), embed_dim))
    mod = ContextualColorDescriber(vocab,
                                   max_iter=1,
                                   embedding=embedding,
                                   freeze_embedding=True)
    mod.fit(color_seqs, word_seqs)
    graph_emb = mod.model.decoder.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(embedding, graph_emb)
Example #5
def test_predict_restores_device(dataset, func):
    color_seqs, word_seqs, vocab = dataset
    mod = ContextualColorDescriber(vocab, max_iter=2)
    mod.fit(color_seqs, word_seqs)
    current_device = mod.device
    assert current_device != torch.device("cpu:0")
    prediction_func = getattr(mod, func)
    if func == "predict":
        prediction_func(color_seqs, device="cpu:0")
    else:
        prediction_func(color_seqs, word_seqs, device="cpu:0")
    assert mod.device == current_device
Example #6
def test_torch_color_describer_save_load(dataset):
    color_seqs, word_seqs, vocab = dataset
    mod = ContextualColorDescriber(vocab,
                                   embed_dim=10,
                                   hidden_dim=10,
                                   max_iter=100,
                                   embedding=None)
    mod.fit(color_seqs, word_seqs)
    mod.predict(color_seqs)
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        mod.to_pickle(name)
        mod2 = ContextualColorDescriber.from_pickle(name)
        mod2.predict(color_seqs)
        mod2.fit(color_seqs, word_seqs)
Example #7
# ## Initial model
#
# The first model is currently configured as a small model run for just a few iterations. That should be enough to get some traction, but it is unlikely to produce a great model. You are free to modify this configuration if you wish; it is here just for demonstration and testing:

# In[ ]:

dev_mod = ContextualColorDescriber(dev_vocab,
                                   embed_dim=10,
                                   hidden_dim=10,
                                   max_iter=5,
                                   batch_size=128)

# In[ ]:

_ = dev_mod.fit(dev_cols_train, dev_seqs_train)

# As discussed in [colors_overview.ipynb](colors_overview.ipynb), our primary metric is `listener_accuracy`:

# In[ ]:

dev_mod.listener_accuracy(dev_cols_test, dev_seqs_test)

# We can also see the model's predicted sequences given color context inputs:

# In[ ]:

dev_mod.predict(dev_cols_test[:1])

Example #8
def test_predict_proba(dataset):
    color_seqs, word_seqs, vocab = dataset
    mod = ContextualColorDescriber(vocab, max_iter=1)
    mod.fit(color_seqs, word_seqs)
    probs = mod.predict_proba(color_seqs, word_seqs)
    assert all(np.round(t.sum(), 6) == 1.0 for seq in probs for t in seq)
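Here `probs` holds one entry per example, each a sequence of per-timestep probability distributions over the vocabulary, and the assertion checks that every distribution sums to 1 up to rounding.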
Example #9
from colors import ColorsCorpusReader
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch_color_describer import (ContextualColorDescriber,
                                   create_example_dataset)
import utils
from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL

tiny_contexts, tiny_words, tiny_vocab = create_example_dataset(group_size=3,
                                                               vec_dim=2)

toy_mod = ContextualColorDescriber(
    tiny_vocab,
    embedding=None,  # Option to supply a pretrained matrix as an `np.array`.
    embed_dim=10,
    hidden_dim=20,
    max_iter=100,
    eta=0.01,
    optimizer=torch.optim.Adam,
    batch_size=128,
    l2_strength=0.0,
    warm_start=False,
    device=None)

_ = toy_mod.fit(tiny_contexts, tiny_words)

metric = toy_mod.listener_accuracy(tiny_contexts, tiny_words)
print("listener_accuracy:", metric)
toy_mod = ContextualColorDescriber(
    toy_vocab,
    embedding=None,  # Option to supply a pretrained matrix as an `np.array`.
    embed_dim=10,
    hidden_dim=10,
    max_iter=100,
    eta=0.01,
    optimizer=torch.optim.Adam,
    batch_size=128,
    l2_strength=0.0,
    warm_start=False,
    device=None)

# In[31]:

_ = toy_mod.fit(toy_color_seqs_train, toy_word_seqs_train)

# ### Predicting sequences

# The `predict` method takes a list of color contexts as input and returns model descriptions:

# In[32]:

toy_preds = toy_mod.predict(toy_color_seqs_test)

# In[33]:

toy_preds[0]

# We can then check that we predicted all correct sequences:
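# A sketch of that check, assuming the gold descriptions for the test split
# are in `toy_word_seqs_test` (consistent with the split sketched above):

# In[ ]:

toy_correct = sum(1 for gold, pred in zip(toy_word_seqs_test, toy_preds)
                  if gold == pred)

toy_correct / len(toy_word_seqs_test)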