コード例 #1
0
import pytest
from sciwing.modules.embedders.elmo_embedder import ElmoEmbedder
from sciwing.utils.common import get_system_mem_in_gb

# System memory in GB, converted to int; the skipif marker on
# TestElmoEmbedder compares it against 16.
mem_gb = int(get_system_mem_in_gb())


@pytest.fixture
def setup_elmo_embedder():
    """Provide an ``ElmoEmbedder`` and two whitespace-tokenized sentences.

    Returns:
        A ``(embedder, tokenized_sentences)`` tuple; the second item is a
        list holding one token list per sentence.
    """
    embedder = ElmoEmbedder()
    sentences = ("I like to test elmo", "Elmo context embedder")
    tokenized_sentences = [sentence.split() for sentence in sentences]
    return embedder, tokenized_sentences


@pytest.mark.skipif(
    mem_gb < 16, reason="skipping ELMO embedder because system memory is low"
)
class TestElmoEmbedder:
    """Checks on ``ElmoEmbedder`` output; skipped when ``mem_gb`` is below 16."""

    def test_elmo_embedder_dimensions(self, setup_elmo_embedder):
        """The embedder output has size ``(number of instances, 5, 1024)``."""
        embedder, tokenized_sentences = setup_elmo_embedder
        output = embedder(tokenized_sentences)
        actual_size = output.size()
        # NOTE(review): 5 presumably is the token count of the longest
        # fixture sentence and 1024 the ELMo embedding width — confirm.
        expected_size = (len(tokenized_sentences), 5, 1024)
        assert actual_size == expected_size
コード例 #2
0
# BERT model identifiers that get paired with every aggregation type below.
bert_types = [
    "bert-base-uncased",
    "bert-base-cased",
    "scibert-base-cased",
    "scibert-sci-cased",
    "scibert-base-uncased",
    "scibert-sci-uncased",
    "bert-large-uncased",
    "bert-large-cased",
]

# Aggregation types combined with each BERT model identifier.
aggregation_types = ["sum", "average"]

# Every (bert_type, aggregation_type) pair, with the model identifier varying
# slowest; used as the params of the setup_bert_embedder fixture.
bert_type_aggregation = [
    (bert_type, aggregation_type)
    for bert_type in bert_types
    for aggregation_type in aggregation_types
]

# System memory in GB, converted to int once at import time.
system_memory = int(get_system_mem_in_gb())


@pytest.fixture(params=bert_type_aggregation)
def setup_bert_embedder(request):
    dropout_value = 0.0
    bert_type, aggregation_type = request.param

    bert_embedder = BertEmbedder(
        dropout_value=dropout_value,
        aggregation_type=aggregation_type,
        bert_type=bert_type,
    )
    strings = [
        "Lets start by talking politics",
コード例 #3
0
import pytest
from sciwing.vocab.embedding_loader import EmbeddingLoader
from sciwing.vocab.vocab import Vocab
import os
from sciwing.utils.common import get_system_mem_in_gb


@pytest.fixture()
def setup_word_emb_loader():
    """Build a vocabulary over a single two-token instance.

    Returns:
        The ``Vocab`` object after ``build_vocab()`` has been called on it.
    """
    vocabulary = Vocab(instances=[["load", "vocab"]], max_num_tokens=1000)
    vocabulary.build_vocab()
    return vocabulary


# System memory in GB converted to int; the skipif marker on
# TestWordEmbLoader compares it against 16.
memory_available = int(get_system_mem_in_gb())


@pytest.mark.skipif(
    memory_available < 16, reason="Memory is too low to run the word emb loader tests"
)
class TestWordEmbLoader:
    def test_invalid_embedding_type(self, setup_word_emb_loader):
        """An unknown ``embedding_type`` makes ``EmbeddingLoader`` raise ``AssertionError``."""
        vocab = setup_word_emb_loader
        # Build the mapping outside the ``raises`` block so that an
        # AssertionError from it cannot make this test pass vacuously.
        token2idx = vocab.get_token2idx_mapping()
        with pytest.raises(AssertionError):
            # The call is expected to raise, so its result is not bound.
            EmbeddingLoader(token2idx=token2idx, embedding_type="notexistent")

    @pytest.mark.parametrize(
        "embedding_type",
コード例 #4
0
import pytest
from sciwing.vocab.vocab import Vocab
import os
from sciwing.utils.common import get_system_mem_in_gb


@pytest.fixture
def instances():
    """Provide the tokenized instances for the vocab tests, keyed by name.

    Returns:
        A dict whose ``"single_instance"`` entry is a list containing one
        token list.
    """
    tokens = ["i", "like", "nlp", "i", "i", "like"]
    return {"single_instance": [tokens]}


# System memory in GB converted to int, evaluated once at import time.
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
system_mem = int(get_system_mem_in_gb())


class TestVocab:
    def test_build_vocab_single_instance_has_words(self, instances):
        """Each distinct token of the single instance is a key of the mapping
        returned by ``map_tokens_to_freq_idx()``."""
        builder = Vocab(instances=instances["single_instance"], max_num_tokens=1000)
        token_mapping = builder.map_tokens_to_freq_idx()

        for expected_token in ("i", "like", "nlp"):
            assert expected_token in token_mapping.keys()

    def test_build_vocab_single_instance_descending_order(self, instances):
        single_instance = instances["single_instance"]
        vocab_builder = Vocab(instances=single_instance,
                              max_num_tokens=1000,
                              min_count=1)
        vocab = vocab_builder.map_tokens_to_freq_idx()