import pytest
from sciwing.modules.embedders.elmo_embedder import ElmoEmbedder
from sciwing.utils.common import get_system_mem_in_gb

# ELMo weights are large; record available system RAM so the tests can be
# skipped on low-memory machines.
mem_gb = int(get_system_mem_in_gb())


@pytest.fixture
def setup_elmo_embedder():
    """Provide an ``ElmoEmbedder`` plus two tokenized example sentences."""
    embedder = ElmoEmbedder()
    tokenized_sentences = [
        "I like to test elmo".split(),
        "Elmo context embedder".split(),
    ]
    return embedder, tokenized_sentences


@pytest.mark.skipif(
    mem_gb < 16, reason="skipping ELMO embedder because system memory is low"
)
class TestElmoEmbedder:
    def test_elmo_embedder_dimensions(self, setup_elmo_embedder):
        """Embedding batch is (num_instances, max_seq_len, 1024).

        The longest instance has 5 tokens, and ELMo produces
        1024-dimensional vectors, so the expected size is
        ``(len(instances), 5, 1024)``.
        """
        embedder, tokenized_sentences = setup_elmo_embedder
        embeddings = embedder(tokenized_sentences)
        assert embeddings.size() == (len(tokenized_sentences), 5, 1024)
# BERT/SciBERT variants exercised by the embedder tests.
bert_types = [
    "bert-base-uncased",
    "bert-base-cased",
    "scibert-base-cased",
    "scibert-sci-cased",
    "scibert-base-uncased",
    "scibert-sci-uncased",
    "bert-large-uncased",
    "bert-large-cased",
]
# Ways of collapsing the per-layer BERT representations into one vector.
aggregation_types = ["sum", "average"]
# Cartesian product: every (bert_type, aggregation_type) pair is a fixture param.
bert_type_aggregation = list(itertools.product(bert_types, aggregation_types))

# Available RAM in GB; presumably used to skip these heavy tests on
# low-memory machines — the skipif lives outside this chunk, confirm there.
system_memory = get_system_mem_in_gb()
system_memory = int(system_memory)


@pytest.fixture(params=bert_type_aggregation)
def setup_bert_embedder(request):
    """Build a ``BertEmbedder`` for one (bert_type, aggregation_type) pair.

    Parametrized over ``bert_type_aggregation`` so every model/aggregation
    combination is tested. NOTE(review): this fixture is truncated in the
    visible chunk — the ``strings`` list and return value continue below.
    """
    dropout_value = 0.0
    bert_type, aggregation_type = request.param
    bert_embedder = BertEmbedder(
        dropout_value=dropout_value,
        aggregation_type=aggregation_type,
        bert_type=bert_type,
    )
    strings = [
        "Lets start by talking politics",
import pytest
from sciwing.vocab.embedding_loader import EmbeddingLoader
from sciwing.vocab.vocab import Vocab
import os
from sciwing.utils.common import get_system_mem_in_gb


@pytest.fixture()
def setup_word_emb_loader():
    """Build and return a tiny vocabulary with the tokens "load" and "vocab"."""
    instances = [["load", "vocab"]]
    vocab = Vocab(instances=instances, max_num_tokens=1000)
    vocab.build_vocab()
    return vocab


# Loading pretrained embeddings is memory-hungry; skip the class when the
# machine has less than 16 GB of RAM.
memory_available = int(get_system_mem_in_gb())


@pytest.mark.skipif(
    memory_available < 16, reason="Memory is too low to run the word emb loader tests"
)
class TestWordEmbLoader:
    def test_invalid_embedding_type(self, setup_word_emb_loader):
        """An unknown embedding_type must be rejected with an AssertionError."""
        vocab = setup_word_emb_loader
        with pytest.raises(AssertionError):
            emb_loader = EmbeddingLoader(
                token2idx=vocab.get_token2idx_mapping(), embedding_type="notexistent"
            )

    # NOTE(review): the parametrize arguments and the test body continue
    # beyond this chunk.
    @pytest.mark.parametrize(
        "embedding_type",
import pytest
from sciwing.vocab.vocab import Vocab
import os
from sciwing.utils.common import get_system_mem_in_gb


@pytest.fixture
def instances():
    """One tokenized instance with repeated tokens ("i" x3, "like" x2, "nlp" x1)."""
    single_instance = [["i", "like", "nlp", "i", "i", "like"]]
    return {"single_instance": single_instance}


# Available RAM in GB; presumably used by skip conditions further down the
# file — confirm against the chunk that follows.
system_mem = int(get_system_mem_in_gb())


class TestVocab:
    def test_build_vocab_single_instance_has_words(self, instances):
        """Every token from the instance appears in the frequency/index map."""
        single_instance = instances["single_instance"]
        vocab_builder = Vocab(instances=single_instance, max_num_tokens=1000)
        vocab = vocab_builder.map_tokens_to_freq_idx()
        assert "i" in vocab.keys()
        assert "like" in vocab.keys()
        assert "nlp" in vocab.keys()

    def test_build_vocab_single_instance_descending_order(self, instances):
        # NOTE(review): this test is truncated in the visible chunk — its
        # assertions continue beyond this view.
        single_instance = instances["single_instance"]
        vocab_builder = Vocab(instances=single_instance, max_num_tokens=1000, min_count=1)
        vocab = vocab_builder.map_tokens_to_freq_idx()