Ejemplo n.º 1
0
def test_tokenizer_type_error():
    """A tokenizer that is neither a HF tokenizer nor a string must raise.

    The dataset only accepts HuggingFace tokenizer instances or model-name
    strings; a plain callable should be rejected with ``TypeError``.
    """
    def fake_tokenizer(x):  # callable, but not a valid tokenizer type
        return x

    try:
        LanguageModelingDataset(texts, fake_tokenizer)
    except TypeError:
        pass  # expected: invalid tokenizer type is rejected
    else:
        raise AssertionError(
            "LanguageModelingDataset accepted an invalid tokenizer type"
        )
Ejemplo n.º 2
0
def test_tokenizer_tokenizer():
    """Dataset built from an already-instantiated HF tokenizer works."""
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    lm_dataset = LanguageModelingDataset(texts, tokenizer)
    # two input texts -> two dataset items, each retrievable
    assert len(lm_dataset) == 2
    assert lm_dataset[0] is not None
Ejemplo n.º 3
0
def test_exception_with_sort():
    """The unsupported ``lazy=True`` + ``sort=True`` combination must raise.

    Sorting requires materializing the whole dataset, which contradicts lazy
    loading, so the constructor is expected to reject this configuration.
    """
    tok = AutoTokenizer.from_pretrained("bert-base-uncased")
    try:
        LanguageModelingDataset(texts, tok, lazy=True, sort=True)
    except Exception:
        pass  # expected: lazy + sort is an invalid combination
    else:
        raise AssertionError(
            "LanguageModelingDataset accepted lazy=True with sort=True"
        )
Ejemplo n.º 4
0
def test_tokenizer_str():
    """Dataset built from a tokenizer model name given as a string works."""
    lm_dataset = LanguageModelingDataset(texts, "bert-base-uncased")
    # two input texts -> two dataset items, each retrievable
    assert len(lm_dataset) == 2
    assert lm_dataset[0] is not None