def test_corpus_apply(texts):
    """Check ``Corpus.apply`` with ``str.upper``.

    Builds a corpus whose labels are the stringified positions of ``texts``,
    applies ``str.upper`` and verifies that

    * the call returns a ``Corpus`` instance,
    * ``doc_labels`` and ``doc_lengths`` equal the values read before the call,
    * every document now equals the upper-cased text of a copy taken
      before the call.
    """
    corpus = Corpus({str(idx): text for idx, text in enumerate(texts)})

    # Snapshot the state before applying the function.
    snapshot = corpus.copy()
    labels_before = corpus.doc_labels
    lengths_before = corpus.doc_lengths

    result = corpus.apply(str.upper)
    assert isinstance(result, Corpus)

    # NOTE(review): str.upper can change the length of some strings
    # (e.g. 'ß' -> 'SS'); presumably the `texts` inputs or the definition of
    # doc_lengths make this comparison safe -- confirm.
    assert corpus.doc_labels == labels_before
    assert corpus.doc_lengths == lengths_before

    for label, upper_text in corpus.items():
        assert snapshot[label].upper() == upper_text
def test_corpus_dict_methods():
    """Exercise the dict-style interface of ``Corpus``.

    Covers ``len``, item lookup, item assignment (including rejected keys and
    values), membership, iteration, ``keys()``, ``items()`` and item deletion,
    starting from an empty corpus.
    """
    corpus = Corpus()
    assert len(corpus) == 0

    # Lookup of a missing label is expected to raise KeyError.
    with pytest.raises(KeyError):
        _ = corpus['x']

    # A non-string label and an empty label are both expected to be rejected.
    with pytest.raises(KeyError):
        corpus[1] = 'abc'
    with pytest.raises(KeyError):
        corpus[''] = 'abc'

    # None is expected to be rejected as a document value.
    with pytest.raises(ValueError):
        corpus['d1'] = None

    # First document.
    corpus['d1'] = 'd1 text'
    assert len(corpus) == 1
    assert 'd1' in corpus
    assert set(corpus.keys()) == {'d1'}
    assert corpus['d1'] == 'd1 text'

    # Second document.
    corpus['d2'] = 'd2 text'
    both_labels = {'d1', 'd2'}
    assert len(corpus) == 2
    for label in corpus:
        assert label in both_labels
    assert set(corpus.keys()) == both_labels

    for label, text in corpus.items():
        assert label in both_labels
        assert corpus[label] == text

    # Deleting an unknown label is expected to raise KeyError.
    with pytest.raises(KeyError):
        del corpus['d3']

    # Remove the documents one by one until the corpus is empty again.
    del corpus['d1']
    assert len(corpus) == 1
    assert set(corpus.keys()) == {'d2'}

    del corpus['d2']
    assert len(corpus) == 0
    assert set(corpus.keys()) == set()