コード例 #1
0
 def test_read_write_docs(self, tmpdir, spacy_doc):
     """Round-trip ``spacy_doc`` through a file for each ``.pkl*`` extension.

     For every extension, the doc is written with ``io.write_spacy_docs`` and
     read back with ``io.read_spacy_docs``; the lowercased token texts read
     back must equal those of the original ``spacy_doc``.
     """
     want = [token.lower_ for token in spacy_doc]
     extensions = (".pkl", ".pkl.gz", ".pkl.bz2", ".pkl.xz")
     for suffix in extensions:
         target = tmpdir.join("test_read_write_spacy_docs" + suffix)
         path = str(target)
         io.write_spacy_docs(spacy_doc, path, make_dirs=True)
         # Flatten the tokens of every doc read back into a single list.
         got = []
         for loaded in io.read_spacy_docs(path):
             got.extend(token.lower_ for token in loaded)
         assert got == want
コード例 #2
0
ファイル: test_io.py プロジェクト: psds01/textacy
 def test_read_write_docs_binary(self, tmpdir, spacy_doc):
     """Round-trip ``spacy_doc`` through a file written with ``format="binary"``.

     Reading with ``lang=None`` is expected to raise ``ValueError``; reading
     with ``lang="en"`` must give back the original lowercased token texts.
     """
     want = [token.lower_ for token in spacy_doc]
     path = str(tmpdir.join("test_read_write_spacy_docs_binary.bin"))
     io.write_spacy_docs(spacy_doc, path, True, format="binary")

     # Without a language, requesting the first doc must raise ValueError.
     with pytest.raises(ValueError):
         docs_without_lang = io.read_spacy_docs(path, format="binary", lang=None)
         next(docs_without_lang)

     got = []
     for loaded in io.read_spacy_docs(path, format="binary", lang="en"):
         for token in loaded:
             got.append(token.lower_)
     assert got == want
コード例 #3
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_docs_binary_exclude(self, tmpdir, spacy_doc):
     """Round-trip ``spacy_doc`` in "binary" format while passing ``exclude``.

     The doc is written with ``exclude=["sentiment", "user_data"]`` and read
     back with ``lang="en"``; the lowercased token texts must be unchanged.
     """
     want = [token.lower_ for token in spacy_doc]
     path = str(tmpdir.join("test_read_write_spacy_docs_binary_exclude.bin"))
     excluded = ["sentiment", "user_data"]
     io.write_spacy_docs(spacy_doc, path, True, format="binary", exclude=excluded)
     loaded_docs = io.read_spacy_docs(path, format="binary", lang="en")
     got = [token.lower_ for loaded in loaded_docs for token in loaded]
     assert got == want
コード例 #4
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_docs(self, tmpdir, spacy_doc):
     """Round-trip ``spacy_doc`` through a file for each ``.pkl*`` extension.

     When ``compat.PY2`` is True, the ``.pkl.xz`` case instead checks that
     ``io.open_sesame`` raises ``ValueError``. In every other case the
     lowercased token texts read back must equal the original ones.
     """
     want = [token.lower_ for token in spacy_doc]
     for suffix in (".pkl", ".pkl.gz", ".pkl.bz2", ".pkl.xz"):
         path = str(tmpdir.join("test_read_write_spacy_docs" + suffix))
         if compat.PY2 is True and suffix == ".pkl.xz":
             # This combination only verifies that opening the file fails.
             with pytest.raises(ValueError):
                 io.open_sesame(path, mode="wb", encoding=None, make_dirs=True)
             continue
         io.write_spacy_docs(spacy_doc, path, True)
         got = []
         for loaded in io.read_spacy_docs(path):
             got.extend(token.lower_ for token in loaded)
         assert got == want
コード例 #5
0
def test_read_write_spacy_docs(tmpdir, spacy_doc):
    """Round-trip ``spacy_doc`` through a file for each ``.pkl*`` extension.

    When ``compat.is_python2`` is True, the ``.pkl.xz`` case instead checks
    that ``io.open_sesame`` raises ``ValueError``. In every other case the
    token lemmas read back must equal those of the original ``spacy_doc``.
    """
    want = [token.lemma_ for token in spacy_doc]
    extensions = ('.pkl', '.pkl.gz', '.pkl.bz2', '.pkl.xz')
    for suffix in extensions:
        path = str(tmpdir.join('test_read_write_spacy_docs' + suffix))
        if compat.is_python2 is True and suffix == '.pkl.xz':
            # This combination only verifies that opening the file fails.
            with pytest.raises(ValueError):
                io.open_sesame(path, mode='wb', encoding=None, make_dirs=True)
            continue
        io.write_spacy_docs(spacy_doc, path, True)
        got = []
        for loaded in io.read_spacy_docs(path):
            got.extend(token.lemma_ for token in loaded)
        assert got == want
コード例 #6
0
ファイル: test_io.py プロジェクト: dbragdon1/textacy
 def test_read_write_docs_binary_attrs(self, tmpdir, spacy_doc):
     """Round-trip ``spacy_doc`` in "binary" format while passing ``attrs``.

     The doc is written with ``attrs=["ORTH", "TAG"]`` and
     ``store_user_data=False``, then read back with
     ``lang="en_core_web_sm"``; the token tags must be unchanged.
     """
     want = [token.tag_ for token in spacy_doc]
     # NOTE(review): the file name mentions "exclude" although this test
     # exercises ``attrs`` -- presumably a copy-paste leftover; confirm
     # before renaming.
     target = tmpdir.join("test_read_write_spacy_docs_binary_exclude.bin")
     path = str(target)
     io.write_spacy_docs(
         spacy_doc,
         path,
         make_dirs=True,
         format="binary",
         attrs=["ORTH", "TAG"],
         store_user_data=False,
     )
     loaded_docs = io.read_spacy_docs(
         path, format="binary", lang="en_core_web_sm"
     )
     got = []
     for loaded in loaded_docs:
         got.extend(token.tag_ for token in loaded)
     assert got == want