コード例 #1
0
ファイル: test_io.py プロジェクト: psds01/textacy
 def test_read_write_bytes_lines(self, tmpdir, spacy_doc):
     """Exercise binary-mode JSON-lines writing for each file extension.

     When ``compat.PY2`` is True, the records are written and read back
     (``lines=True``) and compared, except for ".json.xz", where
     ``io.open_sesame`` is expected to raise ValueError. Otherwise the
     binary-mode ``io.write_json`` call is expected to raise TypeError.
     """
     records = []
     for idx, span in enumerate(spacy_doc.sents):
         records.append({"idx": idx, "sent": span.text})

     for suffix in (".json", ".json.gz", ".json.bz2", ".json.xz"):
         target = str(tmpdir.join("test_read_write_json_lines_bytes" + suffix))

         if compat.PY2 is not True:
             with pytest.raises(TypeError):
                 io.write_json(
                     records,
                     target,
                     mode="wb",
                     encoding=None,
                     make_dirs=True,
                     lines=True,
                 )
             continue

         if suffix == ".json.xz":
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wb", encoding="utf-8", make_dirs=True)
             continue

         io.write_json(
             records, target, mode="wb", make_dirs=True, lines=True)
         loaded = list(io.read_json(target, mode="rb", lines=True))
         assert loaded == records
コード例 #2
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_unicode(self, tmpdir):
     """Write TEXT in text mode and read it back for each file extension.

     When ``compat.PY2`` is True and the extension is not ".txt", the test
     instead expects ``io.open_sesame`` to raise ValueError.
     """
     reference = TEXT
     for suffix in (".txt", ".gz", ".bz2", ".xz"):
         target = str(tmpdir.join("test_read_write_file_unicode" + suffix))
         can_round_trip = compat.PY2 is not True or suffix == ".txt"
         if can_round_trip:
             io.write_text(reference, target, mode="wt", make_dirs=True)
             # read_text is consumed with next(): only the first item is compared.
             first_item = next(io.read_text(target, mode="rt"))
             assert first_item == reference
         else:
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wt", encoding="utf-8", make_dirs=True)
コード例 #3
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_unicode(self, tmpdir, spacy_doc):
     """Write sentence records as JSON in text mode and read them back.

     One record ({"idx", "sent"}) is built per sentence of ``spacy_doc``.
     When ``compat.PY2`` is True and the extension is not ".json", the test
     instead expects ``io.open_sesame`` to raise ValueError.
     """
     records = []
     for idx, span in enumerate(spacy_doc.sents):
         records.append({"idx": idx, "sent": span.text})

     for suffix in (".json", ".json.gz", ".json.bz2", ".json.xz"):
         target = str(tmpdir.join("test_read_write_json_unicode" + suffix))
         if compat.PY2 is True and suffix != ".json":
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wt", encoding=None, make_dirs=True)
             continue
         io.write_json(records, target, mode="wt", make_dirs=True)
         # lines=False: the first item yielded is compared to the full record list.
         reader = io.read_json(target, mode="rt", lines=False)
         loaded = next(reader)
         assert loaded == records
コード例 #4
0
def test_read_write_text_unicode(tmpdir):
    """Write TEXT in text mode and read it back for each file extension.

    When ``compat.is_python2`` is True and the extension is not '.txt', the
    test instead expects ``io.open_sesame`` to raise ValueError.
    """
    reference = TEXT
    for suffix in ('.txt', '.gz', '.bz2', '.xz'):
        path = str(tmpdir.join('test_read_write_file_unicode' + suffix))
        if compat.is_python2 is True and suffix != '.txt':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding='utf-8', make_dirs=True)
            continue
        io.write_text(reference, path, mode='wt', make_dirs=True)
        reader = io.read_text(path, mode='rt')
        first_item = next(reader)
        assert first_item == reference
コード例 #5
0
def test_read_write_csv_compressed(tmpdir):
    """Write two CSV rows and read them back for each file extension.

    When ``compat.is_python2`` is True and the extension is not '.csv', the
    test instead expects ``io.open_sesame`` to raise ValueError.
    """
    first_row = ['this is some text', 'scandal', 42.0]
    second_row = ["here's some more text: boom!", 'escándalo', 1.0]
    rows = [first_row, second_row]
    for suffix in ('.csv', '.csv.gz', '.csv.bz2', '.csv.xz'):
        path = str(tmpdir.join('test_read_write_csv' + suffix))
        can_round_trip = compat.is_python2 is not True or suffix == '.csv'
        if can_round_trip:
            io.write_csv(rows, path, make_dirs=True)
            loaded = list(io.read_csv(path))
            assert loaded == rows
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding=None, make_dirs=True)
コード例 #6
0
def test_read_write_json_lines_unicode(tmpdir, spacy_doc):
    """Write sentence records as JSON lines in text mode and read them back.

    One record ({'idx', 'sent'}) is built per sentence of ``spacy_doc``.
    When ``compat.is_python2`` is True and the extension is not '.json', the
    test instead expects ``io.open_sesame`` to raise ValueError.
    """
    records = []
    for idx, span in enumerate(spacy_doc.sents):
        records.append({'idx': idx, 'sent': span.text})

    for suffix in ('.json', '.json.gz', '.json.bz2', '.json.xz'):
        path = str(tmpdir.join('test_read_write_json_lines_unicode' + suffix))
        if compat.is_python2 is True and suffix != '.json':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding=None, make_dirs=True)
            continue
        io.write_json(records, path, mode='wt', make_dirs=True, lines=True)
        loaded = list(io.read_json(path, mode='rt', lines=True))
        assert loaded == records
コード例 #7
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_unicode_lines(self, tmpdir, spacy_doc):
     """Write sentence texts as lines in text mode and read them back.

     Each line read is stripped before comparison with the sentence texts.
     When ``compat.PY2`` is True and the extension is not ".txt", the test
     instead expects ``io.open_sesame`` to raise ValueError.
     """
     sentences = [span.text for span in spacy_doc.sents]
     for suffix in (".txt", ".gz", ".bz2", ".xz"):
         target = str(tmpdir.join("test_read_write_file_lines_unicode" + suffix))
         can_round_trip = compat.PY2 is not True or suffix == ".txt"
         if can_round_trip:
             io.write_text(
                 sentences, target, mode="wt", make_dirs=True, lines=True)
             stripped = []
             for raw_line in io.read_text(target, mode="rt", lines=True):
                 stripped.append(raw_line.strip())
             assert stripped == sentences
         else:
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wt", encoding=None, make_dirs=True)
コード例 #8
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_docs(self, tmpdir, spacy_doc):
     """Write ``spacy_doc`` to disk and compare the tokens read back.

     Comparison is on each token's ``lower_`` attribute, flattened across
     all docs read. When ``compat.PY2`` is True and the extension is
     ".pkl.xz", the test instead expects ``io.open_sesame`` to raise
     ValueError.
     """
     lowered = [token.lower_ for token in spacy_doc]
     for suffix in (".pkl", ".pkl.gz", ".pkl.bz2", ".pkl.xz"):
         target = str(tmpdir.join("test_read_write_spacy_docs" + suffix))
         if compat.PY2 is True and suffix == ".pkl.xz":
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wb", encoding=None, make_dirs=True)
             continue
         io.write_spacy_docs(spacy_doc, target, True)
         loaded = []
         for doc in io.read_spacy_docs(target):
             for token in doc:
                 loaded.append(token.lower_)
         assert loaded == lowered
コード例 #9
0
def test_read_write_text_lines_bytes(tmpdir, spacy_doc):
    """Write sentence texts as byte lines in binary mode and read them back.

    Each sentence text is converted with ``compat.unicode_to_bytes``, and
    each line read is stripped before comparison. When ``compat.is_python2``
    is True and the extension is '.xz', the test instead expects
    ``io.open_sesame`` to raise ValueError.
    """
    byte_lines = []
    for span in spacy_doc.sents:
        byte_lines.append(compat.unicode_to_bytes(span.text))

    for suffix in ('.txt', '.gz', '.bz2', '.xz'):
        path = str(tmpdir.join('test_read_write_file_lines_bytes' + suffix))
        if compat.is_python2 is True and suffix == '.xz':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding='utf-8', make_dirs=True)
            continue
        io.write_text(byte_lines, path, mode='wb', make_dirs=True, lines=True)
        stripped = []
        for raw_line in io.read_text(path, mode='rb', lines=True):
            stripped.append(raw_line.strip())
        assert stripped == byte_lines
コード例 #10
0
ファイル: test_io.py プロジェクト: neilaconway/textacy
 def test_read_write_compressed(self, tmpdir):
     """Write two CSV rows and read them back for each file extension.

     When ``compat.PY2`` is True and the extension is not ".csv", the test
     instead expects ``io.open_sesame`` to raise ValueError.
     """
     first_row = ["this is some text", "scandal", 42.0]
     second_row = ["here's some more text: boom!", "escándalo", 1.0]
     rows = [first_row, second_row]
     for suffix in (".csv", ".csv.gz", ".csv.bz2", ".csv.xz"):
         target = str(tmpdir.join("test_read_write_csv" + suffix))
         if compat.PY2 is True and suffix != ".csv":
             with pytest.raises(ValueError):
                 io.open_sesame(
                     target, mode="wt", encoding=None, make_dirs=True)
             continue
         io.write_csv(rows, target, make_dirs=True)
         loaded = list(io.read_csv(target))
         assert loaded == rows
コード例 #11
0
ファイル: test_io.py プロジェクト: yashchoubey/textacy
def test_read_write_text_bytes(tmpdir):
    """Write TEXT as bytes in binary mode and read it back per extension.

    TEXT is converted with ``compat.unicode_to_bytes``. When
    ``compat.is_python2`` is True and the extension is ".xz", the test
    instead expects ``io.open_sesame`` to raise ValueError.
    """
    payload = compat.unicode_to_bytes(TEXT)
    for suffix in (".txt", ".gz", ".bz2", ".xz"):
        path = str(tmpdir.join("test_read_write_file_bytes" + suffix))
        can_round_trip = compat.is_python2 is not True or suffix != ".xz"
        if can_round_trip:
            io.write_text(payload, path, mode="wb", make_dirs=True)
            reader = io.read_text(path, mode="rb")
            first_item = next(reader)
            assert first_item == payload
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode="wb", encoding="utf-8", make_dirs=True)
コード例 #12
0
def test_read_write_spacy_docs(tmpdir, spacy_doc):
    """Write ``spacy_doc`` to disk and compare the tokens read back.

    Comparison is on each token's ``lemma_`` attribute, flattened across all
    docs read. When ``compat.is_python2`` is True and the extension is
    '.pkl.xz', the test instead expects ``io.open_sesame`` to raise
    ValueError.
    """
    lemmas = [token.lemma_ for token in spacy_doc]
    for suffix in ('.pkl', '.pkl.gz', '.pkl.bz2', '.pkl.xz'):
        path = str(tmpdir.join('test_read_write_spacy_docs' + suffix))
        if compat.is_python2 is True and suffix == '.pkl.xz':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding=None, make_dirs=True)
            continue
        io.write_spacy_docs(spacy_doc, path, True)
        loaded = []
        for doc in io.read_spacy_docs(path):
            for token in doc:
                loaded.append(token.lemma_)
        assert loaded == lemmas
コード例 #13
0
def test_read_write_json_bytes(tmpdir, spacy_doc):
    """Exercise binary-mode JSON writing for each file extension.

    When ``compat.is_python2`` is True, the records are written and read
    back (``lines=False``) and compared, except for '.json.xz', where
    ``io.open_sesame`` is expected to raise ValueError. Otherwise the
    binary-mode ``io.write_json`` call is expected to raise TypeError.
    """
    records = []
    for idx, span in enumerate(spacy_doc.sents):
        records.append({'idx': idx, 'sent': span.text})

    for suffix in ('.json', '.json.gz', '.json.bz2', '.json.xz'):
        path = str(tmpdir.join('test_read_write_json_bytes' + suffix))

        if compat.is_python2 is not True:
            with pytest.raises(TypeError):
                io.write_json(records, path, 'wb', make_dirs=True)
            continue

        if suffix == '.json.xz':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding='utf-8', make_dirs=True)
            continue

        io.write_json(records, path, mode='wb', make_dirs=True)
        reader = io.read_json(path, mode='rb', lines=False)
        loaded = next(reader)
        assert loaded == records
コード例 #14
0
ファイル: test_io.py プロジェクト: yashchoubey/textacy
def test_read_write_text_lines_bytes(tmpdir, spacy_doc):
    """Write sentence texts as byte lines in binary mode and read them back.

    Each sentence text is converted with ``compat.unicode_to_bytes``, and
    each line read is stripped before comparison. When ``compat.is_python2``
    is True and the extension is ".xz", the test instead expects
    ``io.open_sesame`` to raise ValueError.
    """
    byte_lines = []
    for span in spacy_doc.sents:
        byte_lines.append(compat.unicode_to_bytes(span.text))

    for suffix in (".txt", ".gz", ".bz2", ".xz"):
        path = str(tmpdir.join("test_read_write_file_lines_bytes" + suffix))
        can_round_trip = compat.is_python2 is not True or suffix != ".xz"
        if can_round_trip:
            io.write_text(
                byte_lines, path, mode="wb", make_dirs=True, lines=True)
            stripped = []
            for raw_line in io.read_text(path, mode="rb", lines=True):
                stripped.append(raw_line.strip())
            assert stripped == byte_lines
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode="wb", encoding="utf-8", make_dirs=True)
コード例 #15
0
 def test_unpack_archive(self, tmpdir):
     """Call ``io.unpack_archive`` on a zip, a tar, and a plain text file.

     A small text file is written first, then added to a zip archive and a
     tar archive. The test makes no assertions about the results; it only
     requires that the three ``unpack_archive`` calls do not raise.
     """
     text_path = str(tmpdir.join("test_unpack_archive.txt"))
     payload = "Here's some text data to pack and unpack."
     with io.open_sesame(text_path, mode="wt") as text_file:
         text_file.write(payload)

     zip_path = str(tmpdir.join("test_unpack_archive.zip"))
     with zipfile.ZipFile(zip_path, "w") as zip_archive:
         zip_archive.write(text_path)
     io.unpack_archive(zip_path, extract_dir=tmpdir)

     tar_path = str(tmpdir.join("test_unpack_archive.tar"))
     with tarfile.TarFile(tar_path, "w") as tar_archive:
         tar_archive.add(text_path)
     io.unpack_archive(tar_path, extract_dir=tmpdir)

     # Finally, pass the plain (non-archive) text file itself.
     io.unpack_archive(text_path, extract_dir=tmpdir)