def test_read_write_bytes_lines(self, tmpdir, spacy_doc):
    """Exercise bytes-mode JSON-lines writing for each file extension.

    When ``compat.PY2`` is True, opening the ``.json.xz`` path in ``"wb"``
    mode must raise ``ValueError``, while every other extension must
    round-trip the records unchanged. When ``compat.PY2`` is not True,
    ``io.write_json`` in ``"wb"`` mode must raise ``TypeError``.
    """
    records = []
    for position, sentence in enumerate(spacy_doc.sents):
        records.append({"idx": position, "sent": sentence.text})

    suffixes = (".json", ".json.gz", ".json.bz2", ".json.xz")
    for suffix in suffixes:
        target = str(tmpdir.join("test_read_write_json_lines_bytes" + suffix))

        # Not Python 2: writing JSON in bytes mode is expected to fail.
        if compat.PY2 is not True:
            with pytest.raises(TypeError):
                io.write_json(
                    records,
                    target,
                    mode="wb",
                    encoding=None,
                    make_dirs=True,
                    lines=True,
                )
            continue

        # Python 2: the xz path is expected to be rejected on open.
        if suffix == ".json.xz":
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wb", encoding="utf-8", make_dirs=True)
            continue

        io.write_json(records, target, mode="wb", make_dirs=True, lines=True)
        round_tripped = list(io.read_json(target, mode="rb", lines=True))
        assert round_tripped == records
def test_read_write_unicode(self, tmpdir):
    """Round-trip ``TEXT`` through text-mode write/read for each extension.

    When ``compat.PY2`` is True, every extension other than ``.txt`` is
    expected to raise ``ValueError`` from ``io.open_sesame`` in ``"wt"``
    mode instead of being written.
    """
    source_text = TEXT
    suffixes = (".txt", ".gz", ".bz2", ".xz")
    for suffix in suffixes:
        target = str(tmpdir.join("test_read_write_file_unicode" + suffix))

        if compat.PY2 is True and suffix != ".txt":
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wt", encoding="utf-8", make_dirs=True)
            continue

        io.write_text(source_text, target, mode="wt", make_dirs=True)
        first_item = next(io.read_text(target, mode="rt"))
        assert first_item == source_text
def test_read_write_unicode(self, tmpdir, spacy_doc):
    """Round-trip sentence records as a single JSON document in text mode.

    One ``{"idx", "sent"}`` record is built per sentence of ``spacy_doc``.
    When ``compat.PY2`` is True, every extension other than ``.json`` is
    expected to raise ``ValueError`` from ``io.open_sesame`` in ``"wt"``
    mode instead of being written.
    """
    records = [
        {"idx": position, "sent": sentence.text}
        for position, sentence in enumerate(spacy_doc.sents)
    ]
    for suffix in (".json", ".json.gz", ".json.bz2", ".json.xz"):
        target = str(tmpdir.join("test_read_write_json_unicode" + suffix))
        expect_open_error = compat.PY2 is True and suffix != ".json"

        if not expect_open_error:
            io.write_json(records, target, mode="wt", make_dirs=True)
            first_item = next(io.read_json(target, mode="rt", lines=False))
            assert first_item == records
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wt", encoding=None, make_dirs=True)
def test_read_write_text_unicode(tmpdir):
    """Write ``TEXT`` in text mode and read it back for each extension.

    When ``compat.is_python2`` is True, every extension other than ``.txt``
    is expected to raise ``ValueError`` from ``io.open_sesame`` in ``'wt'``
    mode instead of being written.
    """
    source_text = TEXT
    suffixes = ('.txt', '.gz', '.bz2', '.xz')
    for suffix in suffixes:
        path = str(tmpdir.join('test_read_write_file_unicode' + suffix))

        if compat.is_python2 is True and suffix != '.txt':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding='utf-8', make_dirs=True)
            continue

        io.write_text(source_text, path, mode='wt', make_dirs=True)
        first_item = next(io.read_text(path, mode='rt'))
        assert first_item == source_text
def test_read_write_csv_compressed(tmpdir):
    """Round-trip two CSV rows through plain and compressed extensions.

    When ``compat.is_python2`` is True, every extension other than ``.csv``
    is expected to raise ``ValueError`` from ``io.open_sesame`` in ``'wt'``
    mode instead of being written.
    """
    first_row = ['this is some text', 'scandal', 42.0]
    second_row = ["here's some more text: boom!", 'escándalo', 1.0]
    rows = [first_row, second_row]

    for suffix in ('.csv', '.csv.gz', '.csv.bz2', '.csv.xz'):
        path = str(tmpdir.join('test_read_write_csv' + suffix))

        # Equivalent to "not (is_python2 and suffix != '.csv')".
        if compat.is_python2 is not True or suffix == '.csv':
            io.write_csv(rows, path, make_dirs=True)
            read_back = list(io.read_csv(path))
            assert read_back == rows
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding=None, make_dirs=True)
def test_read_write_json_lines_unicode(tmpdir, spacy_doc):
    """Round-trip sentence records as JSON lines in text mode.

    One ``{'idx', 'sent'}`` record is built per sentence of ``spacy_doc``.
    When ``compat.is_python2`` is True, every extension other than
    ``.json`` is expected to raise ``ValueError`` from ``io.open_sesame``
    in ``'wt'`` mode instead of being written.
    """
    records = []
    for position, sentence in enumerate(spacy_doc.sents):
        records.append({'idx': position, 'sent': sentence.text})

    suffixes = ('.json', '.json.gz', '.json.bz2', '.json.xz')
    for suffix in suffixes:
        path = str(tmpdir.join('test_read_write_json_lines_unicode' + suffix))

        if compat.is_python2 is True and suffix != '.json':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wt', encoding=None, make_dirs=True)
            continue

        io.write_json(records, path, mode='wt', make_dirs=True, lines=True)
        read_back = list(io.read_json(path, mode='rt', lines=True))
        assert read_back == records
def test_read_write_unicode_lines(self, tmpdir, spacy_doc):
    """Write sentence texts line by line in text mode and read them back.

    Lines read back are stripped before being compared with the sentence
    texts. When ``compat.PY2`` is True, every extension other than
    ``.txt`` is expected to raise ``ValueError`` from ``io.open_sesame``
    in ``"wt"`` mode instead of being written.
    """
    sentences = [sentence.text for sentence in spacy_doc.sents]
    suffixes = (".txt", ".gz", ".bz2", ".xz")
    for suffix in suffixes:
        target = str(tmpdir.join("test_read_write_file_lines_unicode" + suffix))

        if compat.PY2 is True and suffix != ".txt":
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wt", encoding=None, make_dirs=True)
            continue

        io.write_text(sentences, target, mode="wt", make_dirs=True, lines=True)
        stripped_lines = []
        for raw_line in io.read_text(target, mode="rt", lines=True):
            stripped_lines.append(raw_line.strip())
        assert stripped_lines == sentences
def test_read_write_docs(self, tmpdir, spacy_doc):
    """Write ``spacy_doc`` to disk and compare lowercased tokens on re-read.

    When ``compat.PY2`` is True, the ``.pkl.xz`` path is expected to raise
    ``ValueError`` from ``io.open_sesame`` in ``"wb"`` mode instead of
    being written.
    """
    lowered_tokens = [token.lower_ for token in spacy_doc]
    suffixes = (".pkl", ".pkl.gz", ".pkl.bz2", ".pkl.xz")
    for suffix in suffixes:
        target = str(tmpdir.join("test_read_write_spacy_docs" + suffix))

        if compat.PY2 is True and suffix == ".pkl.xz":
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wb", encoding=None, make_dirs=True)
            continue

        io.write_spacy_docs(spacy_doc, target, True)
        # Flatten the tokens of every document read back, in order.
        reloaded_tokens = []
        for doc in io.read_spacy_docs(target):
            for token in doc:
                reloaded_tokens.append(token.lower_)
        assert reloaded_tokens == lowered_tokens
def test_read_write_text_lines_bytes(tmpdir, spacy_doc):
    """Write sentence texts as byte lines and read them back.

    Each sentence text is converted with ``compat.unicode_to_bytes``, and
    lines read back are stripped before comparison. When
    ``compat.is_python2`` is True, the ``.xz`` path is expected to raise
    ``ValueError`` from ``io.open_sesame`` in ``'wb'`` mode instead of
    being written.
    """
    byte_lines = []
    for sentence in spacy_doc.sents:
        byte_lines.append(compat.unicode_to_bytes(sentence.text))

    for suffix in ('.txt', '.gz', '.bz2', '.xz'):
        path = str(tmpdir.join('test_read_write_file_lines_bytes' + suffix))
        expect_open_error = compat.is_python2 is True and suffix == '.xz'

        if expect_open_error:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding='utf-8', make_dirs=True)
        else:
            io.write_text(byte_lines, path, mode='wb', make_dirs=True, lines=True)
            stripped_lines = []
            for raw_line in io.read_text(path, mode='rb', lines=True):
                stripped_lines.append(raw_line.strip())
            assert stripped_lines == byte_lines
def test_read_write_compressed(self, tmpdir):
    """Round-trip two CSV rows through plain and compressed extensions.

    When ``compat.PY2`` is True, every extension other than ``.csv`` is
    expected to raise ``ValueError`` from ``io.open_sesame`` in ``"wt"``
    mode instead of being written.
    """
    first_row = ["this is some text", "scandal", 42.0]
    second_row = ["here's some more text: boom!", "escándalo", 1.0]
    rows = [first_row, second_row]

    suffixes = (".csv", ".csv.gz", ".csv.bz2", ".csv.xz")
    for suffix in suffixes:
        target = str(tmpdir.join("test_read_write_csv" + suffix))

        if compat.PY2 is True and suffix != ".csv":
            with pytest.raises(ValueError):
                io.open_sesame(
                    target, mode="wt", encoding=None, make_dirs=True)
            continue

        io.write_csv(rows, target, make_dirs=True)
        read_back = list(io.read_csv(target))
        assert read_back == rows
def test_read_write_text_bytes(tmpdir):
    """Write ``TEXT`` as bytes and read it back for each extension.

    ``TEXT`` is converted with ``compat.unicode_to_bytes`` before writing.
    When ``compat.is_python2`` is True, the ``.xz`` path is expected to
    raise ``ValueError`` from ``io.open_sesame`` in ``"wb"`` mode instead
    of being written.
    """
    payload = compat.unicode_to_bytes(TEXT)
    for suffix in (".txt", ".gz", ".bz2", ".xz"):
        path = str(tmpdir.join("test_read_write_file_bytes" + suffix))

        # Equivalent to "not (is_python2 and suffix == '.xz')".
        if compat.is_python2 is not True or suffix != ".xz":
            io.write_text(payload, path, mode="wb", make_dirs=True)
            first_item = next(io.read_text(path, mode="rb"))
            assert first_item == payload
        else:
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode="wb", encoding="utf-8", make_dirs=True)
def test_read_write_spacy_docs(tmpdir, spacy_doc):
    """Write ``spacy_doc`` to disk and compare token lemmas on re-read.

    When ``compat.is_python2`` is True, the ``.pkl.xz`` path is expected to
    raise ``ValueError`` from ``io.open_sesame`` in ``'wb'`` mode instead
    of being written.
    """
    lemmas = [token.lemma_ for token in spacy_doc]
    suffixes = ('.pkl', '.pkl.gz', '.pkl.bz2', '.pkl.xz')
    for suffix in suffixes:
        path = str(tmpdir.join('test_read_write_spacy_docs' + suffix))

        if compat.is_python2 is True and suffix == '.pkl.xz':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding=None, make_dirs=True)
            continue

        io.write_spacy_docs(spacy_doc, path, True)
        # Flatten the lemmas of every document read back, in order.
        reloaded_lemmas = []
        for doc in io.read_spacy_docs(path):
            for token in doc:
                reloaded_lemmas.append(token.lemma_)
        assert reloaded_lemmas == lemmas
def test_read_write_json_bytes(tmpdir, spacy_doc):
    """Exercise bytes-mode JSON writing for each file extension.

    When ``compat.is_python2`` is True, opening the ``.json.xz`` path in
    ``'wb'`` mode must raise ``ValueError``, while every other extension
    must round-trip the records as a single JSON document. When
    ``compat.is_python2`` is not True, ``io.write_json`` in ``'wb'`` mode
    must raise ``TypeError``.
    """
    records = []
    for position, sentence in enumerate(spacy_doc.sents):
        records.append({'idx': position, 'sent': sentence.text})

    suffixes = ('.json', '.json.gz', '.json.bz2', '.json.xz')
    for suffix in suffixes:
        path = str(tmpdir.join('test_read_write_json_bytes' + suffix))

        # Not Python 2: writing JSON in bytes mode is expected to fail.
        if compat.is_python2 is not True:
            with pytest.raises(TypeError):
                io.write_json(records, path, 'wb', make_dirs=True)
            continue

        # Python 2: the xz path is expected to be rejected on open.
        if suffix == '.json.xz':
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode='wb', encoding='utf-8', make_dirs=True)
            continue

        io.write_json(records, path, mode='wb', make_dirs=True)
        first_item = next(io.read_json(path, mode='rb', lines=False))
        assert first_item == records
def test_read_write_text_lines_bytes(tmpdir, spacy_doc):
    """Round-trip sentence texts as byte lines for each extension.

    Each sentence text is converted with ``compat.unicode_to_bytes``, and
    lines read back are stripped before comparison. When
    ``compat.is_python2`` is True, the ``.xz`` path is expected to raise
    ``ValueError`` from ``io.open_sesame`` in ``"wb"`` mode instead of
    being written.
    """
    byte_lines = []
    for sentence in spacy_doc.sents:
        byte_lines.append(compat.unicode_to_bytes(sentence.text))

    suffixes = (".txt", ".gz", ".bz2", ".xz")
    for suffix in suffixes:
        path = str(tmpdir.join("test_read_write_file_lines_bytes" + suffix))

        if compat.is_python2 is True and suffix == ".xz":
            with pytest.raises(ValueError):
                io.open_sesame(
                    path, mode="wb", encoding="utf-8", make_dirs=True)
            continue

        io.write_text(byte_lines, path, mode="wb", make_dirs=True, lines=True)
        stripped_lines = []
        for raw_line in io.read_text(path, mode="rb", lines=True):
            stripped_lines.append(raw_line.strip())
        assert stripped_lines == byte_lines
def test_unpack_archive(self, tmpdir):
    """Call ``io.unpack_archive`` on a zip file, a tar file, and a text file.

    A small text file is written first, then added to a zip archive and a
    tar archive; each archive is unpacked into ``tmpdir``. The plain text
    file is passed to ``io.unpack_archive`` last. The test makes no
    explicit assertions: it passes if none of the calls raise.
    """
    payload = "Here's some text data to pack and unpack."

    # Computing the paths touches nothing on disk, so do it up front.
    txt_path = str(tmpdir.join("test_unpack_archive.txt"))
    zip_path = str(tmpdir.join("test_unpack_archive.zip"))
    tar_path = str(tmpdir.join("test_unpack_archive.tar"))

    with io.open_sesame(txt_path, mode="wt") as txt_file:
        txt_file.write(payload)

    with zipfile.ZipFile(zip_path, "w") as zip_archive:
        zip_archive.write(txt_path)
    io.unpack_archive(zip_path, extract_dir=tmpdir)

    with tarfile.TarFile(tar_path, "w") as tar_archive:
        tar_archive.add(txt_path)
    io.unpack_archive(tar_path, extract_dir=tmpdir)

    # Finally, hand the non-archive text file to the same function.
    io.unpack_archive(txt_path, extract_dir=tmpdir)