def test_get_text_hexdigest_invalid():
    """get_text_hexdigest raises TypeError for None and for a list"""
    for invalid in (None, []):
        with pytest.raises(TypeError):
            get_text_hexdigest(invalid)
def test_md5sum_text():
    """a truthy digest is returned for non-ASCII and for ASCII text"""
    for text in (u"åbcde", "abcde"):
        digest = get_text_hexdigest(text)
        assert digest
def read(self, identifier):
    """reads data corresponding to identifier

    Parameters
    ----------
    identifier
        name of the record to read. A DataStoreMember whose parent is this
        store is replaced by its ``name`` before opening.

    Returns
    -------
    Whatever ``read()`` of the object returned by ``self.open(identifier)``
    produces.

    Notes
    -----
    When ``self._md5`` is truthy, the digest of the data is stored in
    ``self._checksums`` under the (possibly replaced) identifier.
    """
    if isinstance(identifier, DataStoreMember) and identifier.parent is self:
        identifier = identifier.name

    source = self.open(identifier)
    try:
        data = source.read()
    finally:
        # always release the handle, even when reading raises
        source.close()

    if self._md5:
        self._checksums[identifier] = get_text_hexdigest(data)
    return data
def test_md5sum_text():
    """digests are computed for text and match known values for fixture files"""
    for text in ("åbcde", "abcde"):
        digest = get_text_hexdigest(text)
        assert digest

    # fixture files with different line endings, paired with the expected digest
    cases = (
        ("96eb2c2632bae19eb65ea9224aaafdad", "sample-lf.fasta"),
        ("e7e219f66be15d8afc7cdb85303305a7", "sample-crlf.fasta"),
    )
    for expected, filename in cases:
        path = TEST_ROOTDIR / filename
        raw = path.read_bytes()
        print(path, repr(raw))
        computed = get_text_hexdigest(raw)
        assert computed == expected, (path, repr(raw))
def write(self, identifier, data):
    """writes data under identifier and returns the corresponding member

    The identifier is resolved to its relative and absolute forms. When
    ``self._md5`` is truthy, the digest of data is stored in
    ``self._checksums`` under the absolute form before the data is written.
    The data is written via ``atomic_write`` using the relative form, with
    ``in_zip=self.source``.

    Returns
    -------
    A DataStoreMember for the relative identifier. It is appended to
    ``self._members`` only if the relative identifier was not already in
    this store and ends with ``self.suffix``.
    """
    rel_name = self.get_relative_identifier(identifier)
    abs_name = self.get_absolute_identifier(rel_name, from_relative=True)

    if self._md5:
        self._checksums[abs_name] = get_text_hexdigest(data)

    with atomic_write(str(rel_name), in_zip=self.source) as handle:
        handle.write(data)

    record = DataStoreMember(rel_name, self)
    if rel_name not in self:
        if rel_name.endswith(self.suffix):
            self._members.append(record)
    return record
def test_read_from_written():
    """text and file digests agree for content written with LF and CRLF endings"""
    template = "abcdeENDedfguENDyhbnd"
    # expected digest paired with the line ending substituted for END
    cases = (
        ("f06597f8a983dfc93744192b505a8af9", "\n"),
        ("39db5cc2f7749f02e0c712a3ece12ffc", "\r\n"),
    )
    with TemporaryDirectory(dir=TEST_ROOTDIR) as dirname:
        for ex, lf in cases:
            target = Path(dirname) / "test.txt"
            content = template.replace("END", lf)
            # write bytes so the line endings reach disk unchanged
            target.write_bytes(content.encode("utf-8"))
            from_text = get_text_hexdigest(content)
            assert from_text == ex, (from_text, ex)
            got = get_file_hexdigest(target)
            assert got == from_text, f"FAILED: {repr(lf)}, {(ex, got)}"
def write(self, identifier, data):
    """writes data under identifier and returns the corresponding member

    The parent class ``write`` is invoked first. The text after the last
    "." in identifier must then be ``self.suffix`` or "log". When
    ``self._md5`` is truthy, the digest of data is stored in
    ``self._checksums`` under the absolute identifier before the data is
    written via ``atomic_write`` to the absolute identifier.

    Returns
    -------
    A DataStoreMember for the relative identifier. It is appended to
    ``self._members`` only if the relative identifier was not already in
    this store and ends with ``self.suffix``.

    Raises
    ------
    ValueError
        if the identifier's final "."-separated component is neither
        ``self.suffix`` nor "log".
    """
    super().write(identifier, data)

    allowed_suffixes = (self.suffix, "log")
    ending = identifier.split(".")[-1]
    if ending not in allowed_suffixes:
        raise ValueError(
            f"identifier does not end with required suffix {self.suffix}"
        )

    rel_name = self.get_relative_identifier(identifier)
    abs_name = self.get_absolute_identifier(rel_name, from_relative=True)

    if self._md5:
        self._checksums[abs_name] = get_text_hexdigest(data)

    with atomic_write(str(abs_name), in_zip=False) as handle:
        handle.write(data)

    record = DataStoreMember(rel_name, self)
    if rel_name not in self:
        if rel_name.endswith(self.suffix):
            self._members.append(record)
    return record
def read(self, identifier):
    """returns the result of ``self.open(identifier)``

    When ``self._md5`` is truthy and the result is a ``str``, its digest is
    stored in ``self._checksums`` under identifier. Results that are not
    ``str`` are returned without a checksum being recorded.
    """
    contents = self.open(identifier)
    if not self._md5 or not isinstance(contents, str):
        return contents

    self._checksums[identifier] = get_text_hexdigest(contents)
    return contents