Beispiel #1
0
 def test_list_long_format_human(self):
     """`ls -l -H` (human-readable long listing) prints one line per key."""
     example = self.get_example_data()
     kas.dump(example, self.temp_file)
     stdout, stderr = self.get_output(["ls", self.temp_file, "-l", "-H"])
     self.assertEqual(len(stderr), 0)
     self.assertEqual(len(stdout.splitlines()), len(example))
Beispiel #2
0
 def test_dump_fileobj_single(self):
     """Dump via an open binary file object, then reload by filename."""
     original = {"a": np.arange(10)}
     with open(self.temp_file, "wb") as dest:
         kas.dump(original, dest, engine=self.engine)
     loaded = kas.load(self.temp_file, engine=self.engine)
     self.verify_dicts_equal(original, dict(loaded.items()))
Beispiel #3
0
 def test_py_engine_single(self):
     """Round-trip a store through an in-memory buffer with the Python engine."""
     original = {"a": np.arange(10), "b": np.zeros(100)}
     buffer = io.BytesIO()
     kas.dump(original, buffer, engine=kas.PY_ENGINE)
     buffer.seek(0)
     reloaded = kas.load(buffer, engine=kas.PY_ENGINE)
     self.verify_dicts_equal(original, reloaded)
Beispiel #4
0
 def test_dump(self):
     """The `dump` subcommand prints each array element on its own line."""
     data = self.get_example_data()
     kas.dump(data, self.temp_file)
     for key, array in data.items():
         stdout, stderr = self.get_output(["dump", self.temp_file, key])
         self.assertEqual(len(stderr), 0)
         self.assertEqual(stdout.splitlines(), [str(value) for value in array])
Beispiel #5
0
 def test_c_engine_fails(self):
     """The C engine rejects BytesIO objects (no real file descriptor)."""
     arrays = {"a": np.arange(10), "b": np.zeros(100)}
     buffer = io.BytesIO()
     self.assertRaises(
         io.UnsupportedOperation, kas.dump, arrays, buffer,
         engine=kas.C_ENGINE)
     self.assertRaises(
         io.UnsupportedOperation, kas.load, buffer, engine=kas.C_ENGINE)
Beispiel #6
0
 def validate_storage(self, data):
     """Verify the physical file layout that dump() produces for ``data``.

     Reads the raw bytes back from the file and checks three things in
     order: the descriptor table, the packed key strings, and the
     8-byte-aligned array storage.
     """
     kas.dump(data, self.temp_file, engine=self.engine)
     with open(self.temp_file, "rb") as f:
         contents = f.read()
     # Descriptors are stored back-to-back immediately after the header.
     offset = store.HEADER_SIZE
     descriptors = []
     for _ in range(len(data)):
         descriptor = store.ItemDescriptor.unpack(
             contents[offset:offset + store.ItemDescriptor.size])
         descriptors.append(descriptor)
         offset += store.ItemDescriptor.size
     # Keys must be sorted lexicographically.
     sorted_keys = sorted(data.keys())
     # Keys should be packed sequentially immediately after the descriptors.
     offset = store.HEADER_SIZE + len(data) * store.ITEM_DESCRIPTOR_SIZE
     for d, key in zip(descriptors, sorted_keys):
         self.assertEqual(d.key_start, offset)
         unpacked_key = contents[d.key_start:d.key_start + d.key_len]
         self.assertEqual(key.encode("utf8"), unpacked_key)
         offset += d.key_len
     # Arrays should be packed sequentially immediately after the keys on
     # 8 byte boundaries
     for d, key in zip(descriptors, sorted_keys):
         # Round the running offset up to the next 8-byte boundary.
         remainder = offset % 8
         if remainder != 0:
             offset += 8 - remainder
         self.assertEqual(d.array_start, offset)
         nbytes = d.array_len * store.type_size(d.type)
         array = np.frombuffer(
             contents[d.array_start:d.array_start + nbytes],
             dtype=store.type_to_np_dtype_map[d.type],
         )
         np.testing.assert_equal(data[key], array)
         offset += nbytes
Beispiel #7
0
 def test_item_descriptor_format(self):
     """Check the on-disk byte layout of every item descriptor record."""
     for n in range(10):
         kas.dump(
             {str(j): j * np.ones(j)
              for j in range(n)},
             self.temp_file,
             engine=self.engine,
         )
         with open(self.temp_file, "rb") as f:
             contents = f.read()
         # Header bytes 12:16 hold the item count as a little-endian uint32.
         self.assertEqual(struct.unpack("<I", contents[12:16])[0], n)
         offset = store.HEADER_SIZE
         for _ in range(n):
             descriptor = contents[offset:offset +
                                   store.ITEM_DESCRIPTOR_SIZE]
             offset += store.ITEM_DESCRIPTOR_SIZE
             # Byte 0 is the type code; bytes 8:40 are four uint64 fields.
             type_ = struct.unpack("<B", descriptor[0:1])[0]
             key_start, key_len, array_start, array_len = struct.unpack(
                 "<QQQQ", descriptor[8:40])
             trailer = descriptor[40:store.ITEM_DESCRIPTOR_SIZE]
             # The remainder should be zeros.
             self.assertEqual(
                 trailer,
                 bytearray(0
                           for _ in range(store.ITEM_DESCRIPTOR_SIZE - 40)),
             )
             # Bytes 1-3 are reserved padding and must also be zero.
             self.assertEqual(descriptor[1:4], bytearray([0, 0, 0]))
             self.assertEqual(type_, store.FLOAT64)
             self.assertGreater(key_start, 0)
             self.assertGreater(key_len, 0)
             self.assertGreater(array_start, 0)
             self.assertGreaterEqual(array_len, 0)
Beispiel #8
0
    def verify_offset_columns(self, ts):
        """Corrupt each ``*_offset`` column pair and check that load fails.

        NOTE(review): assumes the convention that a column named
        ``X_offset`` indexes into the ragged column ``X`` — confirm
        against the file format documentation.
        """
        ts.dump(self.temp_file)
        with kastore.load(self.temp_file) as store:
            all_data = dict(store)
        # Pair each "<name>_offset" column with its main column "<name>".
        offset_col_pairs = []
        for col in all_data.keys():
            if col.endswith("_offset"):
                main_col = col[:col.index("_offset")]
                offset_col_pairs.append((main_col, col))
        for col, offset_col in offset_col_pairs:
            num_rows = len(all_data[offset_col]) - 1
            data = dict(all_data)
            # Check bad lengths of the offset col
            for bad_col_length in [[], range(2 * num_rows)]:
                data[offset_col] = bad_col_length
                kastore.dump(data, self.temp_file)
                with pytest.raises(exceptions.FileFormatError):
                    tskit.load(self.temp_file)

            # Check for a bad offset
            data = dict(all_data)
            original_offset = data[offset_col]
            original_col = data[col]
            # All-zero offsets are inconsistent with the replaced main column.
            data[offset_col] = np.zeros_like(original_offset)
            data[col] = np.zeros(10, dtype=original_col.dtype)
            kastore.dump(data, self.temp_file)
            with pytest.raises(exceptions.LibraryError):
                tskit.load(self.temp_file)
Beispiel #9
0
 def test_dump_fileobj_multi(self):
     """Append several stores back-to-back into one open file object."""
     with open(self.temp_file, "wb") as dest:
         for index in range(10):
             chunk = {
                 "i" + str(index): np.arange(index, dtype=int),
                 "f" + str(index): np.arange(index, dtype=float),
             }
             kas.dump(chunk, dest, engine=self.engine)
Beispiel #10
0
 def verify(self, data):
     """Both engines must produce byte-identical files for the same data."""
     outputs = []
     for engine in (kas.C_ENGINE, kas.PY_ENGINE):
         kas.dump(data, self.temp_file, engine=engine)
         with open(self.temp_file, "rb") as source:
             outputs.append(source.read())
     self.assertEqual(outputs[0], outputs[1])
Beispiel #11
0
 def test_bad_arrays(self):
     """An empty list is accepted; non-array-like values raise ValueError."""
     kas.dump(data={"a": []}, filename=self.temp_file, engine=self.engine)
     bad_values = [kas, lambda x: x, "1234", None, [[0, 1], [0, 2]]]
     for value in bad_values:
         with self.assertRaises(ValueError):
             kas.dump(data={"a": value},
                      filename=self.temp_file,
                      engine=self.engine)
Beispiel #12
0
 def test_dump(self):
     """The C engine raises RuntimeError when the extension is not loaded."""
     payload = {"a": np.zeros(1)}
     try:
         # Simulate the C extension being unavailable; restored below.
         kas._kastore_loaded = False
         self.assertRaises(
             RuntimeError, kas.dump, payload, self.temp_file,
             engine=kas.C_ENGINE)
     finally:
         kas._kastore_loaded = True
Beispiel #13
0
 def test_context_manager(self):
     """Using load() as a context manager closes the store on exit."""
     size = 100
     kas.dump({"a": np.arange(size)}, self.temp_file)
     with kas.load(self.temp_file) as store:
         self.assertIn("a", store)
         self.assertTrue(np.array_equal(store["a"], np.arange(size)))
     self.verify_closed(store)
Beispiel #14
0
 def test_manual_close(self):
     """An explicit close() call puts the store into the closed state."""
     size = 100
     kas.dump({"a": np.arange(size)}, self.temp_file)
     store = kas.load(self.temp_file)
     self.assertIn("a", store)
     self.assertTrue(np.array_equal(store["a"], np.arange(size)))
     store.close()
     self.verify_closed(store)
Beispiel #15
0
def simple_example():
    """Write two small arrays to tmp.kas and print what comes back."""
    arrays = {
        "one": np.arange(5, dtype=np.int8),
        "two": np.arange(5, dtype=np.uint64),
    }
    kastore.dump(arrays, "tmp.kas")
    reloaded = kastore.load("tmp.kas")
    print(list(reloaded.items()))
Beispiel #16
0
 def test_list_empty(self):
     """`ls` on an empty store produces no output, with or without flags."""
     kas.dump({}, self.temp_file)
     commands = [
         ["ls", self.temp_file],
         ["ls", "-l", self.temp_file],
         ["ls", "-lH", self.temp_file],
     ]
     for args in commands:
         stdout, stderr = self.get_output(args)
         self.assertEqual(len(stderr), 0)
         self.assertEqual(len(stdout), 0)
Beispiel #17
0
 def test_load_and_dump_file_single_rw(self):
     """Dump then load through the same read-write file object."""
     original = {"a": np.arange(10)}
     with open(self.temp_file, "r+b") as handle:
         kas.dump(original, handle, engine=self.engine)
         for read_all in (True, False):
             handle.seek(0)
             loaded = kas.load(handle, read_all=read_all, engine=self.engine)
             self.verify_dicts_equal(original, dict(loaded.items()))
Beispiel #18
0
 def test_py_engine_multi(self):
     """Stores concatenated in one buffer load back in order."""
     original = {"a": np.arange(10), "b": np.zeros(100)}
     copies = 10
     buffer = io.BytesIO()
     for _ in range(copies):
         kas.dump(original, buffer, engine=kas.PY_ENGINE)
     buffer.seek(0)
     for _ in range(copies):
         reloaded = kas.load(buffer, read_all=True, engine=kas.PY_ENGINE)
         self.verify_dicts_equal(original, reloaded)
Beispiel #19
0
 def test_old_version_load_error(self):
     """Files stamped with an obsolete major version must be rejected."""
     ts = msprime.simulate(10, random_seed=1)
     old_versions = [(0, 1), (0, 8), (2, 0), (CURRENT_FILE_MAJOR - 1, 0)]
     for version in old_versions:
         ts.dump(self.temp_file)
         with kastore.load(self.temp_file) as store:
             contents = dict(store)
         contents["format/version"] = np.array(version, dtype=np.uint32)
         kastore.dump(contents, self.temp_file)
         with pytest.raises(tskit.VersionTooOldError):
             tskit.load(self.temp_file)
Beispiel #20
0
 def test_format_name_error(self):
     """A corrupted format/name field triggers FileFormatError on load."""
     ts = msprime.simulate(10)
     for name in ["tskit.tree", "tskit.treesAndOther", "", "x" * 100]:
         ts.dump(self.temp_file)
         with kastore.load(self.temp_file) as store:
             contents = dict(store)
         contents["format/name"] = np.array(
             bytearray(name.encode()), dtype=np.int8)
         kastore.dump(contents, self.temp_file)
         with pytest.raises(exceptions.FileFormatError):
             tskit.load(self.temp_file)
Beispiel #21
0
 def test_new_version_load_error(self):
     """Files stamped with a future major version must be rejected."""
     ts = msprime.simulate(10, random_seed=1)
     for bump in range(1, 5):
         future_version = (CURRENT_FILE_MAJOR + bump, 0)
         ts.dump(self.temp_file)
         with kastore.load(self.temp_file, use_mmap=False) as store:
             contents = dict(store)
         contents["format/version"] = np.array(future_version, dtype=np.uint32)
         kastore.dump(contents, self.temp_file)
         self.assertRaises(
             msprime.VersionTooNewError, msprime.load, self.temp_file)
Beispiel #22
0
 def verify_fields(self, ts):
     """Deleting any single column must make the file unreadable."""
     ts.dump(self.temp_file)
     with kastore.load(self.temp_file, use_mmap=False) as store:
         full = dict(store)
     for missing in full:
         trimmed = {k: v for k, v in full.items() if k != missing}
         kastore.dump(trimmed, self.temp_file)
         self.assertRaises(
             exceptions.FileFormatError, msprime.load, self.temp_file)
Beispiel #23
0
 def test_load_and_dump_fd_single_rw(self):
     """Dump and load using a raw file descriptor instead of a file object."""
     original = {"a": np.arange(10)}
     with open(self.temp_file, "r+b") as handle:
         descriptor = handle.fileno()
         kas.dump(original, descriptor, engine=self.engine)
         for read_all in (True, False):
             os.lseek(descriptor, 0, os.SEEK_SET)
             loaded = kas.load(
                 descriptor, read_all=read_all, engine=self.engine)
             self.verify_dicts_equal(original, dict(loaded.items()))
Beispiel #24
0
 def verify(self, data):
     """info() must report size, shape and dtype matching each array."""
     kas.dump(data, self.temp_file)
     for read_all in (True, False):
         reloaded = kas.load(self.temp_file, read_all=read_all)
         for key, array in reloaded.items():
             details = reloaded.info(key)
             # str() of the info object should be non-trivial.
             self.assertGreater(len(str(details)), 0)
             self.assertEqual(array.nbytes, details.size)
             self.assertEqual(array.shape, details.shape)
             self.assertEqual(array.dtype, np.dtype(details.dtype))
Beispiel #25
0
 def test_load_fileobj_single(self):
     """Loading from a file object consumes exactly the whole file."""
     original = {"a": np.arange(10)}
     kas.dump(original, self.temp_file, engine=self.engine)
     expected_size = os.stat(self.temp_file).st_size
     for read_all in (True, False):
         with open(self.temp_file, "rb") as source:
             loaded = kas.load(source, read_all=read_all, engine=self.engine)
             copy = dict(loaded.items())
             position = source.tell()
         self.verify_dicts_equal(original, copy)
         self.assertEqual(position, expected_size)
Beispiel #26
0
 def verify_logging(self, args, level):
     """Check that the CLI configures logging with the expected level.

     The call to logging config is mocked out, so no real output is
     checked; this just reuses the CLI machinery in this class.
     """
     kas.dump(self.get_example_data(), self.temp_file)
     log_format = '%(asctime)s %(message)s'
     with mock.patch("logging.basicConfig") as mocked_config:
         stdout, stderr = self.get_output(args + ["ls", self.temp_file])
         mocked_config.assert_called_once_with(level=level, format=log_format)
     return stderr
Beispiel #27
0
 def test_missing_attr(self, ts_fixture, tmp_path, attr):
     """A reference sequence missing one attribute defaults it to ""."""
     source_ts = ts_fixture
     trees_path = tmp_path / "tmp.trees"
     source_ts.dump(trees_path)
     with kastore.load(trees_path) as store:
         contents = dict(store)
     del contents[f"reference_sequence/{attr}"]
     kastore.dump(contents, trees_path)
     reloaded = tskit.load(trees_path)
     assert reloaded.has_reference_sequence
     assert getattr(reloaded.reference_sequence, attr) == ""
Beispiel #28
0
 def handle(self):
     """Echo kastore payloads over the socket until the peer closes."""
     while True:
         try:
             payload = kas.load(
                 self.request.fileno(), engine=self.engine, read_all=True)
         except EOFError:
             # Peer closed the connection; stop echoing.
             break
         kas.dump(dict(payload), self.request.fileno(), engine=self.engine)
     # We only read one list, so shutdown the server straight away
     self.server.shutdown()
Beispiel #29
0
 def test_missing_metadata_schema(self, ts_fixture, tmp_path):
     """Deleting the metadata schema key yields an empty schema repr."""
     source_ts = ts_fixture
     trees_path = tmp_path / "tmp.trees"
     source_ts.dump(trees_path)
     with kastore.load(trees_path) as store:
         contents = dict(store)
     del contents["reference_sequence/metadata_schema"]
     kastore.dump(contents, trees_path)
     reloaded = tskit.load(trees_path)
     assert reloaded.has_reference_sequence
     assert repr(reloaded.reference_sequence.metadata_schema) == ""
Beispiel #30
0
 def test_load_from_pathlib_Path(self):
     """load() accepts a pathlib.Path and leaves the file unmodified."""
     original = {"a": np.arange(10)}
     kas.dump(original, str(self.temp_file), engine=self.engine)
     size_before = self.temp_file.stat().st_size
     for read_all in (True, False):
         loaded = kas.load(
             self.temp_file, read_all=read_all, engine=self.engine)
         copy = dict(loaded.items())
         size_after = self.temp_file.stat().st_size
         self.verify_dicts_equal(original, copy)
         self.assertEqual(size_before, size_after)