def verify_required_columns(self, tables, table_name, required_cols):
    """
    Check that ``required_cols`` are both sufficient and necessary for
    loading ``table_name`` through ``LightweightTableCollection.fromdict``.

    First a version of the table holding only the required columns (all
    other columns set to None) is loaded and the required columns are
    compared with what ``asdict`` returns. Then each required column is in
    turn set to None, and in turn removed, and ``fromdict`` is expected to
    raise TypeError every time.
    """
    encoded = tables.asdict()
    source_table = encoded[table_name]
    # Start with every column as None, then fill in just the required ones.
    minimal = dict.fromkeys(source_table.keys())
    minimal.update((name, source_table[name]) for name in required_cols)
    encoded[table_name] = minimal

    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    round_tripped = collection.asdict()
    for name in required_cols:
        same = np.array_equal(round_tripped[table_name][name], minimal[name])
        self.assertTrue(same)

    def assert_rejected(broken_table):
        # Load a fresh encoding in which only ``table_name`` is replaced.
        payload = tables.asdict()
        payload[table_name] = broken_table
        fresh = c_module.LightweightTableCollection()
        with self.assertRaises(TypeError):
            fresh.fromdict(payload)

    # A required column that is present but None must be refused.
    for name in required_cols:
        assert_rejected({**minimal, name: None})

    # A required column that is absent altogether must be refused too.
    for name in required_cols:
        without_column = dict(minimal)
        del without_column[name]
        assert_rejected(without_column)
def test_missing_sequence_length(self):
    """``fromdict`` raises TypeError when ``sequence_length`` is absent."""
    encoded = get_example_tables().asdict()
    del encoded["sequence_length"]
    collection = c_module.LightweightTableCollection()
    with self.assertRaises(TypeError):
        collection.fromdict(encoded)
def verify_metadata_schema(self, tables, table_name):
    """
    Load ``tables`` with the ``metadata_schema`` of ``table_name`` set to
    None and check that the key is left out of the ``asdict`` output and
    that the rebuilt tskit table reports an empty schema string.
    """
    encoded = tables.asdict()
    encoded[table_name]["metadata_schema"] = None

    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    decoded = collection.asdict()
    self.assertNotIn("metadata_schema", decoded[table_name])

    rebuilt = tskit.TableCollection.fromdict(decoded)
    schema = getattr(rebuilt, table_name).metadata_schema
    self.assertEqual(str(schema), "")
def test_missing_metadata_schema(self):
    """
    Removing the top-level ``metadata_schema`` key still loads, and the
    round-tripped tables end up with an empty schema string.
    """
    example = get_example_tables()
    # Precondition: the example really does carry a schema to lose.
    self.assertNotEqual(str(example.metadata_schema), "")

    encoded = example.asdict()
    del encoded["metadata_schema"]
    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)

    rebuilt = tskit.TableCollection.fromdict(collection.asdict())
    self.assertEqual(str(rebuilt.metadata_schema), "")
def test_missing_tables(self):
    """
    Delete each top-level entry other than ``sequence_length`` in turn and
    assert that ``fromdict`` raises ValueError.
    """
    tables = get_example_tables()
    removable = set(tables.asdict().keys()) - {"sequence_length"}
    for missing in removable:
        # Work on a fresh encoding so only one entry is missing at a time.
        encoded = tables.asdict()
        del encoded[missing]
        collection = c_module.LightweightTableCollection()
        with self.assertRaises(ValueError):
            collection.fromdict(encoded)
def test_bad_top_level_types(self):
    """
    Replace each top-level entry (apart from ``encoding_version``) with a
    list and assert that ``fromdict`` raises TypeError.
    """
    tables = get_example_tables()
    keys_to_corrupt = set(tables.asdict().keys()) - {"encoding_version"}
    for key in keys_to_corrupt:
        corrupted = tables.asdict()
        # A list is used as the wrong-typed value for every entry, whether
        # that entry is a table or a scalar such as sequence_length.
        corrupted[key] = ["12345"]
        collection = c_module.LightweightTableCollection()
        with self.assertRaises(TypeError):
            collection.fromdict(corrupted)
def verify_optional_column(self, tables, table_len, table_name, col_name):
    """
    Load ``tables`` with ``col_name`` of ``table_name`` set to None and
    assert that the column read back equals ``table_len`` values of -1.
    """
    encoded = tables.asdict()
    encoded[table_name][col_name] = None

    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    actual = collection.asdict()[table_name][col_name]

    expected = np.full(table_len, -1, dtype=np.int32)
    self.assertTrue(np.array_equal(actual, expected))
def test_missing_metadata(self):
    """
    Removing the top-level ``metadata`` key still loads; the rebuilt tables
    report ``{"top-level": []}`` as their metadata.
    """
    example = get_example_tables()
    # Precondition: the example has non-empty metadata to begin with.
    self.assertNotEqual(example.metadata, b"")

    encoded = example.asdict()
    del encoded["metadata"]
    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)

    rebuilt = tskit.TableCollection.fromdict(collection.asdict())
    # The schema is still present, so the empty byte field is decoded by it.
    self.assertEqual(rebuilt.metadata, {"top-level": []})
def test_top_level_metadata_schema(self):
    """
    A top-level ``metadata_schema`` of None loads, is omitted from the
    ``asdict`` output, and yields an empty schema string after rebuilding.
    """
    encoded = get_example_tables().asdict()
    # None stands for "use the default value".
    encoded["metadata_schema"] = None

    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    decoded = collection.asdict()
    self.assertNotIn("metadata_schema", decoded)

    rebuilt = tskit.TableCollection.fromdict(decoded)
    self.assertEqual(str(rebuilt.metadata_schema), "")
def verify_offset_pair(self, tables, table_len, table_name, col_name):
    """
    Exercise a ragged column and its ``<col_name>_offset`` companion.

    Three situations are checked for ``table_name``:

    - both columns None: loads, giving an empty data column and an offset
      column of ``table_len + 1`` zeros;
    - exactly one of the two None: ``fromdict`` raises TypeError;
    - an offset column whose leading entries are reversed: ``fromdict``
      raises ``c_module.LibraryError``.
    """
    offset_col = col_name + "_offset"

    # Case 1: the whole pair is omitted together.
    encoded = tables.asdict()
    encoded[table_name][col_name] = None
    encoded[table_name][offset_col] = None
    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    decoded_table = collection.asdict()[table_name]
    self.assertEqual(decoded_table[col_name].shape, (0,))
    expected_offsets = np.zeros(table_len + 1, dtype=np.uint32)
    self.assertTrue(np.array_equal(decoded_table[offset_col], expected_offsets))

    # Case 2: only one half of the pair is None -> TypeError.
    for lone_none in (col_name, offset_col):
        encoded = tables.asdict()
        encoded[table_name][lone_none] = None
        collection = c_module.LightweightTableCollection()
        with self.assertRaises(TypeError):
            collection.fromdict(encoded)

    # Case 3: same final offset, but everything before it in reverse order.
    encoded = tables.asdict()
    offsets = encoded[table_name][offset_col]
    scrambled = offsets.copy()
    scrambled[:-1] = offsets[:-1][::-1]
    encoded[table_name][offset_col] = scrambled
    collection = c_module.LightweightTableCollection()
    with self.assertRaises(c_module.LibraryError):
        collection.fromdict(encoded)
def test_pickled_examples(self):
    """
    Load every pickled dict encoding under ``data/dict-encodings`` (relative
    to this file) with both ``LightweightTableCollection.fromdict`` and
    ``tskit.TableCollection.fromdict``.

    The test fails if no file with "msprime" in its file name was seen,
    which guards against the directory being empty or misplaced.
    """
    seen_msprime = False
    test_dir = pathlib.Path(__file__).parent / "data/dict-encodings"
    # Sorted so the files are processed in a reproducible order.
    for path in sorted(test_dir.glob("*.pkl")):
        # Match on the file name only: a parent directory that happens to
        # contain "msprime" must not satisfy the sanity check below.
        if "msprime" in path.name:
            seen_msprime = True
        # glob() already yields paths that include test_dir, so the path is
        # opened as-is rather than being joined onto test_dir a second time.
        # NOTE: pickle.load can execute arbitrary code; it is used here on
        # fixture files stored next to this test module.
        with open(path, "rb") as f:
            d = pickle.load(f)
        lwt = c_module.LightweightTableCollection()
        lwt.fromdict(d)
        tskit.TableCollection.fromdict(d)
    # Check we've done something
    self.assertTrue(seen_msprime)
def test_top_level_metadata(self):
    """
    A top-level ``metadata`` of None loads, is omitted from the ``asdict``
    output, and the rebuilt tables report ``{"top-level": []}``.
    """
    encoded = get_example_tables().asdict()
    # None stands for "use the default value".
    encoded["metadata"] = None

    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    decoded = collection.asdict()
    self.assertNotIn("metadata", decoded)

    rebuilt = tskit.TableCollection.fromdict(decoded)
    # Only the metadata was dropped; the schema remains and therefore still
    # decodes the now-empty byte field.
    self.assertEqual(rebuilt.metadata, {"top-level": []})
def verify_columns(self, value):
    """
    Substitute ``value`` for every column of every top-level entry other
    than ``sequence_length``, one column at a time, and assert that
    ``fromdict`` raises ValueError for each substitution.
    """
    tables = get_example_tables()
    baseline = tables.asdict()
    for table_name in set(baseline.keys()) - {"sequence_length"}:
        pristine_table = baseline[table_name]
        for column in pristine_table.keys():
            # Only this one column differs from the pristine table.
            altered_table = dict(pristine_table)
            altered_table[column] = value
            collection = c_module.LightweightTableCollection()
            payload = tables.asdict()
            payload[table_name] = altered_table
            with self.assertRaises(ValueError):
                collection.fromdict(payload)
def verify(self, num_rows):
    """
    Truncate one column at a time to its first ``num_rows`` entries and
    assert that ``fromdict`` raises ValueError.

    Every column of every top-level entry other than ``sequence_length`` is
    visited, in sorted order.
    """
    tables = get_example_tables()
    baseline = tables.asdict()
    for table_name in sorted(set(baseline.keys()) - {"sequence_length"}):
        pristine_table = baseline[table_name]
        for column in sorted(pristine_table.keys()):
            # Only this one column is cut short; the others keep their length.
            altered_table = dict(pristine_table)
            altered_table[column] = pristine_table[column][:num_rows].copy()
            collection = c_module.LightweightTableCollection()
            payload = tables.asdict()
            payload[table_name] = altered_table
            with self.assertRaises(ValueError):
                collection.fromdict(payload)
def test_missing_tables(self):
    """
    Delete each table entry in turn and assert that ``fromdict`` raises
    TypeError.

    The top-level keys ``sequence_length``, ``metadata``,
    ``metadata_schema`` and ``encoding_version`` are excluded from the
    entries that get deleted.
    """
    excluded_keys = {
        "sequence_length",
        "metadata",
        "metadata_schema",
        "encoding_version",
    }
    tables = get_example_tables()
    for missing in tables.asdict().keys() - excluded_keys:
        # Work on a fresh encoding so only one table is missing at a time.
        encoded = tables.asdict()
        del encoded[missing]
        collection = c_module.LightweightTableCollection()
        with self.assertRaises(TypeError):
            collection.fromdict(encoded)
def test_mutations(self):
    """
    Run the shared column checks against the mutations table, then check
    that a ``time`` column of None is read back as all-unknown times.
    """
    tables = get_example_tables()
    required = ["site", "node", "derived_state", "derived_state_offset"]
    self.verify_required_columns(tables, "mutations", required)
    num_mutations = len(tables.mutations)
    self.verify_offset_pair(tables, num_mutations, "mutations", "metadata")
    self.verify_metadata_schema(tables, "mutations")

    # The time column is optional: None is accepted on input.
    encoded = tables.asdict()
    encoded["mutations"]["time"] = None
    collection = c_module.LightweightTableCollection()
    collection.fromdict(encoded)
    times = collection.asdict()["mutations"]["time"]
    self.assertTrue(all(util.is_unknown_time(t) for t in times))
def test_version(self):
    """A new collection reports ``encoding_version`` as ``(1, 1)``."""
    encoded = c_module.LightweightTableCollection().asdict()
    self.assertEqual(encoded["encoding_version"], (1, 1))
def verify(self, tables):
    """
    Round-trip ``tables`` through a ``LightweightTableCollection`` and
    assert that the rebuilt ``tskit.TableCollection`` equals the input.
    """
    collection = c_module.LightweightTableCollection()
    collection.fromdict(tables.asdict())
    round_tripped = tskit.TableCollection.fromdict(collection.asdict())
    self.assertEqual(tables, round_tripped)