def test_unsupported_version(self):
    """Check that unsupported versions are rejected on both dump and load.

    ``dump_legacy`` is expected to raise ``ValueError`` for ``version=4``, and
    ``load_legacy`` is expected to raise when handed a file written by
    ``TreeSequence.dump``.
    """
    tree_seq = msprime.simulate(10)
    with pytest.raises(ValueError):
        tskit.dump_legacy(tree_seq, self.temp_file, version=4)

    # A file written in the current format cannot be read by the legacy loader.
    tree_seq.dump(self.temp_file)
    # h5py throws different exception types on py2 and py3, so only the
    # common base class can be asserted here.
    with pytest.raises(Exception):
        tskit.load_legacy(self.temp_file)
def test_no_h5py(self):
    """Check that the legacy entry points raise ImportError when h5py is missing.

    Both ``tskit.load_legacy`` and ``tskit.dump_legacy`` are expected to raise
    ``ImportError`` carrying the installation hint in ``msg``.
    """
    import re  # local import: only needed to escape the expected message

    ts = msprime.simulate(10)
    path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.3.0_v2.0.hdf5")
    msg = (
        "Legacy formats require h5py. Install via `pip install h5py` or"
        " `conda install h5py`"
    )
    # ``match`` is interpreted as a regular expression; escape the message so
    # that the "." characters are matched literally rather than as wildcards.
    pattern = re.escape(msg)
    # A ``None`` entry in sys.modules makes ``import h5py`` fail with ImportError.
    with mock.patch.dict(sys.modules, {"h5py": None}):
        with pytest.raises(ImportError, match=pattern):
            tskit.load_legacy(path)
        # NOTE(review): this targets a file under test_data_dir; if the import
        # guard ever regressed, the dump could overwrite that fixture.
        with pytest.raises(ImportError, match=pattern):
            tskit.dump_legacy(ts, path)
def test_duplicate_mutation_positions_single_value(self):
    """Check duplicate handling when every mutation position is set to 0.

    For legacy versions 2 and 3, all mutation positions in the dumped file
    are overwritten with 0. Loading must then raise
    ``tskit.DuplicatePositionsError`` unless ``remove_duplicate_positions=True``
    is passed, in which case a single site at position 0 is expected.
    """
    ts = multi_locus_with_mutation_example()
    for version in [2, 3]:
        tskit.dump_legacy(ts, self.temp_file, version=version)
        # The context manager closes the HDF5 handle even if the write fails,
        # so a failure here cannot leave the temp file open for later steps.
        with h5py.File(self.temp_file, "r+") as root:
            root["mutations/position"][:] = 0
        with pytest.raises(tskit.DuplicatePositionsError):
            tskit.load_legacy(self.temp_file)
        tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True)
        assert tsp.num_sites == 1
        sites = list(tsp.sites())
        assert sites[0].position == 0
def test_no_h5py(self):
    """Check that HDF5-backed entry points raise ImportError when h5py is missing.

    An HDF5 file is written to ``self.temp_file`` while h5py is still
    importable; then, with h5py hidden, ``tskit.load``, ``tskit.load_legacy``
    and ``tskit.dump_legacy`` are each expected to raise ``ImportError``
    carrying the installation hint in ``msg``.
    """
    import re  # local import: only needed to escape the expected message

    ts = msprime.simulate(10)
    msg = (
        "Legacy formats require h5py. Install via `pip install h5py` or"
        " `conda install h5py`"
    )
    # ``match`` is interpreted as a regular expression; escape the message so
    # that the "." characters are matched literally rather than as wildcards.
    pattern = re.escape(msg)
    with h5py.File(self.temp_file, "w") as root:
        root["x"] = np.zeros(10)
    # A ``None`` entry in sys.modules makes ``import h5py`` fail with ImportError.
    with mock.patch.dict(sys.modules, {"h5py": None}):
        with pytest.raises(ImportError, match=pattern):
            tskit.load(self.temp_file)
        with pytest.raises(ImportError, match=pattern):
            tskit.load_legacy(self.temp_file)
        with pytest.raises(ImportError, match=pattern):
            tskit.dump_legacy(ts, self.temp_file)
def test_duplicate_mutation_positions(self):
    """Check duplicate handling when the first two mutation positions coincide.

    For legacy versions 2 and 3, the first mutation position in the dumped
    file is overwritten with the second. Loading must then raise
    ``tskit.DuplicatePositionsError`` unless ``remove_duplicate_positions=True``
    is passed, in which case the resulting site positions are expected to
    equal the edited position array without its first entry.
    """
    ts = multi_locus_with_mutation_example()
    for version in [2, 3]:
        tskit.dump_legacy(ts, self.temp_file, version=version)
        # The context manager closes the HDF5 handle even if the edit fails.
        with h5py.File(self.temp_file, "r+") as root:
            # Copy into memory so the values stay available after the file closes.
            position = np.array(root["mutations/position"])
            position[0] = position[1]
            root["mutations/position"][:] = position
        with pytest.raises(tskit.DuplicatePositionsError):
            tskit.load_legacy(self.temp_file)
        tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True)
        assert tsp.num_sites == position.shape[0] - 1
        position_after = [s.position for s in tsp.sites()]
        assert list(position[1:]) == position_after
def test_v2_no_samples(self):
    """Check that a version 2 file still loads after its "samples" entry is deleted.

    The loaded tree sequence is compared with the original via
    ``verify_tree_sequences_equal``.
    """
    ts = multi_locus_with_mutation_example()
    tskit.dump_legacy(ts, self.temp_file, version=2)
    # The context manager closes the HDF5 handle even if the deletion fails.
    with h5py.File(self.temp_file, "r+") as root:
        del root["samples"]
    tsp = tskit.load_legacy(self.temp_file)
    self.verify_tree_sequences_equal(ts, tsp)
def verify_malformed_json_v2(self, ts, group_name, attr, bad_json):
    """Check that a version 2 file loads despite a malformed JSON attribute.

    ``ts`` is dumped in legacy version 2, the attribute ``attr`` on the HDF5
    group ``group_name`` is overwritten with ``bad_json``, and the file is
    loaded again. The result is compared with ``ts`` via
    ``verify_tree_sequences_equal``.
    """
    tskit.dump_legacy(ts, self.temp_file, 2)
    # Write some bad JSON to the provenance string. The context manager
    # closes the HDF5 handle even if the attribute write fails.
    with h5py.File(self.temp_file, "r+") as root:
        group = root[group_name]
        group.attrs[attr] = bad_json
    tsp = tskit.load_legacy(self.temp_file)
    self.verify_tree_sequences_equal(ts, tsp)
def verify_round_trip(self, ts, version):
    """Round-trip ``ts`` through a legacy file and then through a current file.

    The tree sequence is dumped in the given legacy ``version``, loaded back,
    dumped again with ``TreeSequence.dump`` and loaded with ``tskit.load``.
    After each load it is compared with ``ts``; the comparison is run with
    ``simplify=True`` when ``version`` is below 10. Finally every provenance
    record of the last loaded tree sequence is validated.
    """
    needs_simplify = version < 10

    # Legacy format: dump, reload, compare.
    tskit.dump_legacy(ts, self.temp_file, version=version)
    from_legacy = tskit.load_legacy(self.temp_file)
    self.verify_tree_sequences_equal(ts, from_legacy, simplify=needs_simplify)

    # Current format: dump the reloaded tree sequence, reload, compare.
    from_legacy.dump(self.temp_file)
    from_current = tskit.load(self.temp_file)
    self.verify_tree_sequences_equal(ts, from_current, simplify=needs_simplify)

    for entry in from_current.provenances():
        record = json.loads(entry.record)
        tskit.validate_provenance(record)
def run_upgrade(args):
    """Load a legacy file and write it back out with ``TreeSequence.dump``.

    Reads ``args.source`` with ``tskit.load_legacy``, passing
    ``args.remove_duplicate_positions`` as the second argument, and dumps the
    result to ``args.destination``.

    Raises:
        SystemExit: with an explanatory message if the source file contains
            duplicate mutation positions (``tskit.DuplicatePositionsError``).
    """
    # Local import: ``sys.exit`` replaces the ``exit`` builtin, which is
    # injected by the ``site`` module for interactive use and is not
    # available when Python is started with ``-S``.
    import sys

    try:
        tree_sequence = tskit.load_legacy(args.source, args.remove_duplicate_positions)
    except tskit.DuplicatePositionsError:
        sys.exit(
            "Error: Duplicate mutation positions in the source file detected.\n\n"
            "This is not supported in the current file format. Running \"upgrade -d\" "
            "will remove these duplicate positions. However, this will result in loss "
            "of data from the original file!"
        )
    tree_sequence.dump(args.destination)
def run_upgrade(args):
    """Load a legacy file and write it back out with ``TreeSequence.dump``.

    Reads ``args.source`` with ``tskit.load_legacy``, passing
    ``args.remove_duplicate_positions`` as the second argument, and dumps the
    result to ``args.destination``.

    Raises:
        SystemExit: with an explanatory message if
            ``tskit.DuplicatePositionsError`` is raised while loading or
            dumping.
    """
    # Local import: ``sys.exit`` replaces the ``exit`` builtin, which is
    # injected by the ``site`` module for interactive use and is not
    # available when Python is started with ``-S``.
    import sys

    try:
        tree_sequence = tskit.load_legacy(args.source, args.remove_duplicate_positions)
        tree_sequence.dump(args.destination)
    except tskit.DuplicatePositionsError:
        sys.exit(
            "Error: Duplicate mutation positions in the source file detected.\n\n"
            "This is not supported in the current file format. Running \"upgrade -d\" "
            "will remove these duplicate positions. However, this will result in loss "
            "of data from the original file!"
        )
def test_msprime_v_0_3_0(self):
    """Load the stored msprime-0.3.0_v2.0.hdf5 file and verify the result."""
    legacy_file = os.path.join(
        test_data_dir,
        "hdf5-formats",
        "msprime-0.3.0_v2.0.hdf5",
    )
    loaded = tskit.load_legacy(legacy_file)
    self.verify_tree_sequence(loaded)
def test_unknown_legacy_version(self):
    """Check that load_legacy raises ValueError for an unrecognised format_version."""
    # The context manager closes the HDF5 handle even if the attribute write fails.
    with h5py.File(self.temp_file, "w") as root:
        root.attrs["format_version"] = (1024, 0)  # Arbitrary unknown version
    with pytest.raises(ValueError):
        tskit.load_legacy(self.temp_file)
def test_no_version_number(self):
    """Check that load_legacy raises ValueError when no format_version is set.

    The file written here carries only an unrelated attribute ``x``.
    """
    # The context manager closes the HDF5 handle even if the attribute write fails.
    with h5py.File(self.temp_file, "w") as root:
        root.attrs["x"] = 0
    with pytest.raises(ValueError):
        tskit.load_legacy(self.temp_file)