def test_missing_format_keys(self):
    """Files lacking a required format attribute must be rejected.

    Each attribute set below omits ``format_name``, ``format_version`` or
    both. For every one of them, ``tszip.decompress`` and
    ``tszip.print_summary`` are expected to raise ``FileFormatError``.
    """
    values = [{}, {"format_name": ""}, {"format_version": ""}]
    for attrs in values:
        # subTest labels a failure with the offending attribute set and lets
        # the remaining sets run instead of stopping at the first failure.
        with self.subTest(attrs=attrs):
            self.write_file(attrs, self.path)
            with self.assertRaises(exceptions.FileFormatError):
                tszip.decompress(self.path)
            with self.assertRaises(exceptions.FileFormatError):
                tszip.print_summary(self.path)
def test_bad_format_name(self):
    """A file whose ``format_name`` is not a valid value must be rejected.

    The version is held at ``[1, 0]`` while the name is varied; both
    ``tszip.decompress`` and ``tszip.print_summary`` are expected to raise
    ``FileFormatError`` for every bad name.
    """
    for bad_name in ["", "xyz", [1234]]:
        # subTest labels a failure with the offending name and lets the
        # remaining names run instead of stopping at the first failure.
        with self.subTest(bad_name=bad_name):
            self.write_file(
                {"format_name": bad_name, "format_version": [1, 0]}, self.path
            )
            with self.assertRaises(exceptions.FileFormatError):
                tszip.decompress(self.path)
            with self.assertRaises(exceptions.FileFormatError):
                tszip.print_summary(self.path)
def test_format_too_new(self):
    """A file declaring format version [2, 0] is rejected.

    Both ``tszip.decompress`` and ``tszip.print_summary`` are expected to
    raise ``FileFormatError`` for it.
    """
    attrs = {"format_name": "tszip", "format_version": [2, 0]}
    self.write_file(attrs, self.path)

    with self.assertRaises(exceptions.FileFormatError):
        tszip.decompress(self.path)

    with self.assertRaises(exceptions.FileFormatError):
        tszip.print_summary(self.path)
def test_keep(self):
    """``--keep`` leaves the input file in place next to the ``.tsz`` output."""
    source = self.trees_path
    self.assertTrue(source.exists())

    self.run_tszip([str(source), "--keep"])

    # With --keep the original file must still be there after compression.
    self.assertTrue(source.exists())
    compressed = pathlib.Path(str(source) + ".tsz")
    self.assertTrue(compressed.exists())

    # The compressed file must round-trip to the same tables.
    restored = tszip.decompress(compressed)
    self.assertEqual(restored.tables, self.ts.tables)
def test_suffix(self):
    """``-S`` selects the suffix appended to the compressed output file."""
    custom_suffix = ".XYZasdf"
    source = self.trees_path
    self.assertTrue(source.exists())

    self.run_tszip([str(source), "-S", custom_suffix])

    # Without --keep the input is removed once the output has been written.
    self.assertFalse(source.exists())
    compressed = pathlib.Path(str(source) + custom_suffix)
    self.assertTrue(compressed.exists())

    restored = tszip.decompress(compressed)
    self.assertEqual(restored.tables, self.ts.tables)
def run_decompress(args):
    """Decompress ``args.file`` and dump it to the same path minus ``.zarr``.

    Raises:
        ValueError: if ``args.file`` does not end with ``.zarr``.
    """
    source = args.file
    logger.info("Decompressing {}".format(source))

    if not source.endswith(".zarr"):
        raise ValueError("Compressed file must have .zarr suffix")

    ts = tszip.decompress(source)

    # Drop the trailing ".zarr" (five characters) to get the output path.
    outfile = source[:-len(".zarr")]
    logger.info("Writing to {}".format(outfile))
    ts.dump(outfile)
def verify(self, ts):
    """Round-trip ``ts`` through tszip and compare the genotype matrices."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        archive = pathlib.Path(scratch_dir) / "treeseq.tsz"
        tszip.compress(ts, archive)
        roundtripped = tszip.decompress(archive)

        expected_genotypes = ts.genotype_matrix()
        actual_genotypes = roundtripped.genotype_matrix()
        self.assertTrue(np.array_equal(expected_genotypes, actual_genotypes))
def test_overwrite(self):
    """``--force`` replaces an already existing output file."""
    source = self.trees_path
    self.assertTrue(source.exists())

    # Create an empty file where the output will go so tszip has to overwrite it.
    compressed = pathlib.Path(str(source) + ".tsz")
    compressed.touch()
    self.assertTrue(source.exists())

    self.run_tszip([str(source), "--force"])

    self.assertFalse(source.exists())
    self.assertTrue(compressed.exists())

    # The placeholder must have been replaced by a real, loadable archive.
    restored = tszip.decompress(compressed)
    self.assertEqual(restored.tables, self.ts.tables)
def test_variants_only(self):
    """``--variants-only`` changes the tables but preserves the genotypes."""
    source = self.trees_path
    self.assertTrue(source.exists())

    self.run_tszip([str(source), "--variants-only"])

    self.assertFalse(source.exists())
    compressed = pathlib.Path(str(source) + ".tsz")
    self.assertTrue(compressed.exists())

    restored = tszip.decompress(compressed)
    # The tables are expected to differ from the original ...
    self.assertNotEqual(restored.tables, self.ts.tables)

    # ... while the genotype matrix has to be identical.
    restored_genotypes = restored.genotype_matrix()
    original_genotypes = self.ts.genotype_matrix()
    self.assertTrue(np.array_equal(restored_genotypes, original_genotypes))
def run_decompress(args):
    """Decompress every file in ``args.files``.

    Each output path is the input path with ``args.suffix`` stripped. The
    process exits with an error message when a file name does not end with
    that suffix.
    """
    setup_logging(args)

    for name in args.files:
        logger.info("Decompressing {}".format(name))

        suffix = args.suffix
        if not name.endswith(suffix):
            exit("Compressed file must have '{}' suffix".format(suffix))

        source = pathlib.Path(name)
        # Output path: the input name with the suffix removed.
        target = pathlib.Path(name[:-len(suffix)])
        check_output(target, args)

        with check_load_errors(name):
            ts = tszip.decompress(name)
        ts.dump(str(target))
        logger.info("Wrote {}".format(target))

        remove_input(source, args)
def run_decompress(args):
    """Decompress every file in ``args.files``, to a file or to stdout.

    When ``args.stdout`` is set, the tree sequence is dumped to
    ``sys.stdout`` and ``args.keep`` is forced to True. Otherwise the output
    path is the input path with ``args.suffix`` stripped. The process exits
    with an error message when a file name does not end with that suffix.
    """
    setup_logging(args)

    for file_arg in args.files:
        logger.info(f"Decompressing {file_arg}")
        if not file_arg.endswith(args.suffix):
            exit(f"Compressed file must have '{args.suffix}' suffix")

        source = pathlib.Path(file_arg)
        if not args.stdout:
            # Output path: the input name with the suffix removed.
            stem_length = len(args.suffix)
            outfile = pathlib.Path(file_arg[:-stem_length])
            check_output(outfile, args)
        else:
            # Streaming to stdout: set keep so remove_input sees keep=True.
            args.keep = True
            outfile = sys.stdout

        with check_load_errors(file_arg):
            ts = tszip.decompress(file_arg)
        ts.dump(outfile)
        logger.info(f"Wrote {outfile}")

        remove_input(source, args)
def verify(self, ts):
    """Round-trip ``ts`` with ``variants_only=True`` and check what survives.

    Skips the test when ``ts`` contains migrations. Otherwise it checks that
    variants (genotypes, site position, alleles) and the sequence length,
    populations, individuals and sites tables are unchanged, and that exactly
    one provenance record was added.
    """
    if ts.num_migrations > 0:
        raise unittest.SkipTest("Migrations not supported")

    with tempfile.TemporaryDirectory() as scratch_dir:
        archive = pathlib.Path(scratch_dir) / "treeseq.tsz"
        tszip.compress(ts, archive, variants_only=True)
        other_ts = tszip.decompress(archive)

        self.assertEqual(ts.num_sites, other_ts.num_sites)
        variant_pairs = zip(ts.variants(), other_ts.variants())
        for original_var, restored_var in variant_pairs:
            self.assertTrue(
                np.array_equal(original_var.genotypes, restored_var.genotypes)
            )
            self.assertEqual(
                original_var.site.position, restored_var.site.position
            )
            self.assertEqual(original_var.alleles, restored_var.alleles)

        # Populations, individuals and sites should be untouched if there are
        # no unreachable individuals.
        original_tables = ts.tables
        restored_tables = other_ts.tables
        self.assertEqual(
            original_tables.sequence_length, restored_tables.sequence_length
        )
        for table_name in ("populations", "individuals", "sites"):
            self.assertEqual(
                getattr(original_tables, table_name),
                getattr(restored_tables, table_name),
            )

        # We should be adding an extra provenance record in here due to
        # simplify.
        self.assertEqual(
            len(original_tables.provenances),
            len(restored_tables.provenances) - 1,
        )
# In[12]: arg_list = sys.argv model = arg_list[1] + '_' list_ts_files = [ x for x in os.listdir('tree_seq_files/') if x.startswith(model) ] n = len(list_ts_files) # ## 1) Compute statistics from list of .ts files # In[13]: # iterate through that list, getting stats for every file object for file in list_ts_files: ts = tszip.decompress('tree_seq_files/' + file) sample_list = make_sample_list(ts) tajimas_pi.append(ts.diversity(sample_sets=sample_list)) D_DenAnc.append(D_AncEst(ts, "DEN")) D_NeaAnc.append(D_AncEst(ts, "NEA")) f4_DenAnc_a.append(f4_DenAncEst(ts, "a")) f4_DenAnc_b.append(f4_DenAncEst(ts, "b")) f4_Est_c.append(f4_AncEst(ts, "c")) f4_Est_d.append(f4_AncEst(ts, "d")) f4_Est_e.append(f4_AncEst(ts, "e")) mig_segs_DEN_AYT, mig_segs_DEN_PAP, mig_segs_NEA_AYT, mig_segs_NEA_PAP = get_mig_segs( ts) mean_tractL_DEN.append( np.array([np.mean(mig_segs_DEN_PAP), np.mean(mig_segs_DEN_AYT)])) mean_tractL_NEA.append(
def test_load_dir(self):
    """Passing a directory to ``tszip.decompress`` raises ``OSError``."""
    directory = self.path.parent
    with self.assertRaises(OSError):
        tszip.decompress(directory)
def test_missing_file(self):
    """Decompressing a nonexistent path raises ``FileNotFoundError``."""
    # Callable form of assertRaises: equivalent to wrapping the call in a
    # ``with self.assertRaises(...)`` block.
    self.assertRaises(FileNotFoundError, tszip.decompress, "/no/such/file")
def test_wrong_format(self):
    """Files containing arbitrary text are rejected with ``FileFormatError``.

    Covers an empty file, a short string and a 1024-character string.
    """
    for contents in ["", "1234", "X" * 1024]:
        # subTest labels a failure with the offending payload and lets the
        # remaining payloads run instead of stopping at the first failure.
        with self.subTest(contents=contents):
            with open(str(self.path), "w") as f:
                f.write(contents)
            with self.assertRaises(exceptions.FileFormatError):
                tszip.decompress(self.path)
def test_decompress(self):
    """The stored ``1.0.0.trees.tsz`` file decompresses to ``1.0.0.trees``."""
    data_dir = pathlib.Path(__file__).parent / "files"
    decompressed = tszip.decompress(data_dir / "1.0.0.trees.tsz")
    reference = tskit.load(data_dir / "1.0.0.trees")
    assert decompressed == reference
def verify(self, ts):
    """Round-trip ``ts`` through tszip and require identical tables."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        archive = pathlib.Path(scratch_dir) / "treeseq.tsz"
        tszip.compress(ts, archive)
        roundtripped = tszip.decompress(archive)
        self.assertEqual(ts.tables, roundtripped.tables)