def write_prg(output_prefix: str, prg_string: str):
    """
    Writes the prg to outfile.
    Writes it as a human readable string, and also as an integer vector.

    The human readable record goes to `<output_prefix>.prg` and the integer
    vector to `<output_prefix>.bin`.

    The header of the human readable record is derived from the file stem,
    which is expected to look like `<sample>.max_nest<N>.min_match<M>`.
    When the stem does not follow that pattern a warning is logged, the
    sample name falls back to 'sample', and the `max_nest`/`min_match` fields
    (which cannot be recovered either) are left out of the header.

    :param output_prefix: path prefix shared by both output files.
    :param prg_string: the prg, as a string, to write and to encode.
    """
    prg_filename = Path(output_prefix + ".prg")
    regex = re.compile(
        r"^(?P<sample>.+)\.max_nest(?P<max_nest>\d+)\.min_match(?P<min_match>\d+)"
    )

    # Parse the stem *before* opening the output file, so that nothing is
    # created or truncated if header construction fails.
    match = regex.search(prg_filename.stem)
    if match is None:
        # `search` returns None when the pattern is absent; calling `.group`
        # on it would raise AttributeError, so test for None explicitly.
        logging.warning(
            "A sample name couldn't be parsed from the prefix. "
            "Using 'sample' as sample name."
        )
        header = "sample"
    else:
        sample = match.group("sample")
        max_nest = int(match.group("max_nest"))
        min_match = int(match.group("min_match"))
        header = f"{sample} max_nest={max_nest} min_match={min_match}"

    with prg_filename.open("w") as prg:
        print(f">{header}\n{prg_string}", file=prg)

    prg_ints_fpath = Path(output_prefix + ".bin")
    prg_encoder = PrgEncoder()
    prg_ints: PRG_Ints = prg_encoder.encode(prg_string)

    with prg_ints_fpath.open("wb") as ostream:
        prg_encoder.write(prg_ints, ostream)
def test_encode_prg_one_site_deletion(self):
    """Encode a one-site PRG whose first allele is empty (a deletion)."""
    encoded = PrgEncoder().encode(" 5 6 C 5 ")
    self.assertEqual(encoded, [5, 6, 2, 5])
def test_encode_prg_with_one_snp(self):
    """Encode a one-site PRG whose two alleles are the single bases A and C."""
    encoded = PrgEncoder().encode("5 A 6 C 5")
    self.assertEqual(encoded, [5, 1, 6, 2, 5])
def test_encode_prg_spacing_no_variants(self):
    """A lone lowercase base padded with spaces should encode to [1]."""
    encoded = PrgEncoder().encode(" a ")
    self.assertEqual(encoded, [1])
def test_encode_prg_nonlinear_markers(self):
    """Markers that are not a consecutive pair (55/63) are kept as given; mixed-case bases still encode."""
    encoded = PrgEncoder().encode("55 GA 63 Ct 55")
    self.assertEqual(encoded, [55, 3, 1, 63, 2, 4, 55])
def test_encode_prg_multi_base_alleles(self):
    """Alleles made of several bases are expanded into one integer per base."""
    encoded = PrgEncoder().encode("5 GA 6 CT 5")
    self.assertEqual(encoded, [5, 3, 1, 6, 2, 4, 5])
def test_encode_prg_nested_variation(self):
    """Encode a PRG in which site 7/8 is nested inside site 5/6."""
    nested_prg = "5 A 7 C 8 T 8 A 7 6 CT 6 TA 5"
    # Note: the expected output ends each site with its even marker (8, 6)
    # where the input string uses the odd one (7, 5).
    encoded = PrgEncoder().encode(nested_prg)
    self.assertEqual(
        encoded,
        [5, 1, 7, 2, 8, 4, 8, 1, 8, 6, 2, 4, 6, 4, 1, 6],
    )
def write_prg(prg_fname: Path, prg_string: str, options: ArgumentParser):
    """
    Writes the prg to `prg_fname` and/or to a sibling `.bin` file.

    Which outputs are produced is controlled by `options.output_type`:
    `prg` requests the human readable string (a `>` header line followed by
    the prg string), `binary` requests the integer vector, written to
    `prg_fname` with its suffix replaced by `.bin`.

    :param prg_fname: path of the human readable output file.
    :param prg_string: the prg, as a string, to write and/or encode.
    :param options: parsed options; `seqid`, `prg_name`, `max_nesting`,
        `min_match_length` and `output_type` are read from it.
    """
    # `seqid` wins when it is set; otherwise the prg name labels the record.
    record_id = options.seqid or options.prg_name

    if options.output_type.prg:
        with prg_fname.open("w") as text_out:
            title = f">{record_id} max_nest={options.max_nesting} min_match={options.min_match_length}"
            text_out.write(f"{title}\n{prg_string}\n")

    if not options.output_type.binary:
        return

    binary_path = prg_fname.with_suffix(".bin")
    encoder = PrgEncoder()
    encoded: PRG_Ints = encoder.encode(prg_string)
    with binary_path.open("wb") as binary_out:
        encoder.write(encoded, binary_out)
def test_encode_empty_string_returns_empty(self):
    """Encoding the empty string should give an empty list."""
    encoded = PrgEncoder().encode("")
    self.assertEqual(encoded, [])
def test_permutations_of_valid_input_passes(self, prg):
    """Encoding the supplied `prg` must complete without raising; nothing else is asserted."""
    PrgEncoder().encode(prg)
def encode_and_write_prg(self, prg_string: str):
    """Encode `prg_string` with a fresh PrgEncoder and write the result, in binary mode, to `self.out_fname`."""
    encoder = PrgEncoder()
    # Encode first: the output file is only opened once encoding has succeeded.
    encoded = encoder.encode(prg_string)
    with open(self.out_fname, "wb") as binary_out:
        encoder.write(encoded, binary_out)