def test_dna_returns_list_of_ints_between_1_and_4(self, mock_method: Mock):
    """Encoding the unit "ACGT" yields [1, 2, 3, 4] and calls the mock once per base."""
    # NOTE(review): `mock_method` is injected from outside this block (the
    # patch decorator is not visible here); presumably it wraps the per-base
    # conversion routine -- confirm against the decorator.
    dna = "ACGT"
    result = PrgEncoder()._encode_unit(dna)
    self.assertEqual(result, [1, 2, 3, 4])

    # The mock must have seen each base exactly once, in sequence order.
    calls_per_base = [call(base) for base in dna]
    self.assertEqual(mock_method.call_args_list, calls_per_base)
def write_prg(output_prefix: str, prg_string: str):
    """
    Writes the prg to outfile.
    Writes it as a human readable string, and also as an integer vector

    Two files are produced next to each other:

    * ``<output_prefix>.prg`` -- a ``>``-prefixed header line followed by
      ``prg_string``.
    * ``<output_prefix>.bin`` -- the integer encoding of ``prg_string`` as
      produced and serialised by ``PrgEncoder``.

    The header is derived from the file stem, which is expected to look like
    ``<sample>.max_nest<N>.min_match<M>``. When the stem follows that pattern
    the header is ``<sample> max_nest=<N> min_match=<M>``. When it does not, a
    warning is logged and the header falls back to the bare name ``sample``
    (there are no nesting/match values to report in that case).
    """
    prg_filename = Path(output_prefix + ".prg")

    # Work out the header before opening the file so that nothing on disk is
    # touched until we know what to write.
    regex = re.compile(
        r"^(?P<sample>.+)\.max_nest(?P<max_nest>\d+)\.min_match(?P<min_match>\d+)"
    )
    match = regex.search(prg_filename.stem)
    if match is None:
        # `search` returns None on failure; calling `.group` on it would raise
        # AttributeError, so the fallback has to be an explicit None check.
        logging.warning(
            "A sample name couldn't be parsed from the prefix. "
            "Using 'sample' as sample name."
        )
        header = "sample"
    else:
        sample = match.group("sample")
        max_nest = int(match.group("max_nest"))
        min_match = int(match.group("min_match"))
        header = f"{sample} max_nest={max_nest} min_match={min_match}"

    with prg_filename.open("w") as prg:
        print(f">{header}\n{prg_string}", file=prg)

    prg_ints_fpath = Path(output_prefix + ".bin")
    prg_encoder = PrgEncoder()
    prg_ints: PRG_Ints = prg_encoder.encode(prg_string)

    with prg_ints_fpath.open("wb") as ostream:
        prg_encoder.write(prg_ints, ostream)
def test_encode_prg_nested_variation(self):
    """A PRG with one site nested inside another encodes to the expected ints."""
    nested_prg = "5 A 7 C 8 T 8 A 7 6 CT 6 TA 5"
    # In the expected output the markers that close each site (the second 7
    # and the final 5 of the input) appear as the even markers 8 and 6.
    expected = [5, 1, 7, 2, 8, 4, 8, 1, 8, 6, 2, 4, 6, 4, 1, 6]
    self.assertEqual(PrgEncoder().encode(nested_prg), expected)
def test_encode_prg_spacing_no_variants(self):
    """A single lowercase base padded with spaces encodes to [1]."""
    padded_base = " a "
    encoded = PrgEncoder().encode(padded_base)
    self.assertEqual(encoded, [1])
def test_encode_prg_one_site_deletion(self):
    """A site whose first allele is empty (a deletion) encodes to [5, 6, 2, 5]."""
    deletion_prg = " 5 6 C 5 "
    encoded = PrgEncoder().encode(deletion_prg)
    self.assertEqual(encoded, [5, 6, 2, 5])
def test_dnaToInt_default_encoding_int(self):
    """With the default encoding, both "A" and "a" convert to 1."""
    encoder = PrgEncoder()
    # Upper case first, then lower case: the conversion is case-insensitive.
    for base in ("A", "a"):
        self.assertEqual(encoder._dna_to_int(base), 1)
def test_dnaToInt_custom_encoding(self):
    """A user-supplied encoding ({"A": 7}) is honoured, here for lowercase "a"."""
    custom_encoder = PrgEncoder(encoding={"A": 7})
    converted = custom_encoder._dna_to_int("a")
    self.assertEqual(converted, 7)
def test_encode_prg_with_one_snp(self):
    """A single SNP site ("5 A 6 C 5") encodes to [5, 1, 6, 2, 5]."""
    snp_prg = "5 A 6 C 5"
    encoded = PrgEncoder().encode(snp_prg)
    self.assertEqual(encoded, [5, 1, 6, 2, 5])
def test_dnaToInt_empty_string_raises_assert_error(self):
    """Converting the empty string raises ConversionError with a descriptive message."""
    encoder = PrgEncoder()
    char = ""

    with self.assertRaises(ConversionError) as context:
        encoder._dna_to_int(char)

    # assertIn (rather than assertTrue(... in ...)) so that a failure reports
    # the actual exception text instead of just "False is not true".
    self.assertIn("Char '' is not in", str(context.exception))
def test_encode_prg_nonlinear_markers(self):
    """Multi-digit, non-consecutive markers (55, 63) pass through as whole integers."""
    prg_with_big_markers = "55 GA 63 Ct 55"
    encoded = PrgEncoder().encode(prg_with_big_markers)
    # Mixed-case bases ("Ct") are encoded the same way as upper-case ones.
    self.assertEqual(encoded, [55, 3, 1, 63, 2, 4, 55])
def test_encode_prg_multi_base_alleles(self):
    """Alleles longer than one base are expanded into one integer per base."""
    multi_base_prg = "5 GA 6 CT 5"
    encoded = PrgEncoder().encode(multi_base_prg)
    self.assertEqual(encoded, [5, 3, 1, 6, 2, 4, 5])
def write_prg(prg_fname: Path, prg_string: str, options: ArgumentParser):
    """
    Write the PRG to disk in the formats selected by `options.output_type`.

    * If `options.output_type.prg` is truthy, a human readable record is
      written to `prg_fname`: a header line of the form
      `>{seqid} max_nest={options.max_nesting} min_match={options.min_match_length}`
      followed by `prg_string`.
    * If `options.output_type.binary` is truthy, the integer vector encoding
      of `prg_string` is written to `prg_fname` with its suffix replaced by
      `.bin`.

    The sequence id is `options.seqid`, falling back to `options.prg_name`
    when the former is falsy.
    """
    seqid = options.seqid or options.prg_name
    wanted = options.output_type

    if wanted.prg:
        header = f">{seqid} max_nest={options.max_nesting} min_match={options.min_match_length}"
        with prg_fname.open("w") as text_out:
            print(f"{header}\n{prg_string}", file=text_out)

    if wanted.binary:
        encoder = PrgEncoder()
        encoded: PRG_Ints = encoder.encode(prg_string)
        binary_path = prg_fname.with_suffix(".bin")
        with binary_path.open("wb") as binary_out:
            encoder.write(encoded, binary_out)
def test_encode_empty_string_returns_empty(self):
    """Encoding an empty PRG string yields an empty list."""
    encoded = PrgEncoder().encode("")
    self.assertEqual(encoded, [])
def test_invalid_string_fails(self):
    """A unit that is neither DNA nor an integer ("foo") raises EncodeError."""
    encoder = PrgEncoder()
    self.assertRaises(EncodeError, encoder._encode_unit, "foo")
def test_single_numeric_chars_converted_to_ints(self, integer):
    """The string form of `integer` encodes to a one-element list holding it."""
    # NOTE(review): `integer` is supplied from outside this block (the
    # decorator or parametrisation is not visible here).
    as_text = str(integer)
    encoded = PrgEncoder()._encode_unit(as_text)
    self.assertEqual(encoded, [integer])
def test_encode_empty_string_fails(self):
    """Encoding an empty unit raises EncodeError."""
    encoder = PrgEncoder()
    self.assertRaises(EncodeError, encoder._encode_unit, "")
def test_dnaToInt_char_not_valid_raises_assert_error(self, char):
    """Converting the supplied invalid `char` raises ConversionError."""
    # NOTE(review): `char` is supplied from outside this block; it is presumably
    # generated so as never to be a valid base -- confirm against the decorator.
    encoder = PrgEncoder()
    self.assertRaises(ConversionError, encoder._dna_to_int, char)
def test_repeated_odd_marker_fails(self):
    """A PRG in which the odd marker 5 appears four times raises ValueError."""
    reused_marker_prg = "5 A 6 C 5 AT 5 T 6 G 5"
    # Both construction and encoding happen inside the context manager.
    with self.assertRaises(ValueError):
        encoder = PrgEncoder()
        encoder.encode(reused_marker_prg)
def test_permutations_of_valid_input_passes(self, prg):
    """Encoding the supplied `prg` completes without raising; the output is not checked."""
    # NOTE(review): `prg` is supplied from outside this block (generator not
    # visible here).
    PrgEncoder().encode(prg)
def encode_and_write_prg(self, prg_string: str):
    """Encode `prg_string` with a fresh PrgEncoder and write it, in binary mode, to `self.out_fname`."""
    encoder = PrgEncoder()
    # Encode first so that the output file is only opened once encoding succeeded.
    encoded = encoder.encode(prg_string)
    with open(self.out_fname, "wb") as out_handle:
        encoder.write(encoded, out_handle)