Ejemplo n.º 1
0
    def test_dna_returns_list_of_ints_between_1_and_4(self, mock_method: Mock):
        """Encoding the unit "ACGT" is expected to give [1, 2, 3, 4].

        NOTE(review): ``mock_method`` is presumably injected by a patch
        decorator that sits outside this view -- confirm which method it
        replaces.
        """
        dna = "ACGT"

        result = PrgEncoder()._encode_unit(dna)
        self.assertEqual(result, [1, 2, 3, 4])

        # The mocked method must have been called once per base, in order.
        calls_per_base = list(map(call, dna))
        self.assertEqual(mock_method.call_args_list, calls_per_base)
Ejemplo n.º 2
0
def write_prg(output_prefix: str, prg_string: str):
    """
    Write the PRG as a human readable string and as an integer vector.

    Two files are produced:

    * ``<output_prefix>.prg``: a ``>header`` line followed by ``prg_string``.
    * ``<output_prefix>.bin``: the integers returned by
      ``PrgEncoder.encode``, written with ``PrgEncoder.write``.

    The header is parsed from the stem of the ``.prg`` file name, which is
    expected to look like ``<sample>.max_nest<N>.min_match<M>``. When the
    stem does not follow that pattern a warning is logged and the header
    falls back to the placeholder name ``sample`` on its own, because
    ``max_nest`` and ``min_match`` cannot be recovered either.

    :param output_prefix: path prefix shared by both output files.
    :param prg_string: the PRG in its string form.
    """
    prg_filename = Path(output_prefix + ".prg")
    regex = re.compile(
        r"^(?P<sample>.+)\.max_nest(?P<max_nest>\d+)\.min_match(?P<min_match>\d+)"
    )

    # The header is built before the file is opened so that a failure while
    # parsing cannot leave a truncated output file behind.
    match = regex.search(prg_filename.stem)
    if match is None:
        # ``search`` returns None instead of raising when nothing matches, so
        # the fallback must be an explicit check rather than an ``except``.
        logging.warning(
            "A sample name couldn't be parsed from the prefix. "
            "Using 'sample' as sample name."
        )
        header = "sample"
    else:
        sample = match.group("sample")
        max_nest = int(match.group("max_nest"))
        min_match = int(match.group("min_match"))
        header = f"{sample} max_nest={max_nest} min_match={min_match}"

    with prg_filename.open("w") as prg:
        print(f">{header}\n{prg_string}", file=prg)

    prg_ints_fpath = Path(output_prefix + ".bin")
    prg_encoder = PrgEncoder()
    prg_ints: PRG_Ints = prg_encoder.encode(prg_string)

    with prg_ints_fpath.open("wb") as ostream:
        prg_encoder.write(prg_ints, ostream)
Ejemplo n.º 3
0
    def test_empty_encoding_writes_nothing(self):
        """Writing an empty encoding must leave the output stream empty."""
        buffer = BytesIO()
        PrgEncoder.write([], buffer)

        # Rewind so that the read below sees everything that was written.
        buffer.seek(0)
        written = buffer.read()

        self.assertEqual(written, b"")
Ejemplo n.º 4
0
    def test_encode_prg_nested_variation(self):
        """A PRG string with one site nested in another encodes as expected.

        In the expected output the closing occurrence of each odd marker
        in the input (7, 5) appears as the following even number (8, 6).
        """
        nested_prg = "5 A 7 C 8 T 8 A 7 6 CT 6 TA 5"
        expected = [5, 1, 7, 2, 8, 4, 8, 1, 8, 6, 2, 4, 6, 4, 1, 6]

        actual = PrgEncoder().encode(nested_prg)

        self.assertEqual(actual, expected)
Ejemplo n.º 5
0
    def test_encode_prg_one_site_deletion(self):
        """A site whose first allele is empty encodes to markers plus one base.

        The input also carries leading, trailing and doubled spaces, none
        of which show up in the expected output.
        """
        prg_with_deletion = " 5  6 C 5 "
        expected = [5, 6, 2, 5]

        actual = PrgEncoder().encode(prg_with_deletion)

        self.assertEqual(actual, expected)
Ejemplo n.º 6
0
    def test_encode_prg_with_one_snp(self):
        """A single two-allele site ("A" vs "C") encodes to five integers."""
        snp_prg = "5 A 6 C 5"
        expected = [5, 1, 6, 2, 5]

        actual = PrgEncoder().encode(snp_prg)

        self.assertEqual(actual, expected)
Ejemplo n.º 7
0
    def test_encode_prg_spacing_no_variants(self):
        """A lone lowercase base padded with spaces encodes to a single int."""
        padded_base = " a "

        actual = PrgEncoder().encode(padded_base)

        self.assertEqual(actual, [1])
Ejemplo n.º 8
0
    def test_dnaToInt_custom_encoding(self):
        """A user-supplied encoding is honoured, here for a lowercase query."""
        custom_encoder = PrgEncoder(encoding={"A": 7})

        actual = custom_encoder._dna_to_int("a")

        self.assertEqual(actual, 7)
Ejemplo n.º 9
0
    def test_dnaToInt_empty_string_raises_assert_error(self):
        """Converting the empty string raises ``ConversionError``.

        The error message is expected to mention the offending (empty)
        character.
        """
        encoder = PrgEncoder()
        char = ""

        with self.assertRaises(ConversionError) as context:
            encoder._dna_to_int(char)

        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn("Char '' is not in", str(context.exception))
Ejemplo n.º 10
0
    def test_encode_prg_nonlinear_markers(self):
        """Multi-digit, non-consecutive markers are kept as whole integers.

        The alleles mix upper and lower case ("Ct"); both cases are
        expected to encode identically.
        """
        prg_string = "55 GA 63 Ct 55"
        expected = [55, 3, 1, 63, 2, 4, 55]

        actual = PrgEncoder().encode(prg_string)

        self.assertEqual(actual, expected)
Ejemplo n.º 11
0
    def test_encode_prg_multi_base_alleles(self):
        """Alleles longer than one base are expanded to one int per base."""
        prg_string = "5 GA 6 CT 5"
        expected = [5, 3, 1, 6, 2, 4, 5]

        actual = PrgEncoder().encode(prg_string)

        self.assertEqual(actual, expected)
Ejemplo n.º 12
0
    def test_dnaToInt_default_encoding_int(self):
        """With the default encoding, "A" and "a" both convert to 1."""
        encoder = PrgEncoder()
        expected = 1

        # Upper case first, then lower case, as two separate conversions.
        for base in ("A", "a"):
            self.assertEqual(encoder._dna_to_int(base), expected)
Ejemplo n.º 13
0
    def test_write_single_int(self):
        """Writing one integer emits exactly ``to_bytes`` of that integer."""
        stream = BytesIO()
        PrgEncoder.write([1], stream)

        # Rewind so that the read below sees everything that was written.
        stream.seek(0)
        actual = stream.read()

        self.assertEqual(actual, to_bytes(1))
Ejemplo n.º 14
0
    def test_write_two_ints(self):
        """Writing two integers emits their ``to_bytes`` forms back to back."""
        stream = BytesIO()
        PrgEncoder.write([1, 4], stream)

        # Rewind so that the read below sees everything that was written.
        stream.seek(0)
        actual = stream.read()

        expected = to_bytes(1) + to_bytes(4)
        self.assertEqual(actual, expected)
Ejemplo n.º 15
0
def build_from_msas(report, action, build_paths, args):
    """
    Build a PRG from the PRGs listed in ``args.prgs_bed`` (for use by gramtools).

    Delegates the build to ``standalone_build_from_msas``, saves the returned
    intervals to ``build_paths.built_prg_bed`` and writes the returned PRG
    integers to ``build_paths.prg`` in binary mode via ``PrgEncoder.write``.

    ``report`` and ``action`` are accepted but not used by this function.
    """
    log.info(f"Building prg from prgs in {args.prgs_bed}")

    build_inputs = (
        args.prgs_bed,
        args.reference,
        build_paths.coords_file,
        build_paths.built_prg_dirname,
        args.max_threads,
    )
    intervals, prg_ints = standalone_build_from_msas(*build_inputs)

    intervals.saveas(build_paths.built_prg_bed)
    with open(build_paths.prg, "wb") as binary_out:
        PrgEncoder.write(prg_ints, binary_out)
Ejemplo n.º 16
0
    def test_write_multiple_ints_function_calls(self):
        """``PrgEncoder.write`` converts every int and writes to the stream once.

        ``to_bytes`` is patched so that the test only observes how it is
        called: once per integer, in input order. The stream's ``write`` is
        replaced by a mock so the number of write operations can be counted.
        """
        num_elems = 100
        # randint must actually be called; the bare name would fill the list
        # with references to the function instead of integers.
        prg_ints = [random.randint(1, 100) for _ in range(num_elems)]

        ostream = BytesIO()
        ostream.write = MagicMock(spec=True)

        # Check call to write triggers the production of bytes
        with patch("make_prg.prg_encoder.to_bytes",
                   return_value=b"") as mocked_to_bytes:
            PrgEncoder.write(prg_ints, ostream)
            expected_bytes_calls = [call(i) for i in prg_ints]
            self.assertEqual(expected_bytes_calls,
                             mocked_to_bytes.call_args_list)

            # Check ostream write operation only called once
            ostream.write.assert_called_once()
Ejemplo n.º 17
0
def write_prg(prg_fname: Path, prg_string: str, options: ArgumentParser):
    """
    Write the PRG to disk in the formats enabled by ``options.output_type``.

    * ``options.output_type.prg``: write ``prg_fname`` as text, a ``>`` header
      line (sequence id, ``max_nest`` and ``min_match``) followed by
      ``prg_string``.
    * ``options.output_type.binary``: encode ``prg_string`` with
      ``PrgEncoder`` and write the integers to ``prg_fname`` with its suffix
      replaced by ``.bin``.

    The sequence id is ``options.seqid`` when truthy, else
    ``options.prg_name``.
    """
    # Fall back to the PRG name when no explicit sequence id was given.
    seqid = options.seqid or options.prg_name

    if options.output_type.prg:
        with prg_fname.open("w") as text_out:
            header = f">{seqid} max_nest={options.max_nesting} min_match={options.min_match_length}"
            print(f"{header}\n{prg_string}", file=text_out)

    if options.output_type.binary:
        binary_path = prg_fname.with_suffix(".bin")
        encoder = PrgEncoder()
        encoded: PRG_Ints = encoder.encode(prg_string)

        with binary_path.open("wb") as binary_out:
            encoder.write(encoded, binary_out)
Ejemplo n.º 18
0
    def test_encode_empty_string_returns_empty(self):
        """Encoding the empty string yields an empty list."""
        result = PrgEncoder().encode("")

        self.assertEqual(result, [])
Ejemplo n.º 19
0
 def test_invalid_string_fails(self):
     """A unit that is neither DNA nor a number raises ``EncodeError``."""
     invalid_unit = "foo"
     encoder = PrgEncoder()

     with self.assertRaises(EncodeError):
         encoder._encode_unit(invalid_unit)
Ejemplo n.º 20
0
    def test_single_numeric_chars_converted_to_ints(self, integer):
        """A unit made of an integer's digits encodes to that single integer.

        NOTE(review): ``integer`` is presumably supplied by a parametrising
        or property-testing decorator outside this view -- confirm.
        """
        unit = str(integer)

        actual = PrgEncoder()._encode_unit(unit)

        self.assertEqual(actual, [integer])
Ejemplo n.º 21
0
 def test_encode_empty_string_fails(self):
     """Encoding an empty unit raises ``EncodeError``."""
     empty_unit = ""
     encoder = PrgEncoder()

     with self.assertRaises(EncodeError):
         encoder._encode_unit(empty_unit)
Ejemplo n.º 22
0
 def test_permutations_of_valid_input_passes(self, prg):
     """Encoding ``prg`` must complete without raising.

     NOTE(review): ``prg`` is presumably generated by a decorator outside
     this view -- confirm. No assertion is made on the returned value.
     """
     PrgEncoder().encode(prg)
Ejemplo n.º 23
0
    def test_dnaToInt_char_not_valid_raises_assert_error(self, char):
        """Converting an invalid ``char`` raises ``ConversionError``.

        NOTE(review): ``char`` is presumably supplied by a decorator outside
        this view -- confirm which characters it generates.
        """
        encoder = PrgEncoder()

        self.assertRaises(ConversionError, encoder._dna_to_int, char)
Ejemplo n.º 24
0
 def encode_and_write_prg(self, prg_string: str):
     """Encode ``prg_string`` and write the integers to ``self.out_fname``.

     The output file is opened in binary mode and filled through
     ``PrgEncoder.write``.
     """
     encoder = PrgEncoder()
     encoded = encoder.encode(prg_string)

     with open(self.out_fname, "wb") as binary_out:
         encoder.write(encoded, binary_out)
Ejemplo n.º 25
0
    def test_repeated_odd_marker_fails(self):
        """A PRG that uses the odd marker 5 for two sites raises ``ValueError``."""
        repeated_marker_prg = "5 A 6 C 5 AT 5 T 6 G 5"

        # The encoder is built inside the context, as in the original test.
        with self.assertRaises(ValueError):
            PrgEncoder().encode(repeated_marker_prg)