Beispiel #1
0
 def test_e2e_fasta_beginning_end_sites(self, fasta_file, bed_file,
                                        out_file, sequences, argparser):
     """End-to-end FASTA run combining every masking source.

     Runs ``mask.run`` with a BED mask file, explicit ``--mask-sites``,
     ``--mask-from-beginning`` and ``--mask-from-end``, then checks every
     record written to ``out_file``: the leading and trailing stretches
     must be all ``N``, listed sites must be ``N`` and every other site
     must equal the unmasked reference from ``sequences``.
     """
     from_beginning = 3
     from_end = 1
     arg_sites = [5, 12]
     # --mask-sites are one-indexed on the command line; shift them to the
     # zero-indexed positions used for string indexing below. Only
     # membership is tested, so a set is sufficient.
     expected_removals = set(
         TEST_BED_SEQUENCE + [s - 1 for s in arg_sites])
     args = argparser(
         "-s %s -o %s --mask %s --mask-from-beginning %s --mask-from-end %s --mask-sites %s"
         % (fasta_file, out_file, bed_file, from_beginning, from_end,
            " ".join(str(s) for s in arg_sites)))
     mask.run(args)
     output = SeqIO.parse(out_file, "fasta")
     for record in output:
         reference = str(sequences[record.id].seq)
         masked_seq = str(record.seq)
         assert masked_seq[:from_beginning] == "N" * from_beginning
         assert masked_seq[-from_end:] == "N" * from_end
         # Walk only the interior; start the index at from_beginning so it
         # lines up with positions in the full sequence.
         for idx, site in enumerate(masked_seq[from_beginning:-from_end],
                                    from_beginning):
             if idx in expected_removals:
                 assert site == "N"
             else:
                 assert site == reference[idx]
Beispiel #2
0
    def test_run_with_mask_sites(self, vcf_file, out_file, argparser,
                                 mp_context):
        """Check that ``--mask-sites`` values reach ``mask_vcf`` shifted down by one."""

        def check_mask_sites(mask_sites, *args, **kwargs):
            # Sites are one-indexed on the CLI: 2 and 8 must arrive as 1 and 7.
            assert mask_sites == [1, 7]

        # Swap the real VCF masker for the checker so only the argument
        # handling in mask.run is exercised.
        mp_context.setattr(mask, "mask_vcf", check_mask_sites)
        command_line = "--mask-sites 2 8 -s %s -o %s" % (vcf_file, out_file)
        mask.run(argparser(command_line))
Beispiel #3
0
 def test_e2e_fasta_mask_invalid(self, fasta_file, out_file, sequences,
                                 argparser):
     """End-to-end FASTA run with ``--mask-invalid``.

     Every site of the reference that is in ``VALID_NUCLEOTIDES`` must be
     unchanged in the output; every other site must have been replaced
     with ``N``.
     """
     args = argparser("-s %s -o %s --mask-invalid" % (fasta_file, out_file))
     mask.run(args)
     output = SeqIO.parse(out_file, "fasta")
     for record in output:
         reference = str(sequences[record.id].seq)
         for idx, site in enumerate(reference):
             # Compute the expectation first: writing the conditional
             # inline in the assert makes the `else "N"` branch the
             # asserted value itself, which is always truthy.
             expected = site if site in VALID_NUCLEOTIDES else "N"
             assert record.seq[idx] == expected
Beispiel #4
0
 def test_e2e_vcf_minimal(self, vcf_file, bed_file, argparser):
     """End-to-end VCF run with only a mask file and no explicit output path.

     The result is read back from ``vcf_file`` itself, and no remaining
     record may sit on a position listed in ``TEST_BED_SEQUENCE``.
     """
     command_line = "-s %s --mask %s" % (vcf_file, bed_file)
     mask.run(argparser(command_line))
     with open(vcf_file) as masked:
         first_line = masked.readline()
         assert first_line.startswith("##fileformat")  # is a VCF
         header_line = masked.readline()
         assert header_line.startswith("#CHROM\tPOS\t")  # have a header
         for row in masked:
             pos = int(row.split("\t")[1])  # POS column
             # POS is shifted down by one to compare with the BED sites
             assert pos - 1 not in TEST_BED_SEQUENCE
Beispiel #5
0
 def test_e2e_fasta_minimal(self, fasta_file, bed_file, sequences,
                            argparser):
     """End-to-end FASTA run with only a mask file and no explicit output path.

     The result is read back from ``fasta_file`` itself: sites listed in
     ``TEST_BED_SEQUENCE`` must be ``N``, all others must match the
     unmasked reference in ``sequences``.
     """
     mask.run(argparser("-s %s --mask %s" % (fasta_file, bed_file)))
     for record in SeqIO.parse(fasta_file, "fasta"):
         original = sequences[record.id].seq
         for position, base in enumerate(record.seq):
             expected = "N" if position in TEST_BED_SEQUENCE else original[position]
             assert base == expected
Beispiel #6
0
    def test_run_with_mask_sites_and_mask_file(self, vcf_file, out_file,
                                               bed_file, argparser,
                                               mp_context):
        """Check that mask-file sites and ``--mask-sites`` are merged for ``mask_vcf``."""
        # CLI sites 20 and 21 are one-indexed, hence 19 and 20 here.
        merged_sites = sorted(set(TEST_BED_SEQUENCE + [19, 20]))

        def check_mask_sites(mask_sites, *args, **kwargs):
            assert mask_sites == merged_sites

        mp_context.setattr(mask, "mask_vcf", check_mask_sites)
        command_line = "--mask-sites 20 21 --mask %s -s %s -o %s" % (
            bed_file, vcf_file, out_file)
        mask.run(argparser(command_line))
Beispiel #7
0
    def test_run_fasta_mask_from_beginning_or_end(self, fasta_file, out_file,
                                                  argparser, mp_context):
        """Check that both end-masking options are forwarded to ``mask_fasta``."""

        def check_mask_from(*_positional, mask_from_beginning=0, mask_from_end=0):
            # Values must match the 2 and 3 given on the command line below.
            assert mask_from_beginning == 2
            assert mask_from_end == 3

        mp_context.setattr(mask, "mask_fasta", check_mask_from)
        command_line = (
            "-s %s -o %s --mask-from-beginning 2 --mask-from-end 3" %
            (fasta_file, out_file))
        mask.run(argparser(command_line))
Beispiel #8
0
    def test_run_recognize_vcf(self, bed_file, vcf_file, argparser,
                               mp_context):
        """Ensure a VCF input is never dispatched to ``mask_fasta``."""

        def fail(*args, **kwargs):
            assert False, "Called mask_fasta incorrectly"

        def do_nothing(*a, **k):
            return None

        # mask_fasta trips the test if reached; mask_vcf and copyfile are
        # stubbed out so the run performs no real work.
        mp_context.setattr(mask, "mask_fasta", fail)
        mp_context.setattr(mask, "mask_vcf", do_nothing)
        mp_context.setattr(mask, "copyfile", lambda *args: None)

        command_line = "--mask=%s -s %s --no-cleanup" % (bed_file, vcf_file)
        mask.run(argparser(command_line))
Beispiel #9
0
    def test_run_normal_case(self, bed_file, vcf_file, out_file, argparser,
                             mp_context):
        """Check the arguments ``mask.run`` hands to ``mask_vcf`` in the plain case.

        Sites must come from the BED file, the input and output paths must
        be the ones given on the command line, and cleanup must be enabled.
        """
        expected_in, expected_out = vcf_file, out_file

        def check_args(mask_sites, in_file, _out_file, cleanup):
            assert mask_sites == TEST_BED_SEQUENCE, "Wrong mask sites provided"
            assert in_file == expected_in, "Incorrect input file provided"
            assert _out_file == expected_out, "Incorrect output file provided"
            assert cleanup is True, "Cleanup erroneously passed in as False"

        mp_context.setattr(mask, "mask_vcf", check_args)
        command_line = "--mask=%s --sequences=%s --output=%s" % (
            bed_file, vcf_file, out_file)
        mask.run(argparser(command_line))
        assert os.path.exists(out_file), "Output file incorrectly deleted"
Beispiel #10
0
    def test_run_respect_no_cleanup(self, bed_file, vcf_file, argparser,
                                    mp_context):
        """Check that ``--no-cleanup`` reaches ``mask_vcf`` as ``cleanup=False``.

        ``mask_vcf`` is replaced by a stub that verifies the flag and creates
        the output file, which must still exist after ``mask.run`` returns.
        """
        out_file = os.path.join(os.path.dirname(vcf_file),
                                "masked_" + os.path.basename(vcf_file))

        def make_outfile(mask_sites, in_file, out_file, cleanup=True):
            # Identity check: the flag must be the boolean False itself,
            # not merely a falsy value.
            assert cleanup is False
            open(out_file, "w").close()  # need out_file to exist

        mp_context.setattr(mask, "mask_vcf", make_outfile)
        args = argparser("--mask=%s -s %s -o %s --no-cleanup" %
                         (bed_file, vcf_file, out_file))
        mask.run(args)
        assert os.path.exists(out_file), "Output file incorrectly deleted"
Beispiel #11
0
    def test_run_handle_missing_outfile(self, bed_file, fasta_file, argparser,
                                        mp_context):
        """Check the default output path used when no ``-o`` is given.

        ``mask_fasta`` is replaced by a stub that must receive a
        ``masked_``-prefixed sibling of the input as its output path. The
        stub writes a marker string there, and that marker is expected to
        be readable from the input path once ``mask.run`` has finished.
        """
        directory, filename = os.path.split(fasta_file)
        expected_outfile = os.path.join(directory, "masked_" + filename)

        def check_outfile(mask_sites, in_file, out_file, **kwargs):
            assert out_file == expected_outfile
            with open(out_file, "w") as fh:
                fh.write("test_string")

        mp_context.setattr(mask, "mask_fasta", check_outfile)
        mask.run(argparser("--mask=%s -s %s" % (bed_file, fasta_file)))

        with open(fasta_file) as fh:
            contents = fh.read()
        assert contents == "test_string"
Beispiel #12
0
 def test_e2e_vcf_with_options(self, vcf_file, bed_file, out_file,
                               argparser):
     """End-to-end VCF run with a mask file plus explicit ``--mask-sites``.

     No record left in ``out_file`` may sit on a BED-file site or on any
     of the sites given on the command line.
     """
     arg_sites = [5, 12, 14]
     # shift the command-line sites down by one before merging with the BED sites
     shifted_sites = [s - 1 for s in arg_sites]
     expected_removals = sorted(set(TEST_BED_SEQUENCE + shifted_sites))
     sites_argument = " ".join(map(str, arg_sites))
     command_line = "-s %s -o %s --mask %s --mask-sites %s" % (
         vcf_file, out_file, bed_file, sites_argument)
     mask.run(argparser(command_line))
     with open(out_file) as masked:
         first_line = masked.readline()
         assert first_line.startswith("##fileformat")  # is a VCF
         header_line = masked.readline()
         assert header_line.startswith("#CHROM\tPOS\t")  # have a header
         for row in masked:
             pos = int(row.split("\t")[1])  # POS column
             # re-zero-index the VCF site before comparing
             assert pos - 1 not in expected_removals
Beispiel #13
0
 def test_run_handle_empty_sequence_file(self, vcf_file, argparser):
     """An empty sequence file must make ``mask.run`` exit."""
     # Opening for writing truncates the fixture file to zero bytes.
     with open(vcf_file, "w"):
         pass
     command_line = "-s %s --mask-sites 1" % vcf_file
     with pytest.raises(SystemExit):
         mask.run(argparser(command_line))
Beispiel #14
0
 def test_run_vcf_cannot_mask_beginning_or_end(self, vcf_file, argparser,
                                               op):
     """``--mask-from-<op>`` on a VCF input must make ``mask.run`` exit.

     ``op`` is spliced into the option name; it is presumably supplied by
     a parametrization that is not visible here (TODO confirm).
     """
     args = argparser("-s %s --mask-from-%s 2" % (vcf_file, op))
     # The exception info is not inspected, so it is not bound to a name.
     with pytest.raises(SystemExit):
         mask.run(args)
Beispiel #15
0
 def test_run_requires_some_masking(self, vcf_file, argparser):
     """Running with a sequence file but no masking option must exit."""
     args = argparser("-s %s" % vcf_file)
     # The exception info is not inspected, so it is not bound to a name.
     with pytest.raises(SystemExit):
         mask.run(args)
Beispiel #16
0
 def test_run_handle_missing_sequence_file(self, vcf_file, argparser):
     """A sequence path that does not exist must make ``mask.run`` exit."""
     os.remove(vcf_file)
     # Supply a masking option so the exit cannot be attributed to the
     # "no masking requested" case, which exits for `-s <file>` alone.
     args = argparser("-s %s --mask-sites 1" % vcf_file)
     with pytest.raises(SystemExit):
         mask.run(args)
Beispiel #17
0
 def test_run_handle_empty_mask_file(self, vcf_file, bed_file, argparser):
     """An empty mask file must make ``mask.run`` exit."""
     # Opening for writing truncates the BED fixture to zero bytes.
     with open(bed_file, "w"):
         pass
     command_line = "-s %s --mask %s" % (vcf_file, bed_file)
     with pytest.raises(SystemExit):
         mask.run(argparser(command_line))
Beispiel #18
0
 def test_run_handle_missing_mask_file(self, vcf_file, bed_file, argparser):
     """A mask file path that does not exist must make ``mask.run`` exit."""
     # Delete the BED fixture so the --mask path points at nothing.
     os.remove(bed_file)
     command_line = "-s %s --mask %s" % (vcf_file, bed_file)
     with pytest.raises(SystemExit):
         mask.run(argparser(command_line))