Beispiel #1
0
 def test_retrieve_af(self):
     '''
     retrieve_af() yields the AF value from INFO, or None when AF is absent
     '''
     # Columns shared by both records; only the two trailing INFO columns vary.
     common = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.' ]

     with_af = Bedpe(common + ['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])
     self.assertEqual(with_af.retrieve_af(), '0.2')

     without_af = Bedpe(common + ['SVTYPE=BND', 'SVTYPE=BND'])
     self.assertIsNone(without_af.retrieve_af())
Beispiel #2
0
 def test_retrieve_svtype(self):
     '''
     SVTYPE is read from INFO; building a record without SVTYPE exits
     '''
     leading = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.' ]

     valid = Bedpe(leading + ['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])
     self.assertEqual(valid.retrieve_svtype(), 'BND')

     no_svtype = leading + ['AF=0.2', 'AF=0.2']
     # The constructor itself is expected to raise SystemExit here.
     with self.assertRaises(SystemExit):
         Bedpe(no_svtype)
 def test_adjust_by_tag(self):
     '''
     adjust_by_tag() shifts a coordinate by strand and by the CIPOS tag
     '''
     record = Bedpe([ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ])
     adjust = self.converter.adjust_by_tag

     # No CIPOS tag present: only the strand influences the result.
     self.assertEqual(adjust(record, 'CIPOS', '+', 200), 200)
     self.assertEqual(adjust(record, 'CIPOS', '-', 200), 201)

     # With CIPOS=-2,3 in the first INFO field the expected values move.
     record.info1 = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
     self.assertEqual(adjust(record, 'CIPOS', '-', 200), 203)
     self.assertEqual(adjust(record, 'CIPOS', '+', 200), 202)
Beispiel #4
0
 def test_parse_info_tag(self):
     '''
     parse_info_tag() returns a flag's presence or a key's value
     '''
     # (INFO string, tag to look up, expected result)
     cases = [
         ('SVTYPE', 'SVTYPE', True),
         ('SVTYPE', 'AF=', False),
         ('SVTYPE=BND;AF=0.2', 'AF=', '0.2'),
         ('SVTYPE=BND;AF=0.2', 'SVTYPE=', 'BND'),
         ('SVTYPE=BND;SECONDARY;AF=0.2', 'SECONDARY', True),
     ]
     for info_string, tag, expected in cases:
         self.assertEqual(Bedpe.parse_info_tag(info_string, tag), expected)
    def test_adjust_by_ciend(self):
        '''
        adjust_by_ciend() depends on the second strand and on the CIEND tag
        '''
        fields = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ]
        record = Bedpe(fields)
        adjust = self.converter.adjust_by_ciend

        # No CIEND tag yet: the record starts on the '-' strand, then flips.
        self.assertEqual(adjust(record), 301)
        record.o2 = '+'
        self.assertEqual(adjust(record), 300)

        # Same two strands again, now with CIEND=-2,3 present.
        record.info1 = 'SVTYPE=BND;AF=0.2;CIEND=-2,3'
        self.assertEqual(adjust(record), 302)
        record.o2 = '-'
        self.assertEqual(adjust(record), 303)
Beispiel #6
0
 def test__combine_sname_values(self):
     '''
     _combine_sname_values() merges two SNAME value lists, tolerating None
     '''
     combine = Bedpe._combine_sname_values

     merged = combine('sample1:2', 'sample2:4,sample3:5')
     # Compared as sets because the order of merged entries is not asserted.
     self.assertEqual(set(merged.split(',')),
                      set(['sample1:2', 'sample2:4', 'sample3:5']))

     # When either side is None the other side comes back unchanged.
     self.assertEqual(combine(None, 'sample2:4,sample3:5'),
                      'sample2:4,sample3:5')
     self.assertEqual(combine('sample2:4,sample3:5', None),
                      'sample2:4,sample3:5')
Beispiel #7
0
 def test_adjust_by_cipos(self):
     '''
     adjust_by_cipos() recomputes b1 from the first strand and CIPOS tag
     '''
     fields = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ]
     record = Bedpe(fields)
     # Value straight after construction ('+' strand, no CIPOS).
     self.assertEqual(record.b1, 200)

     record.o1 = '-'
     record.adjust_by_cipos()
     self.assertEqual(record.b1, 201)

     # Introduce a CIPOS tag while still on the '-' strand.
     record.misc[0] = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
     record.adjust_by_cipos()
     self.assertEqual(record.b1, 203)

     record.o1 = '+'
     record.adjust_by_cipos()
     self.assertEqual(record.b1, 202)
Beispiel #8
0
 def test_sname_value(self):
     '''
     sname_value() extracts the SNAME value and ignores look-alike tags
     '''
     get_sname = Bedpe.sname_value
     wanted = 'sample1:2,sample2:3'

     self.assertEqual(get_sname('SNAME=sample1:2,sample2:3'), wanted)

     # A missing tag and an empty value both give None.
     for info_string in ('AF', 'SNAME='):
         self.assertIsNone(get_sname(info_string))

     # SNAME1 must not be picked up in place of SNAME, with or without
     # spaces in sample names or trailing tags.
     for info_string in (
             'SNAME1=older_sample1:2,older_sample2:2;SNAME=sample1:2,sample2:3',
             'SNAME1=older_sample 1:2,older_sample2:2;SNAME=sample1:2,sample2:3;AF=1'):
         self.assertEqual(get_sname(info_string), wanted)
Beispiel #9
0
    def test_adjust_by_ciend(self):
        '''
        adjust_by_ciend() recomputes b2 from the second strand and CIEND tag
        '''
        fields = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2' ]
        record = Bedpe(fields)
        # Value straight after construction ('-' strand, no CIEND).
        self.assertEqual(record.b2, 301)

        record.o2 = '+'
        record.adjust_by_ciend()
        self.assertEqual(record.b2, 300)

        # Introduce a CIEND tag while on the '+' strand.
        record.misc[0] = 'SVTYPE=BND;AF=0.2;CIEND=-2,3'
        record.adjust_by_ciend()
        self.assertEqual(record.b2, 302)

        record.o2 = '-'
        record.adjust_by_ciend()
        self.assertEqual(record.b2, 303)
Beispiel #10
0
 def test_retrieve_svtype(self):
     '''
     SVTYPE is read from INFO; building a record without SVTYPE exits
     '''
     # The first eighteen columns are identical for both records.
     leading = [
         '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
         'BND', 'PASS', '.', '.', '.', '.', '.', '.',
     ]

     valid = Bedpe(leading + ['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])
     self.assertEqual(valid.retrieve_svtype(), 'BND')

     lacking_svtype = leading + ['AF=0.2', 'AF=0.2']
     # The constructor itself is expected to raise SystemExit here.
     with self.assertRaises(SystemExit):
         Bedpe(lacking_svtype)
Beispiel #11
0
    def test__update_sname_field(self):
        '''
        _update_sname_field() merges SNAME from two INFO strings
        '''
        update = Bedpe._update_sname_field

        # Both sides carry SNAME (one sample name contains a space).
        merged = update('SNAME=sample 2:4', 'SNAME=sample3:12')
        tag_name, values = merged.split('=')
        self.assertEqual(tag_name, 'SNAME')
        self.assertEqual(set(values.split(',')),
                         set(['sample 2:4', 'sample3:12']))

        # Neither side carries SNAME: the first INFO string is returned as is.
        self.assertEqual(update('AF=0.5', 'AF=0.1'), 'AF=0.5')

        # Only the second side carries SNAME: it is appended to the first.
        self.assertEqual(update('AF=0.5', 'AF=0.1;SNAME=sample5:12'),
                         'AF=0.5;SNAME=sample5:12')
Beispiel #12
0
    def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
        '''
        Read BEDPE lines from in_file and group them into self.cluster_list

        Leading '#' lines are copied to bedpe_out; a RETAINED INFO header
        line is written just before the '#CHROM' line. Every other line is
        parsed into a Bedpe and added to each existing cluster that accepts
        it, or to a new cluster when none does. Clusters that accepted the
        same variant are then merged where possible.

        in_file   -- iterable of BEDPE text lines
        bedpe_out -- writable handle for header lines; also passed to prune()
        is_sorted -- when true, prune() is also called every 1000 input lines

        Exits the process with status 1 if a variant has no allele frequency.
        '''
        # Locally alias instance variables
        max_distance = self.max_distance
        eval_param = self.eval_param

        in_header = True
        for line in in_file:
            # Only '#' lines that precede the first data line count as header.
            if line.startswith('#') and in_header:
                if line.startswith('#CHROM'):
                    bedpe_out.write('##INFO=<ID=RETAINED,Number=0,Type=Flag,Description="Variants clustering with this call were pruned">\n')
                bedpe_out.write(line)
                continue
            in_header = False
            self.bedpe_lines += 1
            bedpe = Bedpe(line.rstrip().split('\t'))
            if bedpe.af is None:
                sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
                sys.exit(1)
            # An AF of '.' is counted as skipped and not clustered.
            if bedpe.af == '.':
                self.skipped_lines += 1
                continue
            # Add the variant to every cluster that accepts it and remember them.
            matched_clusters = []
            for cluster in self.cluster_list:
                if cluster.can_add(bedpe, max_distance):
                    cluster.add(bedpe, eval_param)
                    matched_clusters.append(cluster)
            if not matched_clusters:
                new_cluster = Cluster()
                new_cluster.add(bedpe, eval_param)
                self.cluster_list.append(new_cluster)
            else:
                if len(matched_clusters) > 1:
                    # Several clusters took this variant: try to fold the later
                    # ones into the earlier ones, pairwise.
                    i = 0
                    pruned_clusters = []
                    while i < (len(matched_clusters) - 1):
                        j = i + 1
                        to_delete = set()
                        while j < len(matched_clusters):
                            # Only the first element of cluster j is tested and added.
                            if matched_clusters[i].can_add(matched_clusters[j].elements[0], max_distance):
                                matched_clusters[i].add(matched_clusters[j].elements[0], eval_param)
                                pruned_clusters.append(matched_clusters[j])
                                to_delete.add(j)
                            j += 1
                        # Delete after the scan, highest index first, so the
                        # remaining indices stay valid.
                        for index in sorted(to_delete, reverse=True):
                            del matched_clusters[index]
                        i += 1
                    if pruned_clusters:
                        # Drop the absorbed clusters from the live list.
                        self.cluster_list = [cluster for cluster in self.cluster_list if cluster not in pruned_clusters]
            # Prune and print eligible clusters (every 1000 lines, sorted input only)
            if self.bedpe_lines % 1000 == 0 and is_sorted:
                self.cluster_list = self.prune(bedpe,
                                     False,
                                     bedpe_out)

        # Final prune() call once all input has been read.
        self.cluster_list = self.prune(None,
                             True,
                             bedpe_out)

        sys.stderr.write(self.stats_report_string())
        return
 def test_adjust_by_tag(self):
     '''
     adjust_by_tag() shifts a coordinate by strand and by the CIPOS tag
     '''
     record = Bedpe([
         '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
         'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2',
         'SVTYPE=BND;AF=0.2'
     ])
     adjust = self.converter.adjust_by_tag

     # No CIPOS tag present: only the strand influences the result.
     self.assertEqual(adjust(record, 'CIPOS', '+', 200), 200)
     self.assertEqual(adjust(record, 'CIPOS', '-', 200), 201)

     # With CIPOS=-2,3 in the first INFO field the expected values move.
     record.info1 = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
     self.assertEqual(adjust(record, 'CIPOS', '-', 200), 203)
     self.assertEqual(adjust(record, 'CIPOS', '+', 200), 202)
Beispiel #14
0
 def test__update_sname_field(self):
     '''
     _update_sname_field() returns one SNAME tag holding both sample lists
     '''
     merged = Bedpe._update_sname_field('SNAME=sample2:4', 'SNAME=sample3:12')
     tag_name, values = merged.split('=')
     self.assertEqual(tag_name, 'SNAME')
     # Compared as sets because the order of merged entries is not asserted.
     self.assertEqual(set(values.split(',')),
                      set(['sample2:4', 'sample3:12']))
Beispiel #15
0
def bedpeToVcf(bedpe_file, vcf_out):
    '''
    Convert a BEDPE stream to VCF and write it to vcf_out

    Leading '##' lines are collected as header lines and the single-'#'
    column line is replaced by a VCF column line. The VCF header is written
    as soon as the first data line is seen, or after the loop when the
    input holds header lines but no data. Each data line is parsed into a
    Bedpe and every variant returned by the converter is written out.

    bedpe_file -- iterable of BEDPE text lines
    vcf_out    -- writable handle; it is closed before returning
    '''
    myvcf = Vcf()
    converter = BedpeToVcfConverter(myvcf)
    in_header = True
    # parse the bedpe data
    header = list()

    def write_header():
        # Emit the accumulated header exactly once.
        myvcf.add_header(header)
        myvcf.file_format = 'VCFv4.2'
        vcf_out.write(myvcf.get_header() + '\n')

    for line in bedpe_file:
        if in_header:
            if line[0:2] == '##':
                header.append(line)
                continue
            elif line[0:1] == '#':
                # Single-'#' column line; slicing avoids an IndexError on a
                # bare '#' without a trailing newline.
                # NOTE(review): assumes the sample columns start at column 21;
                # with fewer columns the last BEDPE column name is reused here.
                sample_list_str = line.rstrip().split('\t', 20)[-1]
                header.append('\t'.join([
                    '#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER',
                    'INFO', sample_list_str
                ]))
                continue
            else:
                in_header = False
                write_header()
        #
        bedpe = Bedpe(line.rstrip().split('\t'))
        variants = converter.convert(bedpe)
        for v in variants:
            vcf_out.write(v.get_var_string() + '\n')

    # Header-only input never reaches a data line, so the header would be
    # lost without this. Completely empty input still produces no output.
    if in_header and header:
        write_header()

    # close the VCF output file
    vcf_out.close()

    return
Beispiel #16
0
 def test_malformed(self):
     '''
     malformedFlag reports which INFO column holds the MISSING placeholder
     '''
     # The first eighteen columns are identical for both records.
     leading = [
         '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
         'BND', 'PASS', '.', '.', '.', '.', '.', '.',
     ]

     first_missing = Bedpe(leading + ['MISSING', 'SVTYPE=BND;AF=0.2'])
     self.assertEqual(first_missing.malformedFlag, 1)

     second_missing_fields = leading + ['SVTYPE=BND;AF=0.2', 'MISSING']
     second_missing = Bedpe(second_missing_fields)
     self.assertEqual(second_missing.malformedFlag, 2)
     # info1 must still match the first INFO column of the input.
     self.assertEqual(second_missing.info1, second_missing_fields[18])
Beispiel #17
0
 def test__update_sname_field(self):
     '''
     _update_sname_field() returns one SNAME tag holding both sample lists
     '''
     first_info = 'SNAME=sample2:4'
     second_info = 'SNAME=sample3:12'
     merged = Bedpe._update_sname_field(first_info, second_info)

     tag_name, values = merged.split('=')
     self.assertEqual(tag_name, 'SNAME')
     # Compared as sets because the order of merged entries is not asserted.
     self.assertEqual(set(values.split(',')),
                      set(['sample2:4', 'sample3:12']))
Beispiel #18
0
    def test__update_sname_field(self):
        '''
        _update_sname_field() merges SNAME from two INFO strings
        '''
        update = Bedpe._update_sname_field

        # Both sides carry SNAME (one sample name contains a space).
        merged = update('SNAME=sample 2:4', 'SNAME=sample3:12')
        tag_name, values = merged.split('=')
        self.assertEqual(tag_name, 'SNAME')
        self.assertEqual(set(values.split(',')),
                         set(['sample 2:4', 'sample3:12']))

        # Neither side carries SNAME: the first INFO string is returned as is.
        without_sname = update('AF=0.5', 'AF=0.1')
        self.assertEqual(without_sname, 'AF=0.5')

        # Only the second side carries SNAME: it is appended to the first.
        sname_from_second = update('AF=0.5', 'AF=0.1;SNAME=sample5:12')
        self.assertEqual(sname_from_second, 'AF=0.5;SNAME=sample5:12')
Beispiel #19
0
    def test_parse_info_tag(self):
        '''
        update_info_tag() replaces a key's value and rejects absent keys and flags
        '''
        # NOTE(review): the method name says parse_info_tag, but every call
        # below exercises update_info_tag -- consider renaming.
        update = Bedpe.update_info_tag
        replacement = 'sample,sample2'

        self.assertEqual(update('SNAME=sample', 'SNAME=', replacement),
                         'SNAME=sample,sample2')
        self.assertEqual(update('SNAME=sample;AF=0.75', 'SNAME=', replacement),
                         'SNAME=sample,sample2;AF=0.75')

        # A key that is absent, and value-less flags, must raise ValueError.
        rejected = [
            ('AF=0.75', 'SNAME=', replacement),
            ('SECONDARY;AF=0.5', 'SECONDARY', 'NEW_VALUE'),
            ('AF=0.5;SECONDARY', 'SECONDARY', 'NEW_VALUE'),
        ]
        for arguments in rejected:
            with self.assertRaises(ValueError):
                update(*arguments)
Beispiel #20
0
 def test_str(self):
     '''
     str() of a Bedpe gives back the tab-joined input fields
     '''
     # The score is written as '57.0' so this is a float-to-float comparison;
     # passing an integer would come back as a float with a decimal place.
     fields = [
         '1', '200', '300', '2', '300', '400', '777_1', '57.0', '+', '-',
         'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'SVTYPE=BND;AF=0.2',
         'SVTYPE=BND;AF=0.2'
     ]
     record = Bedpe(fields)
     expected_line = '\t'.join(fields)
     self.assertEqual(str(record), expected_line)
Beispiel #21
0
    def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
        '''
        Read BEDPE lines from in_file and group them into self.cluster_list

        Leading '#' lines are copied to bedpe_out unchanged. Every other
        line is parsed into a Bedpe and added to each existing cluster that
        accepts it, or to a new cluster when none does. Clusters that
        accepted the same variant are then merged where possible, and the
        absorbed clusters are removed from self.cluster_list.

        in_file   -- iterable of BEDPE text lines
        bedpe_out -- writable handle for header lines; also passed to prune()
        is_sorted -- when true, prune() is also called every 1000 input lines

        Exits the process with status 1 if a variant has no allele frequency.
        '''
        # Locally alias instance variables
        max_distance = self.max_distance
        eval_param = self.eval_param

        in_header = True
        for line in in_file:
            # Only '#' lines that precede the first data line count as header.
            if line.startswith('#') and in_header:
                bedpe_out.write(line)
                continue
            in_header = False
            self.bedpe_lines += 1
            bedpe = Bedpe(line.rstrip().split('\t'))
            if bedpe.af is None:
                sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
                sys.exit(1)
            # An AF of '.' is counted as skipped and not clustered.
            if bedpe.af == '.':
                self.skipped_lines += 1
                continue
            matched_clusters = []
            for cluster in self.cluster_list:
                if cluster.can_add(bedpe, max_distance):
                    cluster.add(bedpe, eval_param)
                    matched_clusters.append(cluster)
            if not matched_clusters:
                new_cluster = Cluster()
                new_cluster.add(bedpe, eval_param)
                self.cluster_list.append(new_cluster)
            elif len(matched_clusters) > 1:
                # Several clusters took this variant: fold later clusters into
                # earlier ones and remember which ones were absorbed.
                pruned_clusters = []
                i = 0
                while i < (len(matched_clusters) - 1):
                    absorbed_indices = set()
                    j = i + 1
                    while j < len(matched_clusters):
                        candidate = matched_clusters[j]
                        if matched_clusters[i].can_add(candidate.elements[0], max_distance):
                            matched_clusters[i].add(candidate.elements[0], eval_param)
                            pruned_clusters.append(candidate)
                            absorbed_indices.add(j)
                        j += 1
                    # Delete only after the scan, highest index first. Deleting
                    # inside the scan and then advancing j skipped the element
                    # that slid into the freed slot.
                    for index in sorted(absorbed_indices, reverse=True):
                        del matched_clusters[index]
                    i += 1
                if pruned_clusters:
                    # Remove the absorbed clusters. The previous filter used
                    # matched_clusters here and so discarded the survivors.
                    self.cluster_list = [cluster for cluster in self.cluster_list if cluster not in pruned_clusters]
            # Prune and print eligible clusters (every 1000 lines, sorted input only)
            if self.bedpe_lines % 1000 == 0 and is_sorted:
                self.cluster_list = self.prune(bedpe,
                                     False,
                                     bedpe_out)

        # Final prune() call once all input has been read.
        self.cluster_list = self.prune(None,
                             True,
                             bedpe_out)

        sys.stderr.write(self.stats_report_string())
        return
Beispiel #22
0
    def test_add(self):
        '''
        Cluster.add() tracks size, event type, filter, and interval bounds
        '''
        # Columns 8-19 are identical for both records.
        shared_tail = [
            '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.',
            'MISSING',
        ]
        first = Bedpe(['1', '200', '300', '2', '300', '400', '777_1'] +
                      shared_tail + ['SVTYPE=BND;AF=0.2'])
        second = Bedpe(['1', '195', '305', '2', '295', '405', '777_1'] +
                       shared_tail + ['SVTYPE=BND;AF=0.3'])

        def check_state(cluster, expectations):
            # Compare each named cluster attribute with its expected value.
            for attribute, value in expectations:
                self.assertEqual(getattr(cluster, attribute), value)

        cluster = Cluster()
        cluster.add(first, None)
        check_state(cluster, [
            ('size', 1),
            ('sv_event', 'BND'),
            ('filter', '0.2'),
            ('chrom_a', '1'),
            ('min_a', 200),
            ('max_a', 300),
            ('chrom_b', '2'),
            ('min_b', 300),
            ('max_b', 400),
            ('strand_a', '+'),
            ('strand_b', '-'),
        ])

        # The second, wider record extends both intervals and changes filter.
        cluster.add(second, None)
        check_state(cluster, [
            ('size', 2),
            ('sv_event', 'BND'),
            ('filter', '0.3'),
            ('chrom_a', '1'),
            ('min_a', 195),
            ('max_a', 305),
            ('chrom_b', '2'),
            ('min_b', 295),
            ('max_b', 405),
            ('strand_a', '+'),
            ('strand_b', '-'),
        ])
Beispiel #23
0
def processBEDPE(bedpe_stream, name, dist, output_handle):
    '''
    Convert each BEDPE entry via BedpetoBlockedBedConverter and write it out

    The converter's track line is written first. Header lines (starting
    with '#') and blank lines are skipped; every other line is parsed into
    a Bedpe and the converter's output lines are written newline-separated.

    bedpe_stream  -- iterable of BEDPE text lines
    name, dist    -- passed through to BedpetoBlockedBedConverter
    output_handle -- writable handle receiving the converted output
    '''
    converter = BedpetoBlockedBedConverter(name, dist)
    output_handle.write(converter.track_name())
    for line in bedpe_stream:
        # ignore header
        if line.startswith('#'):
            continue
        stripped = line.rstrip()
        # str.split never returns an empty list, so the emptiness check has to
        # happen on the text itself.
        if not stripped:
            continue
        bedpe = Bedpe(stripped.split('\t'))
        output_handle.write('\n'.join(converter.convert(bedpe)) + '\n')
Beispiel #24
0
 def test_parse_info_tag(self):
     '''
     parse_info_tag() returns a flag's presence or a key's value
     '''
     # (INFO string, tag to look up, expected result)
     cases = (
         ('SVTYPE', 'SVTYPE', True),
         ('SVTYPE', 'AF=', False),
         ('SVTYPE=BND;AF=0.2', 'AF=', '0.2'),
         ('SVTYPE=BND;AF=0.2', 'SVTYPE=', 'BND'),
         ('SVTYPE=BND;SECONDARY;AF=0.2', 'SECONDARY', True),
         # A key that merely ends in "AF=" must not shadow the real AF.
         ('SVTYPE=BND;SECONDARY;BAD_AF=0.3;AF=0.2', 'AF=', '0.2'),
     )
     for info_string, tag, expected in cases:
         self.assertEqual(Bedpe.parse_info_tag(info_string, tag), expected)
Beispiel #25
0
    def test_parse_info_tag(self):
        '''
        update_info_tag() replaces a key's value and rejects absent keys and flags
        '''
        # NOTE(review): the method name says parse_info_tag, but every call
        # below exercises update_info_tag -- consider renaming.
        new_samples = 'sample,sample2'

        replaced_only_tag = Bedpe.update_info_tag('SNAME=sample', 'SNAME=',
                                                  new_samples)
        self.assertEqual(replaced_only_tag, 'SNAME=sample,sample2')

        replaced_first_tag = Bedpe.update_info_tag('SNAME=sample;AF=0.75',
                                                   'SNAME=', new_samples)
        self.assertEqual(replaced_first_tag, 'SNAME=sample,sample2;AF=0.75')

        # A key that is absent, and value-less flags, must raise ValueError.
        for info_string, tag, value in [
                ('AF=0.75', 'SNAME=', new_samples),
                ('SECONDARY;AF=0.5', 'SECONDARY', 'NEW_VALUE'),
                ('AF=0.5;SECONDARY', 'SECONDARY', 'NEW_VALUE')]:
            with self.assertRaises(ValueError):
                Bedpe.update_info_tag(info_string, tag, value)
Beispiel #26
0
    def test_can_add(self):
        '''
        Cluster.can_add() rejects a variant when any one attribute mismatches

        The cluster starts as an exact copy of the variant's coordinates and
        strands. Each step breaks one attribute, checks the rejection, and
        restores it before the next step.
        '''
        bedpe = [
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING',
            'SVTYPE=BND;AF=0.2'
        ]
        b = Bedpe(bedpe)

        c = Cluster()
        c.chrom_a = b.c1
        c.chrom_b = b.c2
        c.min_a = b.s1
        c.max_a = b.e1
        c.min_b = b.s2
        c.max_b = b.e2
        c.strand_a = b.o1
        c.strand_b = b.o2

        # Accepted both before and after the cluster reports a size.
        self.assertTrue(c.can_add(b, 1))
        c.size = 1

        # Event type mismatch.
        c.sv_event = 'DEL'
        self.assertFalse(c.can_add(b, 1))

        c.sv_event = 'BND'
        self.assertTrue(c.can_add(b, 1))

        # Chromosome mismatch on either side.
        c.chrom_a = 'X'
        self.assertFalse(c.can_add(b, 1))

        c.chrom_a = b.c1
        c.chrom_b = 'X'
        self.assertFalse(c.can_add(b, 1))

        # Interval A out of range at either bound.
        c.chrom_b = b.c2
        c.min_a = 305
        self.assertFalse(c.can_add(b, 1))

        c.min_a = b.s1
        c.max_a = 150
        self.assertFalse(c.can_add(b, 1))

        # Interval B out of range at either bound.
        c.max_a = b.e1
        c.min_b = 405
        self.assertFalse(c.can_add(b, 1))

        # Restore min_b from the B-side start (was b.s1, a copy-paste slip) so
        # that only max_b is wrong in the final check.
        c.min_b = b.s2
        c.max_b = 150
        self.assertFalse(c.can_add(b, 1))
Beispiel #27
0
    def test_get_cluster_string(self):
        '''
        get_cluster_string() raises on an empty cluster, else renders its variant
        '''
        variant = Bedpe([
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING',
            'SVTYPE=BND;AF=0.2'
        ])
        cluster = Cluster()

        # Nothing has been added yet.
        with self.assertRaises(ValueError):
            cluster.get_cluster_string()

        # With one member the string equals that member's own rendering.
        cluster.add(variant, None)
        rendered = cluster.get_cluster_string()
        self.assertEqual(rendered, str(variant))
Beispiel #28
0
    def test_info(self):
        '''
        The info attribute exposes the usable INFO string of a record
        '''
        # The first eighteen columns are identical for all three records.
        leading = [
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        ]
        expected = 'SVTYPE=BND;AF=0.2'

        # First INFO column is MISSING: info comes from the second column.
        first_missing = Bedpe(leading + ['MISSING', 'SVTYPE=BND;AF=0.2'])
        self.assertEqual(first_missing.info, expected)

        # Second INFO column is MISSING: info comes from the first column.
        second_missing = Bedpe(leading + ['SVTYPE=BND;AF=0.2', 'MISSING'])
        self.assertEqual(second_missing.info, expected)

        # Both columns populated: info is the first column.
        both_present = Bedpe(leading + ['SVTYPE=BND;AF=0.2', 'SECONDARY'])
        self.assertEqual(both_present.info, expected)
Beispiel #29
0
 def test_retrieve_af(self):
     '''
     retrieve_af() yields the AF value from INFO, or None when AF is absent
     '''
     # The first eighteen columns are identical for all three records.
     leading = [
         '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
         'BND', 'PASS', '.', '.', '.', '.', '.', '.',
     ]

     with_af = Bedpe(leading + ['SVTYPE=BND;AF=0.2'] * 2)
     self.assertEqual(with_af.retrieve_af(), '0.2')

     without_af = Bedpe(leading + ['SVTYPE=BND'] * 2)
     self.assertIsNone(without_af.retrieve_af())

     # FIN_AF ends in "AF" too but must not be confused with AF itself.
     with_lookalike = Bedpe(leading + ['SVTYPE=BND;AF=0.2;FIN_AF=0.01'] * 2)
     self.assertEqual(with_lookalike.retrieve_af(), '0.2')
Beispiel #30
0
 def test_parse_score(self):
     '''
     parse_score() turns a numeric string into a number and keeps '.' as is
     '''
     for raw_score, parsed in (('20', 20), ('.', '.')):
         self.assertEqual(Bedpe.parse_score(raw_score), parsed)
Beispiel #31
0
 def flag_as_pruned(bedpe):
     '''
     Add the RETAINED flag to a BEDPE's info field(s) unless already present
     '''
     already_flagged = Bedpe.parse_info_tag(bedpe.info, 'RETAINED')
     if already_flagged:
         return
     # A None value makes set_info store RETAINED as a value-less flag.
     bedpe.set_info('RETAINED', None)
Beispiel #32
0
 def flag_as_pruned(bedpe):
     '''
     Add the RETAINED flag to a BEDPE's info field(s) unless already present
     '''
     if Bedpe.parse_info_tag(bedpe.info, 'RETAINED'):
         # Already marked; nothing to do.
         return
     flag_name, flag_value = 'RETAINED', None
     bedpe.set_info(flag_name, flag_value)
Beispiel #33
0
 def test_parse_score(self):
     '''
     parse_score() turns a numeric string into a number and keeps '.' as is
     '''
     numeric_score = Bedpe.parse_score('20')
     self.assertEqual(numeric_score, 20)
     missing_score = Bedpe.parse_score('.')
     self.assertEqual(missing_score, '.')
Beispiel #34
0
    def test_set_info(self):
        '''
        set_info() appends a tag to each usable INFO field
        '''
        # The first eighteen columns are identical for all four records.
        leading = [
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        ]

        # First INFO column is MISSING.
        first_missing = Bedpe(leading + ['MISSING', 'SVTYPE=BND'])
        first_missing.set_info('AF', '0.2')
        self.assertEqual(first_missing.info, 'SVTYPE=BND;AF=0.2')

        # Second INFO column is MISSING.
        second_missing = Bedpe(leading + ['SVTYPE=BND', 'MISSING'])
        second_missing.set_info('AF', '0.2')
        self.assertEqual(second_missing.info, 'SVTYPE=BND;AF=0.2')

        # Both INFO columns populated: the tag lands in both.
        both_present = Bedpe(leading + ['SVTYPE=BND', 'SECONDARY'])
        both_present.set_info('AF', '0.2')
        self.assertEqual(both_present.info1, 'SVTYPE=BND;AF=0.2')
        self.assertEqual(both_present.info2, 'SECONDARY;AF=0.2')

        # A None value adds a bare flag; a '.' second column stays untouched.
        dot_second = Bedpe(leading + ['SVTYPE=BND', '.'])
        dot_second.set_info('PRESENT', None)
        self.assertEqual(dot_second.info, 'SVTYPE=BND;PRESENT')
        self.assertEqual(dot_second.info2, '.')
Beispiel #35
0
def varLookup(aFile, bFile, bedpe_out, max_distance, pass_prefix, cohort_name):
    '''
    Annotate each variant of aFile with matching variants from bFile

    All variants of bFile are loaded first. Then aFile is streamed: its
    header lines are collected, two INFO definitions named after the cohort
    are added, the header and a BEDPE column line are written to bedpe_out,
    and for each variant add() is called against every loaded bFile variant
    before the variant is written out.

    aFile, bFile -- path, path ending in '.gz', or the literal "stdin"
    bedpe_out    -- writable handle for the annotated BEDPE
    max_distance -- passed through to add()
    pass_prefix  -- lines starting with this are header lines; None
                    disables header detection in both files
    cohort_name  -- prefix for the new INFO tags; defaults to bFile's
                    base name when None

    Exits the process with status 1 if a variant in either file has no
    allele frequency. Files opened here are closed before returning.
    '''
    # FIXME The following code is heavily duplicated with vcftobedpe and bedpetovcf. Harmonize!!!
    bList = list()
    headerObj = Vcf()  # co-opt the VCF header object
    b_basename = str(str(bFile).split('/')[-1])
    if cohort_name is None:
        cohort_name = b_basename

    if bFile == "stdin":
        bData = sys.stdin
    elif bFile.endswith('.gz'):
        bData = gzip.open(bFile, 'rb')
    else:
        bData = open(bFile, 'r')
    try:
        for bLine in bData:
            # Same None guard as the aFile loop below; startswith(None) raises.
            if pass_prefix is not None and bLine.startswith(pass_prefix):
                continue
            bentry = Bedpe(bLine.rstrip().split('\t'))
            if bentry.af is None:
                sys.stderr.write(
                    'No allele frequency for variant found in -b file. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n'
                )
                sys.exit(1)
            bList.append(bentry)
    finally:
        # Never close the process-wide stdin, only handles opened here.
        if bData is not sys.stdin:
            bData.close()

    if aFile == "stdin":
        aData = sys.stdin
    elif aFile.endswith('.gz'):
        aData = gzip.open(aFile, 'rb')
    else:
        aData = open(aFile, 'r')
    try:
        in_header = True
        header_lines = []
        sample_list = None
        for aLine in aData:
            if pass_prefix is not None and aLine.startswith(pass_prefix):
                # The single-'#' line names the columns; everything past the
                # 14th tab is kept as the sample column text.
                if aLine[0:1] == '#' and aLine[1:2] != '#':
                    sample_list = aLine.rstrip().split('\t', 14)[-1]
                else:
                    header_lines.append(aLine)
                continue

            if in_header:
                # First data line: emit the header exactly once.
                headerObj.add_header(header_lines)
                headerObj.add_info(
                    cohort_name + '_AF', '.', 'Float',
                    'Allele frequency(ies) for matching variants found in the '
                    + cohort_name + ' vcf' + ' (' + b_basename + ')')
                headerObj.add_info(
                    cohort_name + '_VarID', '.', 'Integer',
                    'List of Variant ID(s) for matching variants found in the '
                    + cohort_name + ' vcf' + ' (' + b_basename + ')')

                header = headerObj.get_header()
                # Drop the header object's last line; the BEDPE column line
                # below takes its place.
                bedpe_out.write(header[:header.rfind('\n')] + '\n')
                columns = [
                    '#CHROM_A', 'START_A', 'END_A', 'CHROM_B', 'START_B',
                    'END_B', 'ID', 'QUAL', 'STRAND_A', 'STRAND_B', 'TYPE',
                    'FILTER', 'INFO_A', 'INFO_B'
                ]
                # sample_list stays None when no column line was seen; treat
                # that like an empty sample list instead of calling len(None).
                if sample_list:
                    columns.append(sample_list)
                bedpe_out.write('\t'.join(columns) + '\n')
                in_header = False

            a = Bedpe(aLine.rstrip().split('\t'))
            if a.af is None:
                sys.stderr.write(
                    'No allele frequency for variant found in -a file. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n'
                )
                sys.exit(1)
            for b in bList:
                add(a, b, max_distance)
            bedpe_out.write(get_var_string(a, cohort_name) + '\n')
    finally:
        if aData is not sys.stdin:
            aData.close()
Beispiel #36
0
 def test__combine_sname_values(self):
     '''
     _combine_sname_values() merges two SNAME value lists, tolerating None
     '''
     combine = Bedpe._combine_sname_values
     pair = 'sample2:4,sample3:5'

     # Compared as sets because the order of merged entries is not asserted.
     merged_parts = combine('sample1:2', 'sample2:4,sample3:5').split(',')
     self.assertEqual(set(merged_parts), set(['sample1:2', 'sample2:4', 'sample3:5']))

     # When either side is None the other side comes back unchanged.
     self.assertEqual(combine(None, 'sample2:4,sample3:5'), pair)
     self.assertEqual(combine('sample2:4,sample3:5', None), pair)
    def convert(self, primary_variant, secondary_variant=None):
        '''
        Convert the passed VCF variant(s) into a BEDPE object

        primary_variant   -- VCF variant for the first breakend, or None
        secondary_variant -- VCF variant for the mate breakend (BND), or None

        When primary_variant is None the secondary variant supplies the
        coordinates, the two sides are swapped, and INFO_A becomes "MISSING".
        For BND variants lacking a secondary, INFO_B becomes "MISSING".

        Returns a Bedpe built from the assembled fields.
        Raises ValueError if the variant has no SVTYPE INFO field.
        '''
        vcf_variant = primary_variant
        if primary_variant is None:
            vcf_variant = secondary_variant

        try:
            sv_type = vcf_variant.info['SVTYPE']
        except KeyError:
            raise ValueError('SVTYPE field required for conversion to BEDPE')

        parser = self.simple_breakpoints
        if sv_type == 'BND':
            parser = self.bnd_breakpoints

        c1, s1, e1, c2, s2, e2, o1, o2 = parser(vcf_variant)

        s1, e1 = self.adjust_coordinate(vcf_variant, 'CIPOS', s1, e1)
        s2, e2 = self.adjust_coordinate(vcf_variant, 'CIEND', s2, e2)

        orig_name_a = vcf_variant.var_id
        orig_ref_a = vcf_variant.ref
        orig_alt_a = vcf_variant.alt
        info_a = vcf_variant.get_info_string()
        if primary_variant is None:
            info_a = "MISSING"
            orig_name_a = orig_ref_a = orig_alt_a = '.'
            # Only the mate was supplied: swap so its data sits on side B.
            c1, s1, e1, o1, c2, s2, e2, o2 = c2, s2, e2, o2, c1, s1, e1, o1

        info_b = '.'
        orig_name_b = orig_ref_b = orig_alt_b = '.'
        if sv_type == 'BND':
            if secondary_variant is None:
                info_b = "MISSING"
            else:
                info_b = secondary_variant.get_info_string()
                orig_name_b = secondary_variant.var_id
                orig_ref_b = secondary_variant.ref
                orig_alt_b = secondary_variant.alt
                # Side B coordinates come from the mate's own position and
                # its CIPOS; the rest of the parsed tuple is not needed.
                _, ss1, se1, _, _, _, _, _ = parser(secondary_variant)
                s2, e2 = self.adjust_coordinate(secondary_variant, 'CIPOS',
                                                ss1, se1)

        # For MANTA single-ended BNDs, EVENT is not present.
        # XXX This has probably already been calculated outside of this method. May be a candidate to memoize or otherwise cache?
        # By adding to the variant class, perhaps?
        name = vcf_variant.var_id
        if 'EVENT' in vcf_variant.info:
            name = vcf_variant.info['EVENT']
        elif 'MATEID' in vcf_variant.info and vcf_variant.var_id.startswith(
                'Manta'):
            # Specifically handle Manta: drop the last ':'-separated part.
            # Indexing [0] keeps an ID without ':' intact instead of raising
            # on tuple unpacking.
            name = vcf_variant.var_id.rsplit(':', 1)[0]

        # Build a real list: on Python 3 map() returns an iterator, which
        # does not support the += below.
        fields = [str(value) for value in [
            c1,
            max(s1, 0),
            max(e1, 0),
            c2,
            max(s2, 0),
            max(e2, 0),
            name,
            vcf_variant.qual,
            o1,
            o2,
            sv_type,
            vcf_variant.filter,
            orig_name_a,
            orig_ref_a,
            orig_alt_a,
            orig_name_b,
            orig_ref_b,
            orig_alt_b,
            info_a,
            info_b,
        ]]
        format_string = vcf_variant.get_format_string()
        if format_string is not None:
            fields += [format_string, vcf_variant.get_gt_string()]
        return Bedpe(fields)
Beispiel #38
0
 def test_sname_value(self):
     '''
     sname_value() returns the SNAME value, or None when absent or empty
     '''
     populated = Bedpe.sname_value('SNAME=sample1:2,sample2:3')
     self.assertEqual(populated, 'sample1:2,sample2:3')

     # A missing tag and an empty value both give None.
     for info_string in ('AF', 'SNAME='):
         self.assertIsNone(Bedpe.sname_value(info_string))
Beispiel #39
0
    def test_set_info(self):
        '''
        set_info() appends a tag to each usable INFO field
        '''
        # The first eighteen columns are identical for all four records.
        leading = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', '.', '.', '.', '.', '.', '.' ]

        # Exactly one INFO column is usable, the other is MISSING.
        for info_columns in (['MISSING', 'SVTYPE=BND'], ['SVTYPE=BND', 'MISSING']):
            record = Bedpe(leading + info_columns)
            record.set_info('AF', '0.2')
            self.assertEqual(record.info, 'SVTYPE=BND;AF=0.2')

        # Both INFO columns populated: the tag lands in both.
        both_present = Bedpe(leading + ['SVTYPE=BND', 'SECONDARY'])
        both_present.set_info('AF', '0.2')
        self.assertEqual(both_present.info1, 'SVTYPE=BND;AF=0.2')
        self.assertEqual(both_present.info2, 'SECONDARY;AF=0.2')

        # A None value adds a bare flag; a '.' second column stays untouched.
        dot_second = Bedpe(leading + ['SVTYPE=BND', '.'])
        dot_second.set_info('PRESENT', None)
        self.assertEqual(dot_second.info, 'SVTYPE=BND;PRESENT')
        self.assertEqual(dot_second.info2, '.')