Exemple #1
0
    def test_add(self):
        # Exercises Cluster.add with two BND records. After each add, the
        # cluster's size, event type, filter value, per-side chromosome,
        # min/max coordinates and strands are compared to expected values.
        first = Bedpe([
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2',
        ])
        second = Bedpe([
            '1', '195', '305', '2', '295', '405', '777_1', '57', '+', '-',
            'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.3',
        ])

        cluster = Cluster()

        # Each step pairs the record to add with the attribute values the
        # cluster is expected to expose right after that add.
        steps = (
            (first, (
                ('size', 1),
                ('sv_event', 'BND'),
                ('filter', '0.2'),
                ('chrom_a', '1'),
                ('min_a', 200),
                ('max_a', 300),
                ('chrom_b', '2'),
                ('min_b', 300),
                ('max_b', 400),
                ('strand_a', '+'),
                ('strand_b', '-'),
            )),
            (second, (
                ('size', 2),
                ('sv_event', 'BND'),
                ('filter', '0.3'),
                ('chrom_a', '1'),
                ('min_a', 195),
                ('max_a', 305),
                ('chrom_b', '2'),
                ('min_b', 295),
                ('max_b', 405),
                ('strand_a', '+'),
                ('strand_b', '-'),
            )),
        )
        for record, expectations in steps:
            cluster.add(record, None)
            for attribute, wanted in expectations:
                self.assertEqual(getattr(cluster, attribute), wanted)
Exemple #2
0
    def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
        """Read BEDPE lines, group them into clusters and emit pruned output.

        Leading lines that start with '#' are copied to ``bedpe_out``; an
        ``##INFO`` line describing the RETAINED flag is written just before
        the ``#CHROM`` line. Every later line is parsed into a ``Bedpe`` and
        added to each existing cluster that accepts it, or to a new cluster
        when none does. Clusters that accepted the same record are then
        merged pairwise where ``can_add`` allows it.

        :param in_file: iterable yielding the input lines.
        :param bedpe_out: object with a ``write`` method receiving the output.
        :param is_sorted: when true, ``self.prune`` is also invoked each time
            the running line count reaches a multiple of 1000.

        Exits the process with status 1 if a record has no allele frequency
        (``af is None``). Records whose ``af`` is ``'.'`` are counted in
        ``self.skipped_lines`` and otherwise ignored.
        """
        distance_limit = self.max_distance
        scoring = self.eval_param

        header_done = False
        for raw in in_file:
            if not header_done and raw.startswith('#'):
                if raw.startswith('#CHROM'):
                    bedpe_out.write('##INFO=<ID=RETAINED,Number=0,Type=Flag,Description="Variants clustering with this call were pruned">\n')
                bedpe_out.write(raw)
                continue
            header_done = True
            self.bedpe_lines += 1
            bedpe = Bedpe(raw.rstrip().split('\t'))
            if bedpe.af is None:
                sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
                sys.exit(1)
            if bedpe.af == '.':
                self.skipped_lines += 1
                continue

            # Add the record to every cluster willing to take it.
            accepting = []
            for candidate in self.cluster_list:
                if candidate.can_add(bedpe, distance_limit):
                    candidate.add(bedpe, scoring)
                    accepting.append(candidate)

            if not accepting:
                fresh = Cluster()
                fresh.add(bedpe, scoring)
                self.cluster_list.append(fresh)
            elif len(accepting) > 1:
                # Several clusters took the same record: fold later clusters
                # into earlier ones when the earlier one accepts the later
                # cluster's first element.
                absorbed = []
                anchor = 0
                while anchor < len(accepting) - 1:
                    keeper = accepting[anchor]
                    survivors = []
                    for other in accepting[anchor + 1:]:
                        if keeper.can_add(other.elements[0], distance_limit):
                            keeper.add(other.elements[0], scoring)
                            absorbed.append(other)
                        else:
                            survivors.append(other)
                    # Drop the absorbed clusters from the tail in one step.
                    accepting[anchor + 1:] = survivors
                    anchor += 1
                if absorbed:
                    self.cluster_list = [
                        kept for kept in self.cluster_list
                        if kept not in absorbed
                    ]

            # prune and print eligible clusters
            if is_sorted and self.bedpe_lines % 1000 == 0:
                self.cluster_list = self.prune(bedpe, False, bedpe_out)

        # Final pass once the input is exhausted.
        self.cluster_list = self.prune(None, True, bedpe_out)

        sys.stderr.write(self.stats_report_string())
        return
Exemple #3
0
    def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
        """Cluster the records of a BEDPE stream and write the pruned result.

        Header lines (leading lines beginning with '#') are echoed to
        ``bedpe_out``, with an ``##INFO`` definition for the RETAINED flag
        inserted ahead of the ``#CHROM`` line. Each remaining line becomes a
        ``Bedpe`` record that joins every cluster whose ``can_add`` accepts
        it, or seeds a new ``Cluster`` otherwise. When more than one cluster
        accepted the record, those clusters are merged where possible and
        the absorbed ones are removed from ``self.cluster_list``.

        :param in_file: iterable of input lines.
        :param bedpe_out: writable object used for all output.
        :param is_sorted: enables the periodic ``self.prune`` call made
            whenever ``self.bedpe_lines`` is a multiple of 1000.

        Calls ``sys.exit(1)`` when a record's ``af`` is ``None``; records
        with ``af == '.'`` only increment ``self.skipped_lines``.
        """
        limit = self.max_distance
        criterion = self.eval_param

        reading_header = True
        for text in in_file:
            if reading_header and text.startswith('#'):
                if text.startswith('#CHROM'):
                    bedpe_out.write('##INFO=<ID=RETAINED,Number=0,Type=Flag,Description="Variants clustering with this call were pruned">\n')
                bedpe_out.write(text)
                continue
            reading_header = False
            self.bedpe_lines += 1
            fields = text.rstrip().split('\t')
            bedpe = Bedpe(fields)
            if bedpe.af is None:
                sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
                sys.exit(1)
            if bedpe.af == '.':
                self.skipped_lines += 1
                continue

            hits = []
            for existing in self.cluster_list:
                if not existing.can_add(bedpe, limit):
                    continue
                existing.add(bedpe, criterion)
                hits.append(existing)

            if len(hits) == 0:
                seeded = Cluster()
                seeded.add(bedpe, criterion)
                self.cluster_list.append(seeded)
            elif len(hits) > 1:
                merged_away = []
                pos = 0
                while pos < len(hits) - 1:
                    head = hits[pos]
                    # Positions (within hits) of clusters folded into head.
                    doomed = []
                    for offset, other in enumerate(hits[pos + 1:], pos + 1):
                        if head.can_add(other.elements[0], limit):
                            head.add(other.elements[0], criterion)
                            merged_away.append(other)
                            doomed.append(offset)
                    # Delete from the back so earlier positions stay valid.
                    for offset in reversed(doomed):
                        del hits[offset]
                    pos += 1
                if merged_away:
                    self.cluster_list = [
                        entry for entry in self.cluster_list
                        if entry not in merged_away
                    ]

            # prune and print eligible clusters
            if self.bedpe_lines % 1000 == 0:
                if is_sorted:
                    self.cluster_list = self.prune(bedpe, False, bedpe_out)

        self.cluster_list = self.prune(None, True, bedpe_out)

        report = self.stats_report_string()
        sys.stderr.write(report)
        return
Exemple #4
0
    def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
        """Cluster the records of a BEDPE stream and write the pruned result.

        Leading lines that start with '#' are copied to ``bedpe_out``
        unchanged. Every later line is parsed into a ``Bedpe`` record and
        added to each existing cluster whose ``can_add`` accepts it; if no
        cluster accepts it, a new ``Cluster`` is created for it. When more
        than one cluster accepted the record, later clusters are folded into
        earlier ones where ``can_add`` allows, and the absorbed clusters are
        removed from ``self.cluster_list``.

        :param in_file: iterable yielding the input lines.
        :param bedpe_out: object with a ``write`` method receiving the output.
        :param is_sorted: when true, ``self.prune`` is also invoked each time
            ``self.bedpe_lines`` reaches a multiple of 1000.

        Calls ``sys.exit(1)`` if a record has no allele frequency
        (``af is None``). Records whose ``af`` is ``'.'`` are counted in
        ``self.skipped_lines`` and otherwise ignored.
        """
        # Locally alias instance variables
        max_distance = self.max_distance
        eval_param = self.eval_param

        in_header = True
        for line in in_file:
            if in_header and line.startswith('#'):
                bedpe_out.write(line)
                continue
            in_header = False
            self.bedpe_lines += 1
            bedpe = Bedpe(line.rstrip().split('\t'))
            if bedpe.af is None:
                sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
                sys.exit(1)
            if bedpe.af == '.':
                self.skipped_lines += 1
                continue

            matched_clusters = []
            for cluster in self.cluster_list:
                if cluster.can_add(bedpe, max_distance):
                    cluster.add(bedpe, eval_param)
                    matched_clusters.append(cluster)

            if not matched_clusters:
                new_cluster = Cluster()
                new_cluster.add(bedpe, eval_param)
                self.cluster_list.append(new_cluster)
            elif len(matched_clusters) > 1:
                # Clusters absorbed into another one are collected here so
                # that exactly those are dropped from self.cluster_list.
                # Filtering on matched_clusters would drop the survivors.
                pruned_clusters = []
                i = 0
                while i < len(matched_clusters) - 1:
                    survivor = matched_clusters[i]
                    remaining = []
                    # Walk a snapshot of the tail so every candidate is
                    # examined; deleting in place while advancing the index
                    # would skip the element after each deletion.
                    for other in matched_clusters[i + 1:]:
                        if survivor.can_add(other.elements[0], max_distance):
                            survivor.add(other.elements[0], eval_param)
                            pruned_clusters.append(other)
                        else:
                            remaining.append(other)
                    matched_clusters[i + 1:] = remaining
                    i += 1
                if pruned_clusters:
                    self.cluster_list = [
                        cluster for cluster in self.cluster_list
                        if cluster not in pruned_clusters
                    ]

            # prune and print eligible clusters
            if self.bedpe_lines % 1000 == 0 and is_sorted:
                self.cluster_list = self.prune(bedpe, False, bedpe_out)

        self.cluster_list = self.prune(None, True, bedpe_out)

        sys.stderr.write(self.stats_report_string())
        return
Exemple #5
0
    def test_get_cluster_string(self):
        # get_cluster_string() on a Cluster with nothing added is expected
        # to raise ValueError; after one record is added, the returned string
        # must equal str() of that record.
        record = Bedpe([
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2',
        ])
        cluster = Cluster()

        self.assertRaises(ValueError, cluster.get_cluster_string)

        cluster.add(record, None)
        self.assertEqual(cluster.get_cluster_string(), str(record))
Exemple #6
0
    def test_get_cluster_string(self):
        # A Cluster with no records must refuse to render itself
        # (ValueError); with a single record, its rendering is expected to
        # match str() of that record.
        fields = ['1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-']
        fields += ['BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING']
        fields += ['SVTYPE=BND;AF=0.2']
        record = Bedpe(fields)

        cluster = Cluster()

        with self.assertRaises(ValueError):
            cluster.get_cluster_string()

        cluster.add(record, None)
        rendered = cluster.get_cluster_string()
        self.assertEqual(rendered, str(record))
Exemple #7
0
    def test_add(self):
        # Adds two BND records to one Cluster and, after each add, compares
        # the cluster's attributes against the values expected at that point.
        first = Bedpe([
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING',
            'SVTYPE=BND;AF=0.2',
        ])
        second = Bedpe([
            '1', '195', '305', '2', '295', '405', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING',
            'SVTYPE=BND;AF=0.3',
        ])

        # Attribute names, in the order the expected values are listed below.
        attributes = (
            'size', 'sv_event', 'filter',
            'chrom_a', 'min_a', 'max_a',
            'chrom_b', 'min_b', 'max_b',
            'strand_a', 'strand_b',
        )

        cluster = Cluster()

        cluster.add(first, None)
        after_first = (1, 'BND', '0.2', '1', 200, 300, '2', 300, 400, '+', '-')
        for name, wanted in zip(attributes, after_first):
            self.assertEqual(getattr(cluster, name), wanted)

        cluster.add(second, None)
        after_second = (2, 'BND', '0.3', '1', 195, 305, '2', 295, 405, '+', '-')
        for name, wanted in zip(attributes, after_second):
            self.assertEqual(getattr(cluster, name), wanted)
Exemple #8
0
    def test_can_add(self):
        # Checks Cluster.can_add against a cluster whose fields are filled in
        # by hand from a single BND record. One attribute at a time is then
        # changed so that each rejection case is exercised on its own.
        bedpe = [ '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-', 'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2' ]
        b = Bedpe(bedpe)

        c = Cluster()
        c.chrom_a = b.c1
        c.chrom_b = b.c2
        c.min_a = b.s1
        c.max_a = b.e1
        c.min_b = b.s2
        c.max_b = b.e2
        c.strand_a = b.o1
        c.strand_b = b.o2

        # This test has not assigned size or sv_event yet; the record is
        # expected to be accepted at this point.
        self.assertTrue(c.can_add(b, 1))
        c.size = 1

        # Mismatching event type.
        c.sv_event = 'DEL'
        self.assertFalse(c.can_add(b, 1))

        # Matching event type and identical coordinates.
        c.sv_event = 'BND'
        self.assertTrue(c.can_add(b, 1))

        # Mismatching chromosome on side A.
        c.chrom_a = 'X'
        self.assertFalse(c.can_add(b, 1))

        # Mismatching chromosome on side B.
        c.chrom_a = b.c1
        c.chrom_b = 'X'
        self.assertFalse(c.can_add(b, 1))

        # min_a moved past the record's side-A end.
        c.chrom_b = b.c2
        c.min_a = 305
        self.assertFalse(c.can_add(b, 1))

        # max_a moved before the record's side-A start.
        c.min_a = b.s1
        c.max_a = 150
        self.assertFalse(c.can_add(b, 1))

        # min_b moved past the record's side-B end.
        c.max_a = b.e1
        c.min_b = 405
        self.assertFalse(c.can_add(b, 1))

        # max_b moved before the record's side-B start. min_b is restored to
        # its initial value b.s2 (it was previously reset from b.s1, the
        # value used for min_a) so that only max_b differs in this case.
        c.min_b = b.s2
        c.max_b = 150
        self.assertFalse(c.can_add(b, 1))
Exemple #9
0
    def test_can_add(self):
        # Checks Cluster.can_add against a cluster whose fields are filled in
        # by hand from a single BND record. One attribute at a time is then
        # changed so that each rejection case is exercised on its own.
        bedpe = [
            '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
            'BND', 'PASS', '.', '.', '.', '.', '.', '.', 'MISSING',
            'SVTYPE=BND;AF=0.2'
        ]
        b = Bedpe(bedpe)

        c = Cluster()
        c.chrom_a = b.c1
        c.chrom_b = b.c2
        c.min_a = b.s1
        c.max_a = b.e1
        c.min_b = b.s2
        c.max_b = b.e2
        c.strand_a = b.o1
        c.strand_b = b.o2

        # This test has not assigned size or sv_event yet; the record is
        # expected to be accepted at this point.
        self.assertTrue(c.can_add(b, 1))
        c.size = 1

        # Mismatching event type.
        c.sv_event = 'DEL'
        self.assertFalse(c.can_add(b, 1))

        # Matching event type and identical coordinates.
        c.sv_event = 'BND'
        self.assertTrue(c.can_add(b, 1))

        # Mismatching chromosome on side A.
        c.chrom_a = 'X'
        self.assertFalse(c.can_add(b, 1))

        # Mismatching chromosome on side B.
        c.chrom_a = b.c1
        c.chrom_b = 'X'
        self.assertFalse(c.can_add(b, 1))

        # min_a moved past the record's side-A end.
        c.chrom_b = b.c2
        c.min_a = 305
        self.assertFalse(c.can_add(b, 1))

        # max_a moved before the record's side-A start.
        c.min_a = b.s1
        c.max_a = 150
        self.assertFalse(c.can_add(b, 1))

        # min_b moved past the record's side-B end.
        c.max_a = b.e1
        c.min_b = 405
        self.assertFalse(c.can_add(b, 1))

        # max_b moved before the record's side-B start. min_b is restored to
        # its initial value b.s2 (it was previously reset from b.s1, the
        # value used for min_a) so that only max_b differs in this case.
        c.min_b = b.s2
        c.max_b = 150
        self.assertFalse(c.can_add(b, 1))