def test_add(self):
    """Check the cluster attributes after each of two successive adds.

    Two BND records are added to a fresh Cluster, passing ``None`` as the
    second argument of ``add``. After every addition the size, event
    type, filter, chromosomes, coordinate bounds and strands are compared
    against hard-coded expectations.
    """
    first = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2',
    ])
    second = Bedpe([
        '1', '195', '305', '2', '295', '405', '777_1', '57', '+', '-',
        'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.3',
    ])
    cluster = Cluster()

    def snapshot():
        # Every attribute under test, read in a fixed order.
        return (
            cluster.size, cluster.sv_event, cluster.filter,
            cluster.chrom_a, cluster.min_a, cluster.max_a,
            cluster.chrom_b, cluster.min_b, cluster.max_b,
            cluster.strand_a, cluster.strand_b,
        )

    cluster.add(first, None)
    self.assertEqual(
        snapshot(),
        (1, 'BND', '0.2', '1', 200, 300, '2', 300, 400, '+', '-'),
    )

    # The second record is wider on both sides and carries AF=0.3.
    cluster.add(second, None)
    self.assertEqual(
        snapshot(),
        (2, 'BND', '0.3', '1', 195, 305, '2', 295, 405, '+', '-'),
    )
def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
    """Cluster the BEDPE records of ``in_file`` and write results to ``bedpe_out``.

    Leading '#' lines are copied to ``bedpe_out`` unchanged; an INFO
    header line describing the RETAINED flag is written immediately
    before the '#CHROM' line. Every other line is split on tabs and
    parsed into a Bedpe, then added to every cluster in
    ``self.cluster_list`` that accepts it (or to a new Cluster when none
    does). When one record lands in several clusters, later clusters
    whose first element fits an earlier one are folded into it and
    removed from ``self.cluster_list``.

    Args:
        in_file: iterable of text lines (header lines first).
        bedpe_out: object with a ``write`` method receiving the output.
        is_sorted: when true, ``self.prune`` is also invoked after every
            1000th data line instead of only once at the end.

    Side effects:
        Updates ``self.bedpe_lines``, ``self.skipped_lines`` and
        ``self.cluster_list``; writes the stats report to stderr; exits
        the process with status 1 if a record has no allele frequency.
    """
    merge_distance = self.max_distance
    evaluation = self.eval_param

    header_done = False
    for raw_line in in_file:
        if not header_done and raw_line.startswith('#'):
            if raw_line.startswith('#CHROM'):
                bedpe_out.write('##INFO=<ID=RETAINED,Number=0,Type=Flag,Description="Variants clustering with this call were pruned">\n')
            bedpe_out.write(raw_line)
            continue
        header_done = True
        self.bedpe_lines += 1

        record = Bedpe(raw_line.rstrip().split('\t'))
        if record.af is None:
            sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
            sys.exit(1)
        if record.af == '.':
            # Counted as skipped; the periodic prune below is bypassed too.
            self.skipped_lines += 1
            continue

        # Add the record to every cluster willing to take it.
        hits = []
        for cluster in self.cluster_list:
            if cluster.can_add(record, merge_distance):
                cluster.add(record, evaluation)
                hits.append(cluster)

        if not hits:
            fresh = Cluster()
            fresh.add(record, evaluation)
            self.cluster_list.append(fresh)
        elif len(hits) > 1:
            # Walk the hit clusters pairwise: each anchor absorbs the
            # first element of any later cluster it can accept.
            absorbed = []
            anchor_pos = 0
            while anchor_pos < len(hits) - 1:
                anchor = hits[anchor_pos]
                remaining = hits[:anchor_pos + 1]
                for candidate in hits[anchor_pos + 1:]:
                    if anchor.can_add(candidate.elements[0], merge_distance):
                        anchor.add(candidate.elements[0], evaluation)
                        absorbed.append(candidate)
                    else:
                        remaining.append(candidate)
                hits = remaining
                anchor_pos += 1
            if absorbed:
                self.cluster_list = [
                    cluster for cluster in self.cluster_list
                    if cluster not in absorbed
                ]

        # Prune and print eligible clusters.
        if is_sorted and self.bedpe_lines % 1000 == 0:
            self.cluster_list = self.prune(record, False, bedpe_out)

    self.cluster_list = self.prune(None, True, bedpe_out)
    sys.stderr.write(self.stats_report_string())
def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
    """Cluster the BEDPE records of ``in_file`` and write results to ``bedpe_out``.

    Leading '#' lines are copied to ``bedpe_out`` unchanged. Every other
    line is split on tabs and parsed into a Bedpe, then added to every
    cluster in ``self.cluster_list`` that accepts it (or to a new Cluster
    when none does). When one record lands in several clusters, later
    clusters whose first element fits an earlier one are folded into it
    and removed from ``self.cluster_list``.

    Args:
        in_file: iterable of text lines (header lines first).
        bedpe_out: object with a ``write`` method receiving the output.
        is_sorted: when true, ``self.prune`` is also invoked after every
            1000th data line instead of only once at the end.

    Side effects:
        Updates ``self.bedpe_lines``, ``self.skipped_lines`` and
        ``self.cluster_list``; writes the stats report to stderr; exits
        the process with status 1 if a record has no allele frequency.
    """
    # Locally alias instance variables
    max_distance = self.max_distance
    eval_param = self.eval_param

    in_header = True
    for line in in_file:
        if line.startswith('#') and in_header:
            bedpe_out.write(line)
            continue
        in_header = False
        self.bedpe_lines += 1
        bedpe = Bedpe(line.rstrip().split('\t'))
        if bedpe.af is None:
            sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
            sys.exit(1)
        if bedpe.af == '.':
            self.skipped_lines += 1
            continue

        matched_clusters = []
        for cluster in self.cluster_list:
            if cluster.can_add(bedpe, max_distance):
                cluster.add(bedpe, eval_param)
                matched_clusters.append(cluster)

        if not matched_clusters:
            new_cluster = Cluster()
            new_cluster.add(bedpe, eval_param)
            self.cluster_list.append(new_cluster)
        elif len(matched_clusters) > 1:
            # Fold later matched clusters into earlier ones. Absorbed
            # clusters are collected separately and the working list is
            # rebuilt per pass, so no candidate is skipped by an index
            # shift and the final filter removes exactly the absorbed
            # clusters rather than the survivors.
            pruned_clusters = []
            i = 0
            while i < len(matched_clusters) - 1:
                keeper = matched_clusters[i]
                survivors = matched_clusters[:i + 1]
                for other in matched_clusters[i + 1:]:
                    if keeper.can_add(other.elements[0], max_distance):
                        keeper.add(other.elements[0], eval_param)
                        pruned_clusters.append(other)
                    else:
                        survivors.append(other)
                matched_clusters = survivors
                i += 1
            if pruned_clusters:
                self.cluster_list = [
                    cluster for cluster in self.cluster_list
                    if cluster not in pruned_clusters
                ]

        #prune and print eligible clusters
        if self.bedpe_lines % 1000 == 0 and is_sorted:
            self.cluster_list = self.prune(bedpe, False, bedpe_out)

    self.cluster_list = self.prune(None, True, bedpe_out)
    sys.stderr.write(self.stats_report_string())
    return
def test_get_cluster_string(self):
    """Check get_cluster_string on an empty and on a one-record cluster.

    Before anything is added the call is expected to raise ValueError;
    after a single record is added it is expected to equal ``str`` of
    that record.
    """
    record = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2',
    ])
    cluster = Cluster()

    self.assertRaises(ValueError, cluster.get_cluster_string)

    cluster.add(record, None)
    rendered = cluster.get_cluster_string()
    self.assertEqual(rendered, str(record))
def test_get_cluster_string(self):
    """Check get_cluster_string on an empty and on a one-record cluster.

    Before anything is added the call is expected to raise ValueError;
    after a single record is added it is expected to equal ``str`` of
    that record.
    """
    record = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.2',
    ])
    cluster = Cluster()

    self.assertRaises(ValueError, cluster.get_cluster_string)

    cluster.add(record, None)
    rendered = cluster.get_cluster_string()
    self.assertEqual(rendered, str(record))
def test_add(self):
    """Check the cluster attributes after each of two successive adds.

    Two BND records are added to a fresh Cluster, passing ``None`` as the
    second argument of ``add``. After every addition the size, event
    type, filter, chromosomes, coordinate bounds and strands are compared
    against hard-coded expectations.
    """
    first = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.2',
    ])
    second = Bedpe([
        '1', '195', '305', '2', '295', '405', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.3',
    ])
    cluster = Cluster()

    def snapshot():
        # Every attribute under test, read in a fixed order.
        return (
            cluster.size, cluster.sv_event, cluster.filter,
            cluster.chrom_a, cluster.min_a, cluster.max_a,
            cluster.chrom_b, cluster.min_b, cluster.max_b,
            cluster.strand_a, cluster.strand_b,
        )

    cluster.add(first, None)
    self.assertEqual(
        snapshot(),
        (1, 'BND', '0.2', '1', 200, 300, '2', 300, 400, '+', '-'),
    )

    # The second record is wider on both sides and carries AF=0.3.
    cluster.add(second, None)
    self.assertEqual(
        snapshot(),
        (2, 'BND', '0.3', '1', 195, 305, '2', 295, 405, '+', '-'),
    )
def test_can_add(self):
    """Exercise Cluster.can_add by perturbing one cluster attribute at a time.

    The cluster is first configured to mirror the record exactly. Each
    later step changes a single attribute to a non-matching value,
    expects ``can_add`` to refuse, and then restores that attribute
    before moving on to the next one.
    """
    bedpe = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'MISSING', 'SVTYPE=BND;AF=0.2',
    ]
    b = Bedpe(bedpe)

    c = Cluster()
    c.chrom_a = b.c1
    c.chrom_b = b.c2
    c.min_a = b.s1
    c.max_a = b.e1
    c.min_b = b.s2
    c.max_b = b.e2
    c.strand_a = b.o1
    c.strand_b = b.o2
    # size and sv_event have not been set by the test yet.
    self.assertTrue(c.can_add(b, 1))

    # Mismatching event type.
    c.size = 1
    c.sv_event = 'DEL'
    self.assertFalse(c.can_add(b, 1))

    c.sv_event = 'BND'
    self.assertTrue(c.can_add(b, 1))

    # Mismatching chromosome on either side.
    c.chrom_a = 'X'
    self.assertFalse(c.can_add(b, 1))

    c.chrom_a = b.c1
    c.chrom_b = 'X'
    self.assertFalse(c.can_add(b, 1))

    # Side-A bounds moved away from the record.
    c.chrom_b = b.c2
    c.min_a = 305
    self.assertFalse(c.can_add(b, 1))

    c.min_a = b.s1
    c.max_a = 150
    self.assertFalse(c.can_add(b, 1))

    # Side-B bounds moved away from the record.
    c.max_a = b.e1
    c.min_b = 405
    self.assertFalse(c.can_add(b, 1))

    # Restore min_b from the side-B start (not the side-A start) so the
    # final check varies max_b alone.
    c.min_b = b.s2
    c.max_b = 150
    self.assertFalse(c.can_add(b, 1))
def test_can_add(self):
    """Exercise Cluster.can_add by perturbing one cluster attribute at a time.

    The cluster is first configured to mirror the record exactly. Each
    later step changes a single attribute to a non-matching value,
    expects ``can_add`` to refuse, and then restores that attribute
    before moving on to the next one.
    """
    bedpe = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.2',
    ]
    b = Bedpe(bedpe)

    c = Cluster()
    c.chrom_a = b.c1
    c.chrom_b = b.c2
    c.min_a = b.s1
    c.max_a = b.e1
    c.min_b = b.s2
    c.max_b = b.e2
    c.strand_a = b.o1
    c.strand_b = b.o2
    # size and sv_event have not been set by the test yet.
    self.assertTrue(c.can_add(b, 1))

    # Mismatching event type.
    c.size = 1
    c.sv_event = 'DEL'
    self.assertFalse(c.can_add(b, 1))

    c.sv_event = 'BND'
    self.assertTrue(c.can_add(b, 1))

    # Mismatching chromosome on either side.
    c.chrom_a = 'X'
    self.assertFalse(c.can_add(b, 1))

    c.chrom_a = b.c1
    c.chrom_b = 'X'
    self.assertFalse(c.can_add(b, 1))

    # Side-A bounds moved away from the record.
    c.chrom_b = b.c2
    c.min_a = 305
    self.assertFalse(c.can_add(b, 1))

    c.min_a = b.s1
    c.max_a = 150
    self.assertFalse(c.can_add(b, 1))

    # Side-B bounds moved away from the record.
    c.max_a = b.e1
    c.min_b = 405
    self.assertFalse(c.can_add(b, 1))

    # Restore min_b from the side-B start (not the side-A start) so the
    # final check varies max_b alone.
    c.min_b = b.s2
    c.max_b = 150
    self.assertFalse(c.can_add(b, 1))