def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Every anchor in the clusters read from `qa_file` whose last field (its
    score) is non-zero is collected. Each record of `maf_file` is then turned
    into a list of (src, start, end, strand, score) tuples, passed through
    `alignment_to_cluster`, and copied to `<maf_file>.filtered` when the first
    element of the result is one of the collected anchors.

    Both file handles are closed on exit, including on error, so the filtered
    output is always flushed to disk.

    NOTE: the number reported on stderr is the number of collected anchors,
    not the number of records actually written.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    # anchors that survived screening carry a non-zero score in the last slot
    screened_alignments = set()
    for cluster in clusters:
        for anchor in cluster:
            if anchor[-1] != 0:
                screened_alignments.add(anchor)

    # the reader is created before the output file is opened, so a failure
    # while opening/parsing the input never leaves a stray output file behind
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)
            for rec in reader:
                alignment = [
                    (comp.src, comp.forward_strand_start,
                     comp.forward_strand_end, comp.strand, rec.score)
                    for comp in rec.components
                ]
                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)

    sys.stderr.write("write (%d) alignments to '%s'" %
                     (len(screened_alignments), filtered_maf) + "\n")
def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Anchors with a non-zero last field (score) are gathered from the clusters
    in `qa_file`. A record of `maf_file` is kept, i.e. written to
    `<maf_file>.filtered`, when the first element returned by
    `alignment_to_cluster` for that record is one of the gathered anchors.

    The input and output files are managed with `with` blocks so that both are
    closed (and the output flushed) even if processing fails part-way.

    NOTE: the count printed to stderr is the size of the gathered anchor set,
    not the number of records written to the filtered file.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    # keep only anchors whose score (last field) is non-zero
    screened_alignments = set(
        anchor
        for cluster in clusters
        for anchor in cluster
        if anchor[-1] != 0
    )

    # open order matches the original: input + reader first, then the output
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)
            for rec in reader:
                alignment = []
                for comp in rec.components:
                    alignment.append((comp.src,
                                      comp.forward_strand_start,
                                      comp.forward_strand_end,
                                      comp.strand,
                                      rec.score))
                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)

    message = "write (%d) alignments to '%s'" % \
        (len(screened_alignments), filtered_maf)
    sys.stderr.write(message + "\n")
def get_depth(fn, q, axis=0):
    """
    Summarise how deeply the clusters in `fn` stack up along one axis.

    For every cluster, the values `x[axis][1]` of its anchors are reduced to a
    closed span [min, max] (presumably positions along that axis -- confirm
    against `read_clusters`). A per-position depth array counts how many spans
    cover each index, and the positions whose depth exceeds the quota cutoff
    are tallied.

    `q` is a quota string of the form "a:b"; the cutoff is taken from the
    *other* field than `axis` (axis=0 reads the second number, axis=1 the
    first).

    Returns a tuple (anchor_num, cluster_num, exceed_quota, length) where
    `length` is the largest value seen plus one (1 when there are no
    clusters).
    """
    clusters = read_clusters(fn)

    # reduce each cluster to the (min, max) extent of its anchors on `axis`
    spans = []
    length = 0
    for cluster in clusters:
        values = [x[axis][1] for x in cluster]
        start, stop = min(values), max(values)
        spans.append((start, stop))
        length = max(length, stop)
    length += 1

    sys.stderr.write("%s total\n" % (length,))

    # Depth at a position can never exceed the number of clusters. uint8 is
    # kept while that bound fits in a byte (small memory footprint); otherwise
    # a wider counter is used so the depth cannot wrap around past 255 and
    # silently under-report positions that exceed the quota.
    depth_dtype = np.uint8 if len(clusters) <= 255 else np.uint32
    depths = np.zeros(length, depth_dtype)
    for start, stop in spans:
        depths[start:stop + 1] += 1

    cutoff = int(q.split(":")[abs(axis - 1)])
    exceed_quota = depths[depths > cutoff].shape[0]

    anchor_num = sum(len(c) for c in clusters)
    cluster_num = len(clusters)

    return anchor_num, cluster_num, exceed_quota, length
qa, qb = int(qa), int(qb) except: print >> sys.stderr, "quota string should be the form x:x (2:4, 1:3, etc.)" sys.exit(1) if options.self_match and qa != qb: raise Exception, "when comparing genome to itself, " \ "quota must be the same number " \ "(like 1:1, 2:2) you have %s" % options.quota if qa > 12 or qb > 12: raise Exception, "quota %s too loose, make it <=12 each" % options.quota quota = (qa, qb) self_match = options.self_match clusters = read_clusters(qa_file, fmt=options.format) for cluster in clusters: assert len(cluster) > 0 # below runs `block merging` if options.merge: chain = range(len(clusters)) chain = merge_clusters(chain, clusters, Dmax=options.Dmax, min_size=options.min_size) merged_qa_file = qa_file + ".merged" fw = file(merged_qa_file, "w") clusters = [clusters[c] for c in chain] write_clusters(fw, clusters)
qa, qb = int(qa), int(qb) except: print >>sys.stderr, "quota string should be the form x:x (2:4, 1:3, etc.)" sys.exit(1) if options.self_match and qa!=qb: raise Exception, "when comparing genome to itself, " \ "quota must be the same number " \ "(like 1:1, 2:2) you have %s" % options.quota if qa > 12 or qb > 12: raise Exception, "quota %s too loose, make it <=12 each" % options.quota quota = (qa, qb) self_match = options.self_match clusters = read_clusters(qa_file, fmt=options.format) for cluster in clusters: assert len(cluster) > 0 # below runs `block merging` if options.merge: chain = range(len(clusters)) chain = merge_clusters(chain, clusters, Dmax=options.Dmax, min_size=options.min_size) merged_qa_file = qa_file + ".merged" fw = file(merged_qa_file, "w") clusters = [clusters[c] for c in chain] write_clusters(fw, clusters) total_len_x, total_len_y = calc_coverage(clusters, self_match=self_match)