コード例 #1
0
def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Every anchor with a non-zero score (its last field) is collected from the
    clusters read from `qa_file`.  Each record of `maf_file` is converted with
    `alignment_to_cluster`, and the record is copied to
    `maf_file + ".filtered"` when the first converted anchor belongs to that
    collection.  A one-line summary is written to stderr.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    # anchors whose score (last field) is non-zero are the ones to keep
    screened_alignments = set(anchor
                              for cluster in clusters
                              for anchor in cluster
                              if anchor[-1] != 0)

    written = 0
    # `with` guarantees both handles are closed (and the output flushed)
    # even when reading or writing raises part-way through
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)

            for rec in reader:
                # every component carries the score of its enclosing record
                alignment = [(c.src, c.forward_strand_start,
                              c.forward_strand_end, c.strand, rec.score)
                             for c in rec.components]

                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)
                    written += 1

    # report the number of records actually written, not the size of the
    # anchor set they were screened against
    sys.stderr.write("write (%d) alignments to '%s'\n" %
                     (written, filtered_maf))
コード例 #2
0
ファイル: maf_utils.py プロジェクト: xdwang1991/coge
def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Every anchor with a non-zero score (its last field) is collected from the
    clusters read from `qa_file`.  Each record of `maf_file` is converted with
    `alignment_to_cluster`, and the record is copied to
    `maf_file + ".filtered"` when the first converted anchor belongs to that
    collection.  A one-line summary is written to stderr.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    # anchors whose score (last field) is non-zero are the ones to keep
    screened_alignments = set(anchor
                              for cluster in clusters
                              for anchor in cluster
                              if anchor[-1] != 0)

    written = 0
    # `with` guarantees both handles are closed (and the output flushed)
    # even when reading or writing raises part-way through
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)

            for rec in reader:
                # every component carries the score of its enclosing record
                alignment = [(c.src, c.forward_strand_start,
                              c.forward_strand_end, c.strand, rec.score)
                             for c in rec.components]

                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)
                    written += 1

    # report the number of records actually written, not the size of the
    # anchor set they were screened against
    sys.stderr.write("write (%d) alignments to '%s'\n" %
                     (written, filtered_maf))
コード例 #3
0
def get_depth(fn, q, axis=0):
    """
    Count how many clusters cover each coordinate along one axis.

    `fn` is passed to `read_clusters`.  For every cluster the values
    `anchor[axis][1]` are gathered and the cluster is treated as covering the
    closed range from their minimum to their maximum.  `q` is a string of the
    form "a:b"; the cutoff is the component at index `abs(axis - 1)`, i.e.
    index 1 for axis 0 and index 0 for axis 1.

    Returns a tuple `(anchor_num, cluster_num, exceed_quota, length)`:
    total anchors over all clusters, number of clusters, number of
    coordinates whose depth is greater than the cutoff, and the largest
    coordinate seen plus one.
    """
    clusters = read_clusters(fn)

    # only the extent of each cluster is needed, so keep (start, stop) pairs
    # rather than the full coordinate lists
    spans = []
    length = 0
    for cluster in clusters:
        positions = [x[axis][1] for x in cluster]
        start, stop = min(positions), max(positions)
        spans.append((start, stop))
        length = max(length, stop)
    length += 1
    sys.stderr.write("%s total\n" % (length,))

    # a uint8 counter wraps back to 0 after 255 overlapping clusters and
    # would silently under-report the depth; use a wider integer type
    depths = np.zeros(length, np.int32)
    for start, stop in spans:
        depths[start:stop + 1] += 1

    cutoff = int(q.split(":")[abs(axis - 1)])
    exceed_quota = int((depths > cutoff).sum())

    anchor_num = sum(len(c) for c in clusters)
    cluster_num = len(clusters)
    return anchor_num, cluster_num, exceed_quota, length
コード例 #4
0
            qa, qb = int(qa), int(qb)
        except:
            print >> sys.stderr, "quota string should be the form x:x (2:4, 1:3, etc.)"
            sys.exit(1)

        if options.self_match and qa != qb:
            raise Exception, "when comparing genome to itself, " \
                    "quota must be the same number " \
                    "(like 1:1, 2:2) you have %s" % options.quota
        if qa > 12 or qb > 12:
            raise Exception, "quota %s too loose, make it <=12 each" % options.quota
        quota = (qa, qb)

    self_match = options.self_match

    clusters = read_clusters(qa_file, fmt=options.format)
    for cluster in clusters:
        assert len(cluster) > 0

    # below runs `block merging`
    if options.merge:
        chain = range(len(clusters))
        chain = merge_clusters(chain,
                               clusters,
                               Dmax=options.Dmax,
                               min_size=options.min_size)

        merged_qa_file = qa_file + ".merged"
        fw = file(merged_qa_file, "w")
        clusters = [clusters[c] for c in chain]
        write_clusters(fw, clusters)
コード例 #5
0
            qa, qb = int(qa), int(qb)
        except:
            print >>sys.stderr, "quota string should be the form x:x (2:4, 1:3, etc.)"
            sys.exit(1)

        if options.self_match and qa!=qb:
            raise Exception, "when comparing genome to itself, " \
                    "quota must be the same number " \
                    "(like 1:1, 2:2) you have %s" % options.quota
        if qa > 12 or qb > 12:
            raise Exception, "quota %s too loose, make it <=12 each" % options.quota
        quota = (qa, qb) 

    self_match = options.self_match

    clusters = read_clusters(qa_file, fmt=options.format)
    for cluster in clusters:
        assert len(cluster) > 0

    # below runs `block merging`
    if options.merge: 
        chain = range(len(clusters))
        chain = merge_clusters(chain, clusters, Dmax=options.Dmax, min_size=options.min_size)

        merged_qa_file = qa_file + ".merged"
        fw = file(merged_qa_file, "w")
        clusters = [clusters[c] for c in chain]
        write_clusters(fw, clusters)

    total_len_x, total_len_y = calc_coverage(clusters, self_match=self_match)