コード例 #1
0
def train_vblur_from_meta(hist_fn, cds_fa, vblur_fname):
    """Run the meta-profile pipeline and write the resulting blur vector.

    Steps, in order:
      1. ``get_cds_range(cds_fa)`` supplies the CDS ranges.
      2. ``parse_rlen_hist(hist_fn)`` supplies the parsed histogram.
      3. ``meta_pipeline`` is run on both, together with the settings
         ``utr5_offset``, ``imax``, ``rlen_min``, ``rlen_max`` and
         ``converge_cutoff``, which are resolved from outside this function.
      4. The first of the three values returned by ``meta_pipeline`` is
         handed to ``write_vblur`` together with ``vblur_fname``.

    The function returns nothing; its result is whatever ``write_vblur``
    writes.
    """
    cds_bounds = get_cds_range(cds_fa)
    transcripts = parse_rlen_hist(hist_fn)
    # meta_pipeline yields three values; only the first is persisted here.
    blur_vec, _ptrue, _eps = meta_pipeline(
        transcripts, cds_bounds, utr5_offset, imax,
        rlen_min, rlen_max, converge_cutoff)
    write_vblur(blur_vec, vblur_fname)
コード例 #2
0
def _has_output(path):
    """Return True when ``path`` exists and is not empty."""
    return os.path.exists(path) and os.path.getsize(path) > 0


def _require_output(path, step):
    """Abort the program (exit status 1) unless ``path`` is a non-empty file.

    ``step`` is the label reported in the FATAL message on stderr.
    """
    if _has_output(path):
        return
    print("FATAL: deblur_pipeline failed at {}!".format(step), file=sys.stderr)
    print("abort program!", file=sys.stderr)
    sys.exit(1)


def deblur_pipeline(bam_fname, cds_fa, oprefix, force):
    """Full pipeline for deblurring ribo profiles for a given sample.

    Five intermediate/final files are derived from ``oprefix``
    (``_raw.hist``, ``_hc.hist``, ``.vblur``, ``.eps``, ``.profile``).
    Each step is skipped when its output file is already present and
    non-empty, unless ``force`` is set, in which case every step is rerun.

    Args:
        bam_fname: alignment file passed to ``group_reads_by_length``.
        cds_fa: CDS input passed to the steps that call ``get_cds_range``.
        oprefix: output path prefix; its directory is created when missing.
            A prefix ending in "/" gets the base name "ribo" appended.
        force: rerun every step when this compares equal to ``True``.

    Raises:
        SystemExit: with status 1 when a step leaves its output file
            missing or empty.
    """
    # step 0: prepare input parameters
    odir = os.path.dirname(oprefix)
    if odir and not os.path.exists(odir):
        os.makedirs(odir)
    if oprefix.endswith("/"):
        oprefix += "ribo"
    raw_hist = "{}_raw.hist".format(oprefix)
    high_cov_hist = "{}_hc.hist".format(oprefix)
    vblur_fname = "{}.vblur".format(oprefix)
    eps_fname = "{}.eps".format(oprefix)
    profile_fname = "{}.profile".format(oprefix)
    # Evaluated once. The comparison is deliberately kept as `== True` so
    # callers passing non-bool values (e.g. the string "False") behave as
    # they always did.
    forced = force == True  # noqa: E712

    # A cached file only counts when it is non-empty: a failed earlier run
    # can leave a zero-byte file behind, and reusing it would feed empty
    # input to the downstream steps.

    # step 1: generate length-specific profiles
    if forced or not _has_output(raw_hist):
        group_reads_by_length(bam_fname, raw_hist)
        _require_output(raw_hist, "group_reads_by_len")
    else:
        print("length-specific profile exists, use cached", file=sys.stderr)

    # step 2: filter high-coverage profiles
    if forced or not _has_output(high_cov_hist):
        filter_high_cover_profile(raw_hist, cds_fa, cover_ratio, cnt_threshold, high_cov_hist)
        _require_output(high_cov_hist, "filter_high_cover_profile")
    else:
        print("high-coverage profile exists, use cached", file=sys.stderr)

    # step 3: train blur vector from meta profiles
    if forced or not _has_output(vblur_fname):
        train_vblur_from_meta(high_cov_hist, cds_fa, vblur_fname)
        _require_output(vblur_fname, "train_vblur_from_meta")
    else:
        print("vblur file exists, use cached", file=sys.stderr)

    # step 4: deblur high-coverage profiles
    if forced or not _has_output(eps_fname):
        deblur_transcripts(high_cov_hist, cds_fa, vblur_fname, eps_fname)
        _require_output(eps_fname, "deblur_transcripts")
    else:
        print("deblur file exists, use cached", file=sys.stderr)

    # step 5: combine length-specific profiles
    if forced or not _has_output(profile_fname):
        cds_range = get_cds_range(cds_fa)
        ctrue_merge = construct_deblur_profiles(eps_fname, vblur_fname, raw_hist, cds_range)
        aprof = batch_build_Aprof(ctrue_merge, cds_range, -utr5_offset, asite_offset)
        write_profiles(aprof, profile_fname)
        _require_output(profile_fname, "combine_profile")
    else:
        print("final results exists, nothing needs to be done", file=sys.stderr)
コード例 #3
0
def filter_high_cover_profile(hist_fn, cds_fa, cover_ratio, cnt_threshold,
                              ofname):
    """Filter transcript profiles by coverage and write the survivors.

    Steps, in order:
      1. ``get_cds_range(cds_fa)`` and ``parse_rlen_hist(hist_fn)`` load the
         CDS ranges and the parsed histogram.
      2. ``get_transcript_profiles`` builds per-transcript profiles from
         them, using ``utr5_offset`` and ``utr3_offset`` (resolved from
         outside this function).
      3. ``filter_transcript_profiles`` selects profiles according to
         ``cnt_threshold`` and ``cover_ratio``.
      4. ``write_rlen_hist`` writes the selection to ``ofname`` along with a
         mapping from each entry's ``'tid'`` value back to its key in the
         parsed histogram.

    The function returns nothing.
    """
    cds_bounds = get_cds_range(cds_fa)
    transcripts = parse_rlen_hist(hist_fn)
    profiles = get_transcript_profiles(
        transcripts, cds_bounds, utr5_offset, utr3_offset)
    selected = filter_transcript_profiles(
        profiles, cds_bounds, cnt_threshold, cover_ratio)
    # Invert the parsed histogram: entry['tid'] -> key of that entry.
    rid_by_tid = {}
    for rid, entry in transcripts.items():
        rid_by_tid[entry['tid']] = rid
    write_rlen_hist(selected, cds_bounds, rid_by_tid, ofname)
コード例 #4
0
        plt.show()


if __name__ == "__main__":
    # Script entry point. Expects exactly five positional arguments:
    # rlen.hist, cds_range.txt, rlen.vblur, rlen.eps and an output directory.
    # print(...) with a single string argument and dict.keys() wrapped in
    # list() behave the same on Python 2 and Python 3.
    if len(sys.argv) != 6:
        print("Usage: python frameshift_celebrity.py rlen.hist cds_range.txt rlen.vblur rlen.eps output_dir")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. when run with `python -S`).
        sys.exit(1)
    hist_fn = sys.argv[1]
    cds_txt = sys.argv[2]
    vblur_fname = sys.argv[3]
    deblur_fname = sys.argv[4]
    odir = sys.argv[5]
    ensure_dir(odir)
    fname = get_file_core(hist_fn)
    print("get cds range")
    cds_range = get_cds_range(cds_txt)
    print("parse read len hist file")
    tlist = parse_rlen_hist(hist_fn)
    print("get pre-computed blur vector")
    b = read_vblur(vblur_fname)
    print("get pre-computed deblur results")
    ptrue, eps = read_essentials(deblur_fname)
    print("construct cobs all at once")
    tprofile = get_transcript_profiles(tlist, cds_range, utr5_offset,
                                       utr3_offset)
    #cobs = build_cobs_with_shifts(tprofile, cds_range, utr5_offset, utr3_offset, rlen_min, rlen_max, klist)
    cobs = construct_all_cobs(tprofile, cds_range, utr5_offset, utr3_offset,
                              rlen_min, rlen_max)
    print("construct ctrue all at once")
    ctrue = batch_build_ctrue(ptrue, eps, cobs)
    # list(...) is required on Python 3: np.array(dict_keys) would yield a
    # 0-d object array instead of an array of the keys.
    tid_list = np.array(list(cobs.keys()))
コード例 #5
0
    return means, varrs, skews

if __name__ == "__main__":
    # Script entry point. Expects exactly six positional arguments:
    # transcript.fasta, cds_range.txt, rlen.hist, rlen.vblur, rlen.eps and
    # an output directory.
    # print(...) with a single string argument and dict.items() behave the
    # same on Python 2 and Python 3.
    if len(sys.argv) != 7:
        print("Usage: python elongation_rate.py transcript.fasta cds_range.txt rlen.hist rlen.vblur rlen.eps output_dir")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. when run with `python -S`).
        sys.exit(1)
    tfasta = sys.argv[1]
    cds_txt = sys.argv[2]
    hist_fn = sys.argv[3]
    vblur_fname = sys.argv[4]
    deblur_fname = sys.argv[5]
    odir = sys.argv[6]
    ensure_dir(odir)
    fname = get_file_core(hist_fn)
    print("get cds range")
    cds_range = get_cds_range(cds_txt)
    tseq = get_tseq(tfasta, cds_range)
    print("parse read len hist file")
    tlist = parse_rlen_hist(hist_fn)
    print("get pre-computed blur vector")
    b = read_vblur(vblur_fname)
    print("get pre-computed deblur results")
    ptrue, eps = read_essentials(deblur_fname)
    print("construct cobs all at once")
    tprofile = get_transcript_profiles(tlist, cds_range, utr5_offset, utr3_offset)
    # cobs = build_cobs_with_shifts(tprofile, cds_range, utr5_offset, utr3_offset, rlen_min, rlen_max, klist)
    cobs = construct_all_cobs(tprofile, cds_range, utr5_offset, utr3_offset, rlen_min, rlen_max)
    print("construct ctrue all at once")
    ctrue_rlen = batch_build_ctrue(ptrue, eps, cobs)
    # .items() instead of the Python 2-only .iteritems(); merges the profile
    # list of every transcript id into one profile.
    mprof = { tid: merge_profiles(plist) for tid, plist in ctrue_rlen.items() }
    base_prof = batch_build_Aprof(mprof, cds_range, -utr5_offset, asite_offset)