Python BigWigFile.summarizeの例、bx.bbi.bigwig_file.BigWigFile.summarize Pythonの例

コード例 #1

0

ファイルを表示

ファイル: scan_footprint_scinal.py プロジェクト: Tarela/RivanaCode

def scan_fp(plusdnase, minusdnase, bed, out, upstream, downstream):
    """Write per-base bigWig signal around each BED interval.

    For every record of ``bed`` the window
    ``[start - upstream, end + downstream)`` is summarized with one bin per
    base on both bigWig files.  The first six BED columns, followed by the
    values from ``plusdnase`` and then the values from ``minusdnase``, are
    written to ``out`` as one tab-separated line.

    Records are skipped when their chromosome is not listed in the
    chromosome tree of ``plusdnase`` or when ``start < upstream``.

    Args:
        plusdnase: path of the first bigWig file.
        minusdnase: path of the second bigWig file.
        bed: path of the whitespace-separated input BED file.
        out: path of the output file (overwritten).
        upstream: number of bases added before ``start``.
        downstream: number of bases added after ``end``.
    """
    chrom_len = {}
    for node in BwIO(plusdnase).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    # Context managers close all four files even if a record fails to parse.
    with open(plusdnase, 'rb') as plus_fh, \
            open(minusdnase, 'rb') as minus_fh, \
            open(bed) as inf, \
            open(out, 'w') as outf:
        bwHandle1 = BigWigFile(plus_fh)
        bwHandle2 = BigWigFile(minus_fh)
        for line in inf:
            ll = line.split()
            if ll[0] not in chrom_len:
                continue
            if int(ll[1]) < upstream:
                # The extended window would start before position 0.
                continue
            win_start = int(ll[1]) - upstream
            win_end = int(ll[2]) + downstream
            nbins = win_end - win_start  # one bin per base
            signal1 = bwHandle1.summarize(ll[0], win_start, win_end, nbins)
            signal2 = bwHandle2.summarize(ll[0], win_start, win_end, nbins)
            # List comprehensions (not map) so the concatenation also works
            # where map() returns an iterator.
            newll = (ll[:6]
                     + [str(v) for v in signal1.sum_data]
                     + [str(v) for v in signal2.sum_data])
            outf.write("\t".join(newll) + "\n")

コード例 #2

0

ファイルを表示

ファイル: Score_FTD.py プロジェクト: Tarela/RivanaCode

def getsignal(inputfile,outputfile,pcut,DHT,Veh,pspan):
    """Append cut-count, footprint and two track totals to each input record.

    The interval length ``ml`` is taken from the first record of
    ``inputfile`` and reused for every record.  For each record a window of
    ``2 * pspan`` bases centred on ``start + ml // 2`` is read from ``pcut``
    with one bin per base, and four columns are appended to the record:

    * ``TC``  - sum of the whole window,
    * ``FOS`` - ``-((C+1)/(R+1) + (C+1)/(L+1))`` where ``C`` is the sum over
      the central ``ml`` bins and ``L`` / ``R`` are the sums over the ``ml``
      bins directly before / after it,
    * the window total from ``DHT`` plus 1,
    * the window total from ``Veh`` plus 1.

    Args:
        inputfile: path of the whitespace-separated interval file.
        outputfile: path of the output file (overwritten).
        pcut: path of the bigWig file used for ``TC`` and ``FOS``.
        DHT: path of the first additional bigWig file.
        Veh: path of the second additional bigWig file.
        pspan: half width of the window, in bases.
    """
    with open(pcut, 'rb') as pcut_fh, \
            open(DHT, 'rb') as dht_fh, \
            open(Veh, 'rb') as veh_fh, \
            open(inputfile) as inf:
        pcutbw = BigWigFile(pcut_fh)
        dht = BigWigFile(dht_fh)
        veh = BigWigFile(veh_fh)

        # The first record defines the interval length for the whole file.
        testll = inf.readline().split()
        ml = int(testll[2]) - int(testll[1])
        inf.seek(0)

        # Floor division keeps the offsets integral so they stay usable as
        # coordinates and slice indices.
        half = ml // 2
        # Index of the first central bin inside the window.
        # NOTE(review): if pspan < half + ml the flank slices below get
        # negative indices and wrap around - confirm callers never do that.
        core = pspan - half

        with open(outputfile, 'w') as outf:
            for line in inf:
                ll = line.split()
                win_start = int(ll[1]) + half - pspan
                win_end = int(ll[1]) + half + pspan
                cut = list(pcutbw.summarize(ll[0], win_start, win_end,
                                            2 * pspan).sum_data)
                TC = sum(cut)
                C = sum(cut[core:core + ml])
                L = sum(cut[core - ml:core])
                R = sum(cut[core + ml:core + 2 * ml])
                FOS = -1 * ((C + 1) / (R + 1) + (C + 1) / (L + 1))
                dhtnum = sum(dht.summarize(ll[0], win_start, win_end,
                                           2).sum_data) + 1
                vehnum = sum(veh.summarize(ll[0], win_start, win_end,
                                           2).sum_data) + 1
                newll = ll + [TC, FOS, dhtnum, vehnum]
                outf.write("\t".join(map(str, newll)) + "\n")

コード例 #3

0

ファイルを表示

ファイル: get_simplexSig_encbiasMat_centerext_permuteSeq.py プロジェクト: Tarela/RivanaCode

def get_regionLevel_simplex_parameters(inputbed, outputbed, plusbw, minusbw,
                                       biasmat, ext, genome2bit):
    """Append signal-weighted bias-parameter sums to each BED record.

    For every record a window of ``ext`` bases on either side of the
    interval midpoint (clamped at 0) is read from both bigWig files with one
    bin per base.  For each position with a positive value, the sequence of
    length ``2 * flank`` at that position is looked up in
    ``permuteSeq8mer.txt``, converted with ``seq2biasParm`` and accumulated,
    weighted by the bigWig value.  The record, the accumulated vector for
    ``plusbw`` and the accumulated vector for ``minusbw`` are written to
    ``outputbed``.

    Records for which either bigWig returns no summary are skipped.

    Args:
        inputbed: path of the whitespace-separated input BED file.
        outputbed: path of the output file (overwritten).
        plusbw: path of the bigWig file paired with the forward sequence.
        minusbw: path of the bigWig file paired with the reverse-complement
            sequence (shifted by one base).
        biasmat: path passed to ``readBG``; yields the bias table and
            ``flank``.
        ext: half width of the window around the interval midpoint.
        genome2bit: path of the 2bit genome file.
    """
    simplex_code = encoding()
    biasdict, flank = readBG(biasmat)
    B, _B0, _B1, _B2 = paramest(biasdict)

    # Two-column lookup table read from the current working directory.
    permuteSeq = {}
    with open("permuteSeq8mer.txt") as table:
        for line in table:
            fields = line.split()
            permuteSeq[fields[0]] = fields[1]

    genome = twobitreader.TwoBitFile(genome2bit)

    with open(plusbw, 'rb') as plus_fh, \
            open(minusbw, 'rb') as minus_fh, \
            open(inputbed) as inf, \
            open(outputbed, 'w') as outf:
        plusBWH = BigWigFile(plus_fh)
        minusBWH = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            chrm = ll[0]
            # Floor division keeps the midpoint an integer coordinate.
            center = (int(ll[1]) + int(ll[2])) // 2
            start = max(0, center - ext)
            end = center + ext

            # Test the summary objects themselves.  The previous
            # ``type(x) == None`` test could never be true, and ``.sum_data``
            # was read before it ran.
            plus_summary = plusBWH.summarize(chrm, start, end, end - start)
            minus_summary = minusBWH.summarize(chrm, start, end, end - start)
            if plus_summary is None or minus_summary is None:
                continue
            plusSig = plus_summary.sum_data
            minusSig = minus_summary.sum_data

            # NOTE(review): start - flank can be negative for windows at the
            # very start of a chromosome - confirm how the genome slice
            # behaves in that case.
            plusSequence = genome[chrm][(start - flank):(end + flank)].upper()
            minusSequence = genome[chrm][(start - flank + 1):
                                         (end + flank + 1)].upper()

            plus_data = numpy.zeros(len(B))
            minus_data = numpy.zeros(len(B))

            for i, pcuts in enumerate(plusSig):
                if pcuts > 0:
                    pseq = plusSequence[i:(i + 2 * flank)]
                    if "N" not in pseq:
                        p_out = seq2biasParm(permuteSeq[pseq], B,
                                             simplex_code)
                        plus_data += pcuts * p_out

            for i, mcuts in enumerate(minusSig):
                if mcuts > 0:
                    tmpseq = minusSequence[i:(i + 2 * flank)]
                    if "N" not in tmpseq:
                        mseq = revcomp(tmpseq).upper()
                        m_out = seq2biasParm(permuteSeq[mseq], B,
                                             simplex_code)
                        minus_data += mcuts * m_out

            newll = ll + list(plus_data) + list(minus_data)
            outf.write("\t".join(map(str, newll)) + "\n")

コード例 #4

0

ファイルを表示

ファイル: get_signal_dym.py プロジェクト: Tarela/RivanaCode

def get_signal(inputfile, output, vp, vm, dp, dm):
    """Append the total signal of four bigWig tracks to each input record.

    Writes a fixed header line to ``output``, then one line per input
    record with four extra columns: the summed signal over
    ``[start - 50, end + 50)`` in ``vp``, ``vm``, ``dp`` and ``dm``, in that
    order.  Input lines starting with ``"chrom"`` (a header) and records
    whose chromosome is not listed in the chromosome tree of ``vp`` are
    skipped.

    Args:
        inputfile: path of the whitespace-separated input table.
        output: path of the output file (overwritten).
        vp, vm, dp, dm: paths of the four bigWig files.
    """
    chrom_len = {}
    for node in BwIO(vp).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    colnames = [
        "chrom", "start", "end", "seq", "motifscore", "strand",
        "LncapARsignal", "LncapDNaseCutsite", "LncapDNaseFrag",
        "K562DNaseFrag", "LncapFP", "K562FP", "overARpeak", "VehPlus",
        "VehMinus", "DHTPlus", "DHTMinus"
    ]

    with open(vp, 'rb') as vp_fh, open(vm, 'rb') as vm_fh, \
            open(dp, 'rb') as dp_fh, open(dm, 'rb') as dm_fh, \
            open(inputfile) as inf, open(output, 'w') as outf:
        # Order matters: it is the order of the appended columns.
        tracks = [BigWigFile(vp_fh), BigWigFile(vm_fh),
                  BigWigFile(dp_fh), BigWigFile(dm_fh)]
        outf.write("\t".join(colnames) + "\n")
        for line in inf:
            if line.startswith("chrom"):
                continue
            ll = line.split()
            chrom = ll[0]
            if chrom not in chrom_len:
                continue
            win_start = int(ll[1]) - 50
            win_end = int(ll[2]) + 50
            for bw in tracks:
                signal = bw.summarize(chrom, win_start, win_end, 1)
                # A single bin was requested, so take its one element
                # explicitly instead of calling float() on the array.
                ll.append(str(float(signal.sum_data[0])))
            outf.write("\t".join(ll) + "\n")

コード例 #5

0

ファイルを表示

ファイル: Foot_scan.py プロジェクト: Tarela/RivanaCode

def summary(bwfile,bedfile,topnumber,out):
    """Rank BED records by mean bigWig signal and dump the top ones.

    First pass: for every record of ``bedfile`` whose chromosome is listed
    in the chromosome tree of ``bwfile``, column 4 is replaced by ``"-"``
    and the mean signal over the interval is computed (0 when the interval
    has no covered bases).  Records are then sorted by that mean, highest
    first.

    Second pass: for the first ``topnumber`` records (or all of them if
    there are fewer) the per-base signal is fetched and written to ``out``
    as the record, the mean, and a comma-separated list of per-base values.

    Elapsed time of both passes is printed to standard output.

    Args:
        bwfile: path of the bigWig file.
        bedfile: path of the whitespace-separated BED file (>= 4 columns).
        topnumber: maximum number of records to write.
        out: path of the output file (overwritten).
    """
    chrom_len = {}
    for node in BwIO(bwfile).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    total_result = []
    with open(bwfile, 'rb') as bw_fh:
        bwHandle = BigWigFile(bw_fh)
        t = time.time()
        with open(bedfile) as inf:
            for line in inf:
                ll = line.split()
                ll[3] = "-"
                if ll[0] not in chrom_len:
                    continue
                region = bwHandle.summarize(ll[0], int(ll[1]), int(ll[2]), 1)
                if region.valid_count == 0:
                    mean_value = 0
                else:
                    mean_value = (region.sum_data / region.valid_count)[0]
                total_result.append(ll + [mean_value])
        total_result.sort(reverse=True, key=lambda rec: rec[-1])

        with open(out, 'w') as outf:
            # Single-argument print(...) emits the same text under both the
            # print statement and the print function.
            print("scaning 1st  %s" % (time.time() - t))
            t = time.time()
            # Slicing clamps to the number of available records, so asking
            # for more than exist no longer raises IndexError.
            for ll in total_result[:max(topnumber, 0)]:
                start, end = int(ll[1]), int(ll[2])
                region = bwHandle.summarize(ll[0], start, end, end - start)
                additional_value = ",".join(map(str, region.sum_data))
                result = [str(v) for v in ll + [additional_value]]
                outf.write("\t".join(result) + "\n")
    print("scaning 2nd  %s" % (time.time() - t))

コード例 #6

0

ファイルを表示

ファイル: make_bg_bwDHS_twobit.py プロジェクト: Tarela/RivanaCode

def count_cut_nmers(fp, w_plus, w_minus, lflank, rflank, single_nmer_cutoff,
                    sequence):
    """Count n-mer occurrences and the bigWig signal attached to them.

    For every interval read from ``fp`` and every base ``k`` of it, the
    n-mer of length ``lflank + rflank`` starting ``lflank`` bases before
    ``k`` is paired with the ``w_plus`` value at ``k``; the n-mer shifted by
    one base is passed through ``rev`` and paired with the ``w_minus``
    value at ``k``.  N-mers containing ``N`` and positions whose value
    exceeds ``single_nmer_cutoff`` are ignored.

    Args:
        fp: open, iterable file of whitespace-separated intervals
            (chrom, start, end, ...).
        w_plus: path of the bigWig file paired with the forward n-mer.
        w_minus: path of the bigWig file paired with the ``rev`` n-mer.
        lflank: number of bases taken before each position.
        rflank: number of bases taken from each position onwards.
        single_nmer_cutoff: positions with a larger value are skipped.
        sequence: path of the 2bit genome file.

    Returns:
        ``(seq_nmer_dict, cut_nmer_dict)``: number of counted positions per
        n-mer, and summed bigWig value per n-mer.
    """
    seq_nmer_dict = {}
    cut_nmer_dict = {}
    nmer = lflank + rflank

    with open(w_plus, 'rb') as plus_fh, open(w_minus, 'rb') as minus_fh:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)
        genome = twobitreader.TwoBitFile(sequence)

        for line in fp:
            ll = line.split()
            chrm = ll[0]
            start = int(ll[1])
            end = int(ll[2])
            seq = genome[chrm][(start - lflank):(end + rflank)].upper()
            cp = list(w_plus_H.summarize(chrm, start, end,
                                         end - start).sum_data)
            cn = list(w_minus_H.summarize(chrm, start, end,
                                          end - start).sum_data)

            for k, (p_cut, n_cut) in enumerate(zip(cp, cn)):
                p_seq = seq[k:(k + nmer)]
                n_seq = seq[(k + 1):(k + nmer + 1)]

                # dict.get replaces the former bare ``except:`` blocks, which
                # would also have hidden unrelated errors.
                if 'N' not in p_seq and p_cut <= single_nmer_cutoff:
                    cut_nmer_dict[p_seq] = cut_nmer_dict.get(p_seq, 0) + p_cut
                    seq_nmer_dict[p_seq] = seq_nmer_dict.get(p_seq, 0) + 1
                if 'N' not in n_seq and n_cut <= single_nmer_cutoff:
                    rev_n_seq = rev(n_seq)
                    cut_nmer_dict[rev_n_seq] = (
                        cut_nmer_dict.get(rev_n_seq, 0) + n_cut)
                    seq_nmer_dict[rev_n_seq] = (
                        seq_nmer_dict.get(rev_n_seq, 0) + 1)
    return seq_nmer_dict, cut_nmer_dict

コード例 #7

0

ファイルを表示

ファイル: scan_reads_proportion_old.py プロジェクト: Tarela/RivanaCode

def sitepro_scan(peak, outname, w_plus, w_minus, Cspan):
    """Write per-base local signal proportions as two bedGraph-style files.

    For each interval of ``peak`` the window
    ``[start - Cspan, end + Cspan)`` is read from both bigWig files with one
    bin per base and a pseudocount of 1 is added to every bin.  For every
    base of the original interval the value at that base is divided by the
    sum of the ``2 * Cspan`` bins around it, rounded to 4 decimals, and
    written as ``chrom, pos, pos + 1, proportion`` to
    ``<outname>_propcutPlus.bdg`` and ``<outname>_propcutMinus.bdg``.

    Intervals with ``start < Cspan`` are printed to standard output and
    skipped.

    Args:
        peak: path of the whitespace-separated interval file.
        outname: prefix of the two output files.
        w_plus: path of the bigWig file for the ``Plus`` output.
        w_minus: path of the bigWig file for the ``Minus`` output.
        Cspan: half width of the normalisation window, in bases.
    """
    roundN = 4

    with open(peak) as inf, \
            open(w_plus, 'rb') as plus_fh, \
            open(w_minus, 'rb') as minus_fh:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)

        with open(outname + "_propcutPlus.bdg", 'w') as outf_propPlus, \
                open(outname + "_propcutMinus.bdg", 'w') as outf_propMinus:
            for line in inf:
                ll = line.split()
                chrm = ll[0]
                start = int(ll[1])
                end = int(ll[2])
                if start - Cspan < 0:
                    print(ll)
                    continue
                width = end - start + 2 * Cspan
                plus_obj = w_plus_H.summarize(chrm, start - Cspan,
                                              end + Cspan, width)
                minus_obj = w_minus_H.summarize(chrm, start - Cspan,
                                                end + Cspan, width)
                # A missing summary counts as zero signal plus the
                # pseudocount.  The fallback is a float array so the division
                # below is a true division; the former integer array made
                # every proportion floor to 0 under Python 2.
                if not plus_obj:
                    plus_vector = numpy.ones(width)
                else:
                    plus_vector = plus_obj.sum_data + 1
                if not minus_obj:
                    minus_vector = numpy.ones(width)
                else:
                    minus_vector = minus_obj.sum_data + 1

                for outpos in range(Cspan, end - start + Cspan):
                    lo = outpos - Cspan
                    hi = outpos + Cspan
                    this_plus_cuts_prop = round(
                        plus_vector[outpos] / sum(plus_vector[lo:hi]),
                        roundN)
                    this_minus_cuts_prop = round(
                        minus_vector[outpos] / sum(minus_vector[lo:hi]),
                        roundN)

                    out_start = start + outpos - Cspan
                    out_end = out_start + 1

                    outf_propPlus.write("\t".join(map(str, [
                        chrm, out_start, out_end, this_plus_cuts_prop
                    ])) + "\n")
                    outf_propMinus.write("\t".join(map(str, [
                        chrm, out_start, out_end, this_minus_cuts_prop
                    ])) + "\n")

コード例 #8

0

ファイルを表示

def sitepro_scan(peak, outp, outn, w_plus, w_minus, bgmatrix, span, gen,
                 lflank, rflank):
    """Write observed and bias-expected signal per base for both tracks.

    For each interval of ``peak`` the window ``[start - span, end + span)``
    is read from both bigWig files with one bin per base, and the sequence
    covering that window plus ``lflank`` / ``rflank`` extra bases is looked
    up n-mer by n-mer (``n = lflank + rflank``) in the two tables returned
    by ``readBG``.  For every base of the original interval one line is
    written to each output file::

        observed  window_total  bias  (bias / window_bias_total) * window_total

    where the window is the ``2 * span`` bins around the base.  ``outp``
    receives the values derived from ``w_plus``, ``outn`` those derived
    from ``w_minus``.

    Intervals too close to position 0 (``start - span - lflank <= 0``) and
    intervals whose sequence contains ``N`` are skipped.

    Args:
        peak: path of the whitespace-separated interval file.
        outp: output path for the ``w_plus`` rows.
        outn: output path for the ``w_minus`` rows.
        w_plus, w_minus: paths of the two bigWig files.
        bgmatrix: path passed to ``readBG``.
        span: half width of the normalisation window, in bases.
        gen: path of the 2bit genome file.
        lflank, rflank: bases before / from each position forming the n-mer.
    """
    nmer = lflank + rflank
    genome = twobitreader.TwoBitFile(gen)
    pBG, nBG = readBG(bgmatrix)

    with open(peak) as inf, \
            open(w_plus, 'rb') as plus_fh, \
            open(w_minus, 'rb') as minus_fh, \
            open(outp, 'w') as outfp, \
            open(outn, 'w') as outfn:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            chrm = ll[0]
            start = int(ll[1])
            end = int(ll[2])
            # Skip windows that would run off the start of the chromosome.
            if start - span - lflank <= 0:
                continue
            width = end - start + 2 * span
            p_sum = list(w_plus_H.summarize(chrm, start - span, end + span,
                                            width).sum_data)
            n_sum = list(w_minus_H.summarize(chrm, start - span, end + span,
                                             width).sum_data)

            seq = genome[chrm][(start - span - lflank):(end + span + rflank)]
            if 'N' in seq.upper():
                continue
            # The second table is looked up on the sequence shifted by one
            # base relative to the first.
            pseq = seq[:-1]
            nseq = seq[1:]
            nwindows = len(pseq) + 1 - nmer
            p = [pBG[pseq[k:(k + nmer)].upper()] for k in range(nwindows)]
            n = [nBG[nseq[k:(k + nmer)].upper()] for k in range(nwindows)]

            for bp in range(len(p_sum) - 2 * span):
                lo = bp
                hi = bp + 2 * span
                mid = bp + span
                ptotal = sum(p_sum[lo:hi])
                ntotal = sum(n_sum[lo:hi])
                pc = int(p_sum[mid])
                nc = int(n_sum[mid])
                pbias = p[mid]
                nbias = n[mid]
                paraw = (pbias / sum(p[lo:hi])) * ptotal
                naraw = (nbias / sum(n[lo:hi])) * ntotal

                outfp.write(
                    "\t".join(map(str, [pc, ptotal, pbias, paraw])) + "\n")
                # Write naraw here: the w_minus row used to repeat paraw,
                # leaving the computed naraw unused.
                outfn.write(
                    "\t".join(map(str, [nc, ntotal, nbias, naraw])) + "\n")

コード例 #9

0

ファイルを表示

def get_signal(inputfile, output, Pbw, Nbw, score_range):
    """Append total signal and two strand-imbalance scores to each record.

    For each record a 200-bin summary of ``[start - 100, start + 100)``
    (lower bound clamped at 0) is read from both bigWig files.  Using the
    ``score_range`` bins directly before bin 100 ("up") and directly after
    bin ``100 + (end - start)`` ("down"), four sums are formed relative to
    the record's strand (column 6): up/down on the same-strand track and on
    the other track (``Pbw`` is the same-strand track for ``+``, ``Nbw``
    for ``-``).  Three columns are appended:

    * total signal of both tracks in the window,
    * ``log2((up_same + 0.2) * (down_diff + 0.2) /
      ((up_diff + 0.2) * (down_same + 0.2)))``,
    * ``sqrt(up_same) + sqrt(down_diff) - sqrt(up_diff) - sqrt(down_same)``.

    Records whose chromosome is not listed in the chromosome tree of
    ``Pbw`` are skipped.  A strand other than ``+`` or ``-`` prints the
    line and exits the process with status 1.

    Args:
        inputfile: path of the whitespace-separated input file (>= 6 cols).
        output: path of the output file (overwritten).
        Pbw: path of the first bigWig file.
        Nbw: path of the second bigWig file.
        score_range: number of bins in each up/down region.
    """
    pseudo = 0.2
    chrom_len = {}
    for node in BwIO(Pbw).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    with open(Pbw, 'rb') as p_fh, open(Nbw, 'rb') as n_fh, \
            open(inputfile) as inf, open(output, 'w') as outf:
        PH = BigWigFile(p_fh)
        NH = BigWigFile(n_fh)
        up = slice(100 - score_range, 100)

        for line in inf:
            ll = line.split()
            if ll[0] not in chrom_len:
                continue
            motif_len = int(ll[2]) - int(ll[1])
            # NOTE(review): when start < 100 the window is clamped at 0 but
            # still split into 200 bins, so bin 100 no longer corresponds to
            # ``start`` - confirm whether such records should be skipped.
            win_start = max(int(ll[1]) - 100, 0)
            win_end = int(ll[1]) + 100
            Psignal = list(
                PH.summarize(ll[0], win_start, win_end, 200).sum_data)
            Nsignal = list(
                NH.summarize(ll[0], win_start, win_end, 200).sum_data)
            DNase = sum(Psignal) + sum(Nsignal)

            down = slice(100 + motif_len, 100 + motif_len + score_range)
            if ll[5] == '+':
                S_up_same = sum(Psignal[up])
                S_up_diff = sum(Nsignal[up])
                S_down_same = sum(Psignal[down])
                S_down_diff = sum(Nsignal[down])
            elif ll[5] == '-':
                # On the reverse strand "up" is the region after the
                # interval and the second track is the same-strand one.
                S_up_same = sum(Nsignal[down])
                S_up_diff = sum(Psignal[down])
                S_down_same = sum(Nsignal[up])
                S_down_diff = sum(Psignal[up])
            else:
                print(line)
                sys.exit(1)

            FPscore1 = math.log(
                (S_up_same + pseudo) * (S_down_diff + pseudo) /
                ((S_up_diff + pseudo) * (S_down_same + pseudo)), 2)
            FPscore2 = (math.sqrt(S_up_same) + math.sqrt(S_down_diff) -
                        math.sqrt(S_up_diff) - math.sqrt(S_down_same))

            ll.extend([DNase, FPscore1, FPscore2])
            outf.write("\t".join(map(str, ll)) + "\n")

コード例 #10

0

ファイルを表示

def summary(bwfile1, bwfile2, bwfile_add, bedfile, topnumber, out):
    """Rank BED records by combined mean signal and dump per-base tracks.

    First pass: for every record of ``bedfile`` whose chromosome is listed
    in the chromosome trees of both ``bwfile1`` and ``bwfile2``, column 4 is
    replaced by ``"-"`` and the sum of the two mean signals over the
    interval is computed (a track with no covered bases contributes 0).
    Records are sorted by that value, highest first.

    Second pass: for at most ``topnumber`` records the per-base signal of
    ``bwfile1``, ``bwfile2`` and every file in ``bwfile_add`` is written to
    ``out`` as additional comma-separated columns.

    Elapsed time of both passes is printed to standard output.

    Args:
        bwfile1, bwfile2: paths of the two ranking bigWig files.
        bwfile_add: iterable of paths of additional bigWig files.
        bedfile: path of the whitespace-separated BED file (>= 4 columns).
        topnumber: maximum number of records to write.
        out: path of the output file (overwritten).
    """
    def chrom_sizes(path):
        # Map chromosome name -> size from the bigWig chromosome tree.
        sizes = {}
        for node in BwIO(path).chromosomeTree['nodes']:
            sizes[node['key']] = node['chromSize']
        return sizes

    def mean_signal(bw, chrom, start, end):
        # Mean over the interval, or 0 when no base is covered.
        region = bw.summarize(chrom, start, end, 1)
        if region.valid_count == 0:
            return 0
        return (region.sum_data / region.valid_count)[0]

    def per_base(bw, chrom, start, end):
        # Comma-separated per-base values over the interval.
        region = bw.summarize(chrom, start, end, end - start)
        return ",".join(map(str, region.sum_data))

    chrom_len1 = chrom_sizes(bwfile1)
    chrom_len2 = chrom_sizes(bwfile2)
    total_result = []
    extra_files = []

    with open(bwfile1, 'rb') as fh1, open(bwfile2, 'rb') as fh2:
        bwHandle1 = BigWigFile(fh1)
        bwHandle2 = BigWigFile(fh2)
        t = time.time()
        with open(bedfile) as inf:
            for line in inf:
                ll = line.split()
                ll[3] = "-"
                if ll[0] in chrom_len1 and ll[0] in chrom_len2:
                    start, end = int(ll[1]), int(ll[2])
                    mean_value1 = mean_signal(bwHandle1, ll[0], start, end)
                    mean_value2 = mean_signal(bwHandle2, ll[0], start, end)
                    total_result.append(ll + [mean_value1 + mean_value2])
        total_result.sort(reverse=True, key=lambda rec: rec[-1])

        try:
            bwHs = []
            for path in bwfile_add:
                fh = open(path, 'rb')
                extra_files.append(fh)
                bwHs.append(BigWigFile(fh))
            with open(out, 'w') as outf:
                # Single-argument print(...) emits the same text under both
                # the print statement and the print function.
                print("scaning 1st  %s" % (time.time() - t))
                t = time.time()
                for ll in total_result[:max(topnumber, 0)]:
                    chrom, start, end = ll[0], int(ll[1]), int(ll[2])
                    # Build a real list: the row is extended below, which a
                    # lazy map() result would not support.
                    result = [str(v) for v in ll]
                    result.append(per_base(bwHandle1, chrom, start, end))
                    result.append(per_base(bwHandle2, chrom, start, end))
                    for bwH in bwHs:
                        result.append(per_base(bwH, chrom, start, end))
                    outf.write("\t".join(result) + "\n")
        finally:
            # The number of extra files is dynamic, so close them by hand.
            for fh in extra_files:
                fh.close()
    print("scaning 2nd  %s" % (time.time() - t))

コード例 #11

0

ファイルを表示

def sitepro_scan(peak, outname, w_plus, w_minus, Cspan):
    """Write per-base signal and local window totals for both tracks.

    For each interval of ``peak`` the window
    ``[start - Cspan, end + Cspan)`` is read from both bigWig files with one
    bin per base (all zeros when a file returns no summary).  For every
    base of the original interval one line is written to
    ``<outname>_Cuts.txt``::

        chrom:pos-(pos+1)  plus  plus_window_sum  minus  minus_window_sum

    where the window sums cover the ``2 * Cspan`` bins around the base.

    Intervals with ``start < Cspan`` are printed to standard output and
    skipped.

    Args:
        peak: path of the whitespace-separated interval file.
        outname: prefix of the output file.
        w_plus: path of the bigWig file for the ``plus`` columns.
        w_minus: path of the bigWig file for the ``minus`` columns.
        Cspan: half width of the summing window, in bases.
    """
    with open(peak) as inf, \
            open(w_plus, 'rb') as plus_fh, \
            open(w_minus, 'rb') as minus_fh, \
            open(outname + "_Cuts.txt", 'w') as outf:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            chrm = ll[0]
            start = int(ll[1])
            end = int(ll[2])
            if start - Cspan < 0:
                print(ll)
                continue
            width = end - start + 2 * Cspan
            plus_obj = w_plus_H.summarize(chrm, start - Cspan, end + Cspan,
                                          width)
            minus_obj = w_minus_H.summarize(chrm, start - Cspan, end + Cspan,
                                            width)
            # Integer zeros on purpose: they are written verbatim, so the
            # dtype decides whether the output reads "0" or "0.0".
            if not plus_obj:
                plus_vector = numpy.zeros(width, dtype=int)
            else:
                plus_vector = plus_obj.sum_data
            if not minus_obj:
                minus_vector = numpy.zeros(width, dtype=int)
            else:
                minus_vector = minus_obj.sum_data

            for outpos in range(Cspan, end - start + Cspan):
                lo = outpos - Cspan
                hi = outpos + Cspan
                out_start = start + outpos - Cspan
                out_end = out_start + 1
                fields = [
                    chrm + ":" + str(out_start) + "-" + str(out_end),
                    plus_vector[outpos],
                    sum(plus_vector[lo:hi]),
                    minus_vector[outpos],
                    sum(minus_vector[lo:hi]),
                ]
                outf.write("\t".join(map(str, fields)) + "\n")
コード例 #12

0

ファイルを表示

ファイル: Sitepro_DNase_cleavage.py プロジェクト: Tarela/RivanaCode

def make_template(data, flank, pflank, topmotif, out, pbw, mbw):
    """Build and plot a strand-oriented average signal profile.

    All records of ``data`` are read and sorted by column 5 (as float,
    highest first).  For each record the ``2 * flank`` bases around
    ``start`` are read from both bigWig files with one bin per base, and a
    slice of ``2 * pflank`` bins is kept from each track.  For ``-`` records
    the profiles are reversed and the two tracks swap roles.  The collected
    slices are averaged with ``apply_mean`` and passed to ``plot_template``
    together with ``out``.

    The interval length is taken from the first record of ``data`` and
    assumed to be the same for every record.

    Args:
        data: path of the whitespace-separated input file (>= 6 columns).
        flank: half width of the window read from the bigWig files.
        pflank: half width of the slice kept for the profile.
        topmotif: unused (kept for interface compatibility).
        out: forwarded to ``plot_template``.
        pbw: path of the first bigWig file.
        mbw: path of the second bigWig file.
    """
    pp = []
    pm = []

    with open(pbw, 'rb') as plus_fh, open(mbw, 'rb') as minus_fh:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)

        with open(data) as inf:
            # The first record defines the interval length for all records.
            l1st = inf.readline().split()
            ml = int(l1st[2]) - int(l1st[1])
            inf.seek(0)
            templatelist = [line.split() for line in inf]

        templatelist.sort(key=lambda rec: float(rec[4]), reverse=True)

        # Slice bounds must be integers, hence the floor division.
        fwd_lo = flank + 1 + ml // 2 - pflank
        fwd_hi = flank + 1 + ml // 2 + pflank
        # Reversed profiles are shifted by the interval length.
        rev_lo = fwd_lo - ml
        rev_hi = fwd_hi - ml

        for ll in templatelist:
            win_start = int(ll[1]) - flank
            win_end = int(ll[1]) + flank
            p_sum = list(w_plus_H.summarize(ll[0], win_start, win_end,
                                            2 * flank).sum_data)
            m_sum = list(w_minus_H.summarize(ll[0], win_start, win_end,
                                             2 * flank).sum_data)
            if ll[5] == "+":
                pp.append(p_sum[fwd_lo:fwd_hi])
                pm.append(m_sum[fwd_lo:fwd_hi])
            elif ll[5] == '-':
                pm.append(p_sum[::-1][rev_lo:rev_hi])
                pp.append(m_sum[::-1][rev_lo:rev_hi])

    meanp = apply_mean(pp)
    meanm = apply_mean(pm)
    P = list(meanp)
    M = list(meanm)

    plot_template(P, M, out)
コード例 #13

0

ファイルを表示

def get_signal(inputfile, output, plusBW, minusBW, bwfolder, extend):
    """Append per-base signal of two bigWig tracks to each input record.

    For each record the window ``[max(0, start - extend), end + extend)``
    is read from both bigWig files with one bin per base.  The values of
    ``plusBW`` followed by those of ``minusBW`` are appended to the record;
    for records with ``-`` in column 6 both profiles are reversed and
    ``minusBW`` comes first.  Records whose chromosome name contains ``_``
    are skipped.

    If the signal cannot be read for a record (summarize raises, or either
    file returns no summary), the record is written without signal columns.

    Args:
        inputfile: path of the whitespace-separated input file.
        output: path of the output file (overwritten).
        plusBW: file name of the first bigWig file, relative to
            ``bwfolder``.
        minusBW: file name of the second bigWig file, relative to
            ``bwfolder``.
        bwfolder: directory holding the bigWig files; empty/None means
            ``"./"``.
        extend: number of bases added on both sides of the interval.
    """
    if not bwfolder:
        bwfolder = "./"
    if not bwfolder.endswith('/'):
        bwfolder += '/'

    with open(bwfolder + plusBW, 'rb') as plus_fh, \
            open(bwfolder + minusBW, 'rb') as minus_fh, \
            open(inputfile) as inf, \
            open(output, 'w') as outf:
        plus = BigWigFile(plus_fh)
        minus = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            if "_" in ll[0]:
                continue
            reverse_strand = len(ll) >= 6 and ll[5] == "-"
            S = max(0, int(ll[1]) - extend)
            E = int(ll[2]) + extend

            # Reset per record.  Previously a failed lookup reused the
            # previous record's signal, or raised NameError on the first
            # record.
            thisdata = []
            try:
                plus_signal = plus.summarize(ll[0], S, E, E - S)
                minus_signal = minus.summarize(ll[0], S, E, E - S)
            except Exception:
                # Best effort, as before: a record whose signal cannot be
                # read is still written, just without signal columns.
                plus_signal = minus_signal = None

            if plus_signal is not None and minus_signal is not None:
                plus_tmp = list(plus_signal.sum_data)
                minus_tmp = list(minus_signal.sum_data)
                if reverse_strand:
                    thisdata = minus_tmp[::-1] + plus_tmp[::-1]
                else:
                    thisdata = plus_tmp + minus_tmp

            outf.write("\t".join(map(str, ll + thisdata)) + "\n")

コード例 #14

0

ファイルを表示

    def summarize(self,
                  interval,
                  bins=None,
                  method='summarize',
                  function='mean'):
        """Return the signal of the bigWig file ``self.fn`` over ``interval``.

        Args:
            interval: object with ``chrom``, ``start`` and ``stop``.
            bins: number of bins; ``None`` returns one value per base.
            method: ``'get_as_array'`` (per-base values), ``'summarize'``
                (binned via ``BigWigFile.summarize``) or
                ``'ucsc_summarize'`` (delegates to ``self.ucsc_summarize``).
            function: statistic per bin.  The binned path handles ``'sum'``,
                ``'mean'``, ``'min'``, ``'max'`` and ``'std'``; any other
                value returns the raw summary object unchanged.

        Returns:
            A NumPy array with NaN/inf entries replaced by 0 (all zeros when
            the file has no data for the interval), or whatever
            ``self.ucsc_summarize`` returns.

        Raises:
            ValueError: ``method='ucsc_summarize'`` with an unsupported
                ``function``.
        """
        # Bins without data make the mean a 0/0 division.  That is expected,
        # so 'invalid' floating-point warnings are silenced for this call
        # only.  The context manager restores the previous setting on every
        # exit path, including the early return and the raise below.
        with np.errstate(invalid='ignore'):
            if (bins is None) or (method == 'get_as_array'):
                # bigWig is a binary format, so open the file in 'rb' mode.
                with open(self.fn, 'rb') as fh:
                    s = BigWigFile(fh).get_as_array(
                        interval.chrom,
                        interval.start,
                        interval.stop,
                    )
                if s is None:
                    s = np.zeros((interval.stop - interval.start, ))
                else:
                    s[np.isnan(s)] = 0

            elif method == 'ucsc_summarize':
                if function in ['mean', 'min', 'max', 'std', 'coverage']:
                    return self.ucsc_summarize(interval, bins,
                                               function=function)
                raise ValueError('function "%s" not supported by UCSC\'s '
                                 'bigWigSummary' % function)

            else:
                with open(self.fn, 'rb') as fh:
                    s = BigWigFile(fh).summarize(
                        interval.chrom, interval.start, interval.stop, bins)
                if s is None:
                    s = np.zeros((bins, ))
                elif function == 'sum':
                    s = s.sum_data
                elif function == 'mean':
                    s = s.sum_data / s.valid_count
                    s[np.isnan(s)] = 0
                elif function == 'min':
                    s = s.min_val
                    s[np.isinf(s)] = 0
                elif function == 'max':
                    s = s.max_val
                    s[np.isinf(s)] = 0
                elif function == 'std':
                    # NOTE(review): this is the mean of squares, not a
                    # standard deviation - confirm the intended statistic.
                    s = (s.sum_squares / s.valid_count)
                    s[np.isnan(s)] = 0

        return s

コード例 #15

0

ファイルを表示

ファイル: Foot_scan.py プロジェクト: Tarela/RivanaCode

def Readbw(bwfile,chrm,start,end,n):
    """Return binned mean signal of ``bwfile`` over ``chrm:start-end``.

    The interval is split into ``(end - start) // n`` bins.  Each bin's
    summed signal is divided by ``sudocount(valid_count)`` for that bin.

    Args:
        bwfile: path of the bigWig file.
        chrm: chromosome name.
        start, end: interval bounds (anything ``int()`` accepts).
        n: divisor applied to the interval length to get the bin count.

    Returns:
        List with one score per bin.
    """
    start = int(start)
    end = int(end)
    with open(bwfile, 'rb') as bw_fh:
        # Floor division: the bin count must be an integer.
        region = BigWigFile(bw_fh).summarize(chrm, start, end,
                                             (end - start) // n)
    # Build a real list so the array division below works even where map()
    # returns an iterator.
    count = [sudocount(c) for c in region.valid_count]
    return list(region.sum_data / count)

コード例 #16

0

ファイルを表示

def sitepro_scan(peak, out, w_plus, w_minus, bgmatrix, span, gen, lflank,
                 rflank):
    """Append observed signal, bias and bias-assigned signal to each record.

    For each interval of ``peak`` the window ``[start - span, end + span)``
    is read from both bigWig files with one bin per base, and the sequence
    covering that window plus ``lflank`` / ``rflank`` extra bases is looked
    up n-mer by n-mer (``n = lflank + rflank``) in the two tables returned
    by ``readBG``.  The record is written to ``out`` followed by six
    per-base blocks covering the original interval: ``w_plus`` signal,
    ``w_minus`` signal, first-table bias, second-table bias, and for each
    track ``bias / window_bias_total * window_signal_total`` where the
    window is the ``2 * span`` bins around the base.

    Intervals too close to position 0 (``start - span - lflank <= 0``) and
    intervals whose sequence contains ``N`` are skipped.

    Args:
        peak: path of the whitespace-separated interval file.
        out: path of the output file (overwritten).
        w_plus, w_minus: paths of the two bigWig files.
        bgmatrix: path passed to ``readBG``.
        span: half width of the normalisation window, in bases.
        gen: path of the 2bit genome file.
        lflank, rflank: bases before / from each position forming the n-mer.
    """
    nmer = lflank + rflank
    genome = twobitreader.TwoBitFile(gen)
    pBG, nBG = readBG(bgmatrix)

    # The input file used to be left open; the context manager closes it
    # together with the other three files.
    with open(peak) as inf, \
            open(w_plus, 'rb') as plus_fh, \
            open(w_minus, 'rb') as minus_fh, \
            open(out, 'w') as outf:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            chrm = ll[0]
            start = int(ll[1])
            end = int(ll[2])
            if start - span - lflank <= 0:
                continue
            width = end - start + 2 * span
            p_sum = list(w_plus_H.summarize(chrm, start - span, end + span,
                                            width).sum_data)
            n_sum = list(w_minus_H.summarize(chrm, start - span, end + span,
                                             width).sum_data)

            seq = genome[chrm][(start - span - lflank):(end + span + rflank)]
            if 'N' in seq.upper():
                continue
            # The second table is looked up on the sequence shifted by one
            # base relative to the first.
            pseq = seq[:-1]
            nseq = seq[1:]
            nwindows = len(pseq) + 1 - nmer
            p = [pBG[pseq[k:(k + nmer)].upper()] for k in range(nwindows)]
            n = [nBG[nseq[k:(k + nmer)].upper()] for k in range(nwindows)]

            p_assign = []
            n_assign = []
            for bp in range(len(p_sum) - 2 * span):
                lo = bp
                hi = bp + 2 * span
                ptotal = sum(p_sum[lo:hi])
                ntotal = sum(n_sum[lo:hi])
                pbias_per = p[bp + span] * 1.0 / sum(p[lo:hi])
                nbias_per = n[bp + span] * 1.0 / sum(n[lo:hi])
                p_assign.append(pbias_per * ptotal)
                n_assign.append(nbias_per * ntotal)

            newll = (ll
                     + p_sum[span:(len(p_sum) - span)]
                     + n_sum[span:(len(n_sum) - span)]
                     + p[span:(len(p) - span)]
                     + n[span:(len(n) - span)]
                     + p_assign
                     + n_assign)
            outf.write("\t".join(map(str, newll)) + "\n")

コード例 #17

0

ファイルを表示

ファイル: filetype_adapters.py プロジェクト: bakerwm/metaseq

    def summarize(self, interval, bins=None, method='summarize',
                  function='mean'):
        """Return the signal of the bigWig file ``self.fn`` over ``interval``.

        Args:
            interval: object with ``chrom``, ``start`` and ``stop``.
            bins: number of bins; ``None`` returns one value per base.
            method: ``'get_as_array'`` (per-base values), ``'summarize'``
                (binned via ``BigWigFile.summarize``) or
                ``'ucsc_summarize'`` (delegates to ``self.ucsc_summarize``).
            function: statistic per bin.  The binned path handles ``'sum'``,
                ``'mean'``, ``'min'``, ``'max'`` and ``'std'``; any other
                value returns the raw summary object unchanged.

        Returns:
            A NumPy array with NaN/inf entries replaced by 0 (all zeros when
            the file has no data for the interval), or whatever
            ``self.ucsc_summarize`` returns.

        Raises:
            ValueError: ``method='ucsc_summarize'`` with an unsupported
                ``function``.
        """
        # Bins without data make the mean a 0/0 division.  That is expected,
        # so 'invalid' floating-point warnings are silenced for this call
        # only.  The context manager restores the previous setting on every
        # exit path, including the early return and the raise below.
        with np.errstate(invalid='ignore'):
            if (bins is None) or (method == 'get_as_array'):
                # bigWig is a binary format, so open the file in 'rb' mode.
                with open(self.fn, 'rb') as fh:
                    s = BigWigFile(fh).get_as_array(
                        interval.chrom,
                        interval.start,
                        interval.stop,)
                if s is None:
                    s = np.zeros((interval.stop - interval.start,))
                else:
                    s[np.isnan(s)] = 0

            elif method == 'ucsc_summarize':
                if function in ['mean', 'min', 'max', 'std', 'coverage']:
                    return self.ucsc_summarize(interval, bins,
                                               function=function)
                raise ValueError('function "%s" not supported by UCSC\'s '
                                 'bigWigSummary' % function)

            else:
                with open(self.fn, 'rb') as fh:
                    s = BigWigFile(fh).summarize(
                        interval.chrom,
                        interval.start,
                        interval.stop, bins)
                if s is None:
                    s = np.zeros((bins,))
                elif function == 'sum':
                    s = s.sum_data
                elif function == 'mean':
                    s = s.sum_data / s.valid_count
                    s[np.isnan(s)] = 0
                elif function == 'min':
                    s = s.min_val
                    s[np.isinf(s)] = 0
                elif function == 'max':
                    s = s.max_val
                    s[np.isinf(s)] = 0
                elif function == 'std':
                    # NOTE(review): this is the mean of squares, not a
                    # standard deviation - confirm the intended statistic.
                    s = (s.sum_squares / s.valid_count)
                    s[np.isnan(s)] = 0

        return s

コード例 #18

0

ファイルを表示

ファイル: Sitepro_storeData_peak.py プロジェクト: Tarela/RivanaCode

def sitepro_scan(peak, out, w_plus, w_minus):
    """Append per-base plus/minus strand bigWig signal to every peak line.

    Each whitespace-separated line of ``peak`` is expected to start with
    ``chrom start end``.  Both bigWig files are summarized over
    ``[start, end)`` using one bin per base, and the original fields followed
    by the plus-strand and then the minus-strand ``sum_data`` values are
    written to ``out`` as one tab-separated line.

    Args:
        peak: path of the input interval file.
        out: path of the output file (overwritten).
        w_plus: path of the plus-strand bigWig file.
        w_minus: path of the minus-strand bigWig file.
    """
    # The context managers release all four handles even when a malformed
    # line raises part-way through; previously the peak file and both bigWig
    # handles were never closed.
    with open(peak) as inf, \
            open(w_plus, 'rb') as plus_fh, \
            open(w_minus, 'rb') as minus_fh, \
            open(out, 'w') as outf:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)
        for line in inf:
            ll = line.split()
            chrom, start, end = ll[0], int(ll[1]), int(ll[2])
            nbins = end - start  # one summary bin per base
            p_sum = list(
                w_plus_H.summarize(chrom, start, end, nbins).sum_data)
            m_sum = list(
                w_minus_H.summarize(chrom, start, end, nbins).sum_data)
            outf.write("\t".join(map(str, ll + p_sum + m_sum)) + "\n")

コード例 #19

0

ファイルを表示

ファイル: Prediction_ATACvsDNase.py プロジェクト: Tarela/RivanaCode

def getsignal(inputfile, outputfile, ATAC100, ATAC247, ATACall, DNase, pspan):
    """Append the mean signal of four bigWig tracks around each region centre.

    For every line of ``inputfile`` (``chrom start end ...``) that is not on
    ``chrY``, a window of ``pspan`` bases on either side of the region
    midpoint is summarized into a single bin for each track.  The summed
    signal divided by the window width (``2 * pspan``) is appended to the
    original fields in the order ATAC100, ATAC247, ATACall, DNase, and the
    line is written tab-separated to ``outputfile``.

    Args:
        inputfile: path of the input interval file.
        outputfile: path of the output file (overwritten).
        ATAC100, ATAC247, ATACall, DNase: paths of the four bigWig tracks.
        pspan: half-width of the window, in bases.
    """
    bw_handles = []
    try:
        for path in (ATAC100, ATAC247, ATACall, DNase):
            bw_handles.append(open(path, 'rb'))
        tracks = [BigWigFile(fh) for fh in bw_handles]

        with open(inputfile) as inf, open(outputfile, 'w') as outf:
            for line in inf:
                ll = line.split()
                if ll[0] == 'chrY':
                    continue
                # Floor division keeps the coordinates integral on both
                # Python 2 and Python 3.
                center = (int(ll[1]) + int(ll[2])) // 2
                lo, hi = center - pspan, center + pspan
                signals = [
                    float(bw.summarize(ll[0], lo, hi, 1).sum_data)
                    / (2 * pspan)
                    for bw in tracks
                ]
                outf.write("\t".join(map(str, ll + signals)) + "\n")
    finally:
        # Release the bigWig handles no matter how the loop ended.
        for fh in bw_handles:
            fh.close()

コード例 #20

0

ファイルを表示

ファイル: fraglen_predcut.py プロジェクト: Tarela/RivanaCode

def fragment_v_predcut(pefrag, output, bwp, bwn):
    """Pair every fragment with the bigWig values at its two cut sites.

    Each line of ``pefrag`` starts with ``chrom start end``.  The plus-side
    cut is ``start`` and the minus-side cut is ``end - 1``.  The single-base
    value at the plus cut is read from ``bwp`` and the one at the minus cut
    from ``bwn``.  Fragments for which either value equals -1 are dropped
    (presumably a "no prediction" sentinel -- confirm against the producer
    of the bigWig files).  Remaining fragments are written to ``output`` as
    ``chrom, pcut, ncut, fragment length, plus value, minus value``.
    """
    frag_in = open(pefrag)
    table_out = open(output, 'w')
    plus_track = BigWigFile(open(bwp, 'rb'))
    minus_track = BigWigFile(open(bwn, 'rb'))

    for record in frag_in:
        fields = record.split()
        chrm = fields[0]
        pcut = int(fields[1])
        ncut = int(fields[2]) - 1

        pPred = float(plus_track.summarize(chrm, pcut, pcut + 1, 1).sum_data)
        nPred = float(minus_track.summarize(chrm, ncut, ncut + 1, 1).sum_data)
        if -1 in (pPred, nPred):
            continue

        row = [chrm, pcut, ncut, ncut - pcut + 1, pPred, nPred]
        table_out.write("\t".join([str(value) for value in row]) + "\n")

    frag_in.close()
    table_out.close()

コード例 #21

0

ファイルを表示

ファイル: Score_FT_FPregion.py プロジェクト: Tarela/RivanaCode

def getsignal(inputfile, outputfile, pcut, pspan, FPregion):
    """Annotate motif sites with cut counts, a footprint score and FP range.

    The motif length ``ml`` is taken from the first line of ``inputfile``
    (``end - start``) and applied to every line.  For each site a window of
    ``pspan`` bases on either side of ``start + ml // 2`` is read per base
    from the ``pcut`` bigWig, and four values are appended to the line:

    * ``TC``  -- total signal in the window,
    * ``FOS`` -- ``-((C + 1) / (R + 1) + (C + 1) / (L + 1))`` where ``C`` is
      the signal over the central ``ml`` bases and ``L`` / ``R`` the signal
      over the ``ml`` bases immediately left / right of it,
    * the minimum and maximum per-base value of the ``FPregion`` bigWig over
      ``[start, end)`` (all zeros when that track cannot be summarized).

    An empty ``inputfile`` produces an empty ``outputfile``.

    Args:
        inputfile: path of the input interval file (``chrom start end ...``).
        outputfile: path of the output file (overwritten).
        pcut: path of the cut-count bigWig file.
        pspan: half-width of the cut window, in bases.
        FPregion: path of the footprint bigWig file.
    """
    with open(pcut, 'rb') as pcut_fh, open(FPregion, 'rb') as fp_fh, \
            open(inputfile) as inf:
        pcutbw = BigWigFile(pcut_fh)
        FPbw = BigWigFile(fp_fh)

        first = inf.readline().split()
        inf.seek(0)

        with open(outputfile, 'w') as outf:
            if not first:
                # Nothing to annotate; avoid the IndexError on first[2].
                return

            ml = int(first[2]) - int(first[1])
            # Loop-invariant offsets: half the motif, and the index of the
            # first central base inside the 2 * pspan window.
            half = ml // 2
            core = pspan - half

            for line in inf:
                ll = line.split()
                start, end = int(ll[1]), int(ll[2])
                mid = start + half
                cut = list(
                    pcutbw.summarize(ll[0], mid - pspan, mid + pspan,
                                     2 * pspan).sum_data)
                TC = sum(cut)
                C = sum(cut[core:core + ml])
                L = sum(cut[core - ml:core])
                R = sum(cut[core + ml:core + 2 * ml])
                FOS = -1 * ((C + 1) / (R + 1) + (C + 1) / (L + 1))
                try:
                    FP_bw = [
                        float(v) for v in
                        FPbw.summarize(ll[0], start, end,
                                       end - start).sum_data
                    ]
                except Exception:
                    # Best-effort fallback kept from the original, but no
                    # longer swallowing KeyboardInterrupt / SystemExit.
                    FP_bw = [0.0] * (end - start)
                newll = ll + [TC, FOS, min(FP_bw), max(FP_bw)]
                outf.write("\t".join(map(str, newll)) + "\n")

コード例 #22

0

ファイルを表示

def extract_phastcons(bedfile, phas_chrnames, width, pf_res):
    """Extract phastcons scores around the regions of a BED file.

    Every region is replaced by a window of ``width`` bases centred on its
    midpoint (shifted to start at 0 when it would begin before the
    chromosome start).  The window is summarized from ``<chrom>.bw`` (looked
    up in the current working directory) into ``width // pf_res`` bins, and
    the per-bin means are averaged over all regions, ignoring NaN values.

    Args:
        bedfile: path of the BED file, parsed with ``parse_BED``.
        phas_chrnames: chromosome names that have a ``<chrom>.bw`` file; only
            those also present in the BED file are processed, in this order.
        width: window width in bases.
        pf_res: number of bases per bin.

    Returns:
        A list with one average score per bin.  If any bin has no valid
        value at all, a list of ``width // pf_res`` zeros is returned
        instead; if no region could be summarized the list is empty.
    """
    nbins = width // pf_res
    half = width // 2
    sumscores = []

    info("read bed file...")
    with open(bedfile) as bfhd:
        bed = parse_BED(bfhd)
        bed_chroms = set(bed.peaks.keys())
        chrs = [c for c in phas_chrnames if c in bed_chroms]

        for chrom in chrs:
            info("processing chromosome: %s" % chrom)
            # One bigWig per chromosome; close it before moving on so the
            # number of open handles stays constant.
            with open(chrom + '.bw', 'rb') as bw_fh:
                bw = BigWigFile(bw_fh)
                for peak in bed.peaks[chrom]:
                    # Midpoint of the region, extended by half the width on
                    # both sides.
                    mid = int((peak[0] + peak[1]) / 2)
                    left = mid - half
                    right = mid + half
                    if left < 0:
                        left = 0
                        right = width

                    summary = bw.summarize(chrom, left, right, nbins)
                    if not summary:
                        continue
                    sumscores.append(summary.sum_data / summary.valid_count)

    # Transpose: one list per bin holding that bin's score in every region.
    per_bin = [list(column) for column in zip(*sumscores)]

    # Exclude NaN (bins without any valid base in a region).
    per_bin = [[v for v in column if not math.isnan(v)] for column in per_bin]
    try:
        conscores = [sum(column) / len(column) for column in per_bin]
    except ZeroDivisionError:
        conscores = [0] * nbins

    return conscores

コード例 #23

0

ファイルを表示

class TestBigWig(unittest.TestCase):
    """Regression tests for ``BigWigFile.query`` and ``BigWigFile.summarize``.

    All tests read the fixture ``test_data/bbi_tests/test.bw``.
    """

    # Expected mean of each of the 10 bins covering chr1:10000-20000.
    EXPECTED_BIN_MEANS = [
        -0.17557571594973645, -0.054009292602539061, -0.056892242431640622,
        -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557,
        0.036949024200439454, 0.076638259887695306, 0.043518108367919923,
        0.01554749584197998
    ]

    def setUp(self):
        # bigWig is a binary format, so the fixture is opened in 'rb' mode;
        # addCleanup closes the handle after every test, pass or fail.
        f = open("test_data/bbi_tests/test.bw", "rb")
        self.addCleanup(f.close)
        self.bw = BigWigFile(file=f)

    def test_get_summary(self):
        data = self.bw.query("chr1", 10000, 20000, 10)
        means = [float(x['mean']) for x in data]
        self.assertTrue(numpy.allclose(means, self.EXPECTED_BIN_MEANS))

        # Summarize variant
        sd = self.bw.summarize("chr1", 10000, 20000, 10)
        self.assertTrue(
            numpy.allclose(sd.sum_data / sd.valid_count,
                           self.EXPECTED_BIN_MEANS))

        # Test min and max for this entire summary region
        data = self.bw.query("chr1", 10000, 20000, 1)
        maxs = [float(x['max']) for x in data]
        mins = [float(x['min']) for x in data]
        self.assertEqual(maxs, [0.289000004529953])
        self.assertEqual(mins, [-3.9100000858306885])

    def test_get_leaf(self):
        data = self.bw.query("chr1", 11000, 11005, 5)
        means = [float(x['mean']) for x in data]
        self.assertTrue(numpy.allclose(means, [
            0.050842501223087311, -2.4589500427246094, 0.050842501223087311,
            0.050842501223087311, 0.050842501223087311
        ]))

        # Test min and max for this entire leaf region
        data = self.bw.query("chr1", 11000, 11005, 1)
        maxs = [float(x['max']) for x in data]
        mins = [float(x['min']) for x in data]
        self.assertEqual(maxs, [0.050842501223087311])
        self.assertEqual(mins, [-2.4589500427246094])

    def test_wrong_nochrom(self):
        # A chromosome that is absent from the file yields None, not [].
        data = self.bw.query("chr2", 0, 10000, 10)
        self.assertIsNone(data)

コード例 #24

0

ファイルを表示

ファイル: conservation_plot.py プロジェクト: asntech/chilin

def extract_phastcons(bedfile, phas_chrnames, width, pf_res):
    """Extract phastcons scores around the regions of a BED file.

    Every region is replaced by a window of ``width`` bases centred on its
    midpoint (shifted to start at 0 when it would begin before the
    chromosome start).  The window is summarized from ``<chrom>.bw`` (looked
    up in the current working directory) into ``width // pf_res`` bins, and
    the per-bin means are averaged over all regions, ignoring NaN values.

    Args:
        bedfile: path of the BED file, parsed with ``parse_BED``.
        phas_chrnames: chromosome names that have a ``<chrom>.bw`` file; only
            those also present in the BED file are processed, in this order.
        width: window width in bases.
        pf_res: number of bases per bin.

    Returns:
        A list with one average score per bin.  If any bin has no valid
        value at all, a list of ``width // pf_res`` zeros is returned
        instead; if no region could be summarized the list is empty.
    """
    nbins = width // pf_res
    half = width // 2
    sumscores = []

    info("read bed file...")
    with open(bedfile) as bfhd:
        bed = parse_BED(bfhd)
        bed_chroms = set(bed.peaks.keys())
        chrs = [c for c in phas_chrnames if c in bed_chroms]

        for chrom in chrs:
            info("processing chromosome: %s" % chrom)
            # One bigWig per chromosome; close it before moving on so the
            # number of open handles stays constant.
            with open(chrom + '.bw', 'rb') as bw_fh:
                bw = BigWigFile(bw_fh)
                for peak in bed.peaks[chrom]:
                    # Midpoint of the region, extended by half the width on
                    # both sides.
                    mid = int((peak[0] + peak[1]) / 2)
                    left = mid - half
                    right = mid + half
                    if left < 0:
                        left = 0
                        right = width

                    summary = bw.summarize(chrom, left, right, nbins)
                    if not summary:
                        continue
                    sumscores.append(summary.sum_data / summary.valid_count)

    # Transpose: one list per bin holding that bin's score in every region.
    per_bin = [list(column) for column in zip(*sumscores)]

    # Exclude NaN (bins without any valid base in a region).
    per_bin = [[v for v in column if not math.isnan(v)] for column in per_bin]
    try:
        conscores = [sum(column) / len(column) for column in per_bin]
    except ZeroDivisionError:
        conscores = [0] * nbins

    return conscores

コード例 #25

0

ファイルを表示

ファイル: get_cleavage_pattern_motif.py プロジェクト: Tarela/RivanaCode

def get_signal(inputfile, output, plus, minus, fulllen):
    """Write strand-oriented per-base cleavage signal around each motif.

    Each input line needs at least six fields (``chrom start end . . strand``).
    A window of ``fulllen`` bases is placed so that the motif sits
    ``fulllen // 2 - motiflen // 2`` bases from the window's upstream edge.
    For ``+`` motifs the forward signal comes from ``plus`` and the reverse
    signal from ``minus``; for any other strand the tracks are swapped and
    both vectors reversed so that position 0 is always upstream.

    If the signal cannot be obtained for a line (missing strand column,
    chromosome absent from a bigWig, ...) the line is printed and both
    vectors are filled with ``fulllen`` zeros.

    Args:
        inputfile: path of the motif file.
        output: path of the output file (overwritten).
        plus: path of the plus-strand bigWig file.
        minus: path of the minus-strand bigWig file.
        fulllen: total window length in bases.
    """
    with open(plus, 'rb') as plus_fh, open(minus, 'rb') as minus_fh, \
            open(inputfile) as inf, open(output, 'w') as outf:
        plusbw = BigWigFile(plus_fh)
        minusbw = BigWigFile(minus_fh)

        for line in inf:
            ll = line.split()
            motiflen = int(ll[2]) - int(ll[1])
            upstream_ext = fulllen // 2 - motiflen // 2
            try:
                if ll[5] == "+":
                    start = int(ll[1]) - upstream_ext
                    end = start + fulllen
                    forward_signal = list(
                        plusbw.summarize(ll[0], start, end, fulllen).sum_data)
                    reverse_signal = list(
                        minusbw.summarize(ll[0], start, end,
                                          fulllen).sum_data)
                else:
                    end = int(ll[2]) + upstream_ext
                    start = end - fulllen
                    forward_signal = list(
                        minusbw.summarize(ll[0], start, end,
                                          fulllen).sum_data)[::-1]
                    reverse_signal = list(
                        plusbw.summarize(ll[0], start, end,
                                         fulllen).sum_data)[::-1]
            except Exception:
                print(ll)
                # end - start always equals fulllen; using fulllen directly
                # avoids reading start/end when the failure happened before
                # they were assigned for this line (NameError on the first
                # line, stale values afterwards).
                forward_signal = [0] * fulllen
                reverse_signal = [0] * fulllen

            newll = ll + forward_signal + reverse_signal
            outf.write("\t".join(map(str, newll)) + "\n")

コード例 #26

0

ファイルを表示

ファイル: histmods.py プロジェクト: msrtd/enhancer_prediction

def check_position(chrom, start, end):
    """Tell whether ``[start, end]`` is well covered by the bigWig tracks.

    Every ``*.bigWig`` / ``*.bw`` file found in any directory under
    ``DATAPATH + "data"`` is summarized over ``[start, end + 1)`` in a single
    bin.  A track counts as valid when at least 10% of the bases in the
    region have a value; a track that does not contain ``chrom`` counts as
    not valid.

    Returns:
        True when at least 75% of the tracks are valid, False otherwise
        (including when no track file is found).
    """
    valids = 0.
    wrong = 0.
    span = end - start + 1
    for directory in [x[0] for x in os.walk(DATAPATH + "data")]:
        for filename in glob(directory + "/*.bigWig") + glob(directory +
                                                             "/*.bw"):
            # bigWig is binary; 'rb' plus the context manager also stops
            # this loop from leaking one handle per track.
            with open(filename, "rb") as f:
                summary = BigWigFile(file=f).summarize(chrom, start, end + 1,
                                                       1)
            # summarize() yields None for a chromosome the file lacks.
            if summary is None or summary.valid_count * 10 < span:
                wrong += 1
            else:
                valids += 1
    if valids + wrong == 0:
        # No tracks at all: nothing supports the region.
        return False
    return (valids / (valids + wrong) >= 0.75)

コード例 #27

0

ファイルを表示

def get_signal(inputfile, output, signalbw, flank=50):
    """Append the total bigWig signal around each region to its line.

    Lines whose chromosome is not listed in the bigWig chromosome tree are
    dropped.  For the others, ``[max(start - flank, 0), end + flank)`` is
    summarized into one bin and the summed signal is appended as the last
    tab-separated column.

    Args:
        inputfile: path of the input interval file (``chrom start end ...``).
        output: path of the output file (overwritten).
        signalbw: path of the bigWig file.
        flank: number of bases added on each side of the region
            (defaults to 50, the previously hard-coded value).
    """
    chrom_len = {}
    for node in BwIO(signalbw).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    with open(signalbw, 'rb') as bw_fh, open(inputfile) as inf, \
            open(output, 'w') as outf:
        bwHandle = BigWigFile(bw_fh)
        for line in inf:
            ll = line.split()
            if ll[0] not in chrom_len:
                continue
            signal = bwHandle.summarize(ll[0], max(int(ll[1]) - flank, 0),
                                        int(ll[2]) + flank, 1)
            ll.append(str(float(signal.sum_data)))
            outf.write("\t".join(ll) + "\n")

コード例 #28

0

ファイルを表示

ファイル: histmods.py プロジェクト: msrtd/enhancer_prediction

def count_mean_signal(enhancers, bigwig_file, name):
    """Flag enhancers that are poorly covered by a bigWig track.

    Writes ``<bigwig_file>.means.<basename of name>`` with one
    ``id<TAB>value`` line per enhancer.  In the current code the value is
    1.0 when fewer than 10% of the bases in ``[start, end]`` have data and
    0.0 otherwise (the mean-signal computation is commented out).

    Args:
        enhancers: iterable of objects exposing ``id``, ``chromosome``,
            ``start`` and ``end``.
        bigwig_file: path of the bigWig file.
        name: label for the enhancer set; only the part after the last
            ``/`` is used in the output file name.

    Returns:
        A list with 1 for every poorly covered enhancer and 0 for the
        others, in input order; an empty list when the output file already
        exists (nothing is recomputed in that case).
    """
    print "processing", bigwig_file

    # Skip the work entirely when the output file is already there.
    if os.path.exists(bigwig_file + ".means." + name.split('/')[-1]):
        print "file exists:", bigwig_file + ".means." + name.split('/')[-1]
        return []

    #output = StringIO.StringIO()

    # NOTE(review): bigWig is binary but the file is opened in text mode
    # ("r"); other callers in this code base use 'rb' -- confirm.
    f = open(bigwig_file, "r")
    print "bigwig file opened"
    bigwig = BigWigFile(file=f)
    # NOTE(review): mean_all is only referenced by the commented-out
    # expression below; the call is kept in case it has side effects.
    mean_all = count_chrom_mean(bigwig)
    print "chromosome means counted"

    output2 = open(bigwig_file + ".means." + name.split('/')[-1], "w")
    print "output file opened", bigwig_file + ".means." + name.split('/')[-1]

    start = time.clock()
    fails = []
    i = 0
    for enh in enhancers:
        # Progress report every 10000 enhancers.
        if i % 10000 == 0:
            print bigwig_file, name, i

        # One bin over the whole enhancer; end + 1 because the stop
        # coordinate passed to summarize() is exclusive.
        summary = bigwig.summarize(enh.chromosome, enh.start, enh.end + 1, 1)

        #+1 added 24.09.15 after finding endpoint not included
        # Fewer than 10% of the bases carry data -> counted as a failure.
        if summary.valid_count * 10 < enh.end - enh.start + 1:
            mean = 1  # mean_all[enh.chromosome]
            fails.append(1)
        else:
            mean = 0  # summary.sum_data / summary.valid_count
            fails.append(0)
        i += 1
        output2.write("%d\t%f\n" % (enh.id, mean))
    output2.close()
    f.close()

    print "output written to: %s.means.%s" % (bigwig_file, name.split('/')[-1])
    end = time.clock()
    print "time: %.2f s" % (end - start)
    return fails

コード例 #29

0

ファイルを表示

ファイル: make_footprint_from_bw.py プロジェクト: Tarela/RivanaCode

def summary(bwfile, bedfile, out, central_max, central_min, flanking_max,
            flanking_min, cutoff):
    """Scan BED regions for footprints using per-base bigWig signal.

    For every line of ``bedfile`` (at least ``chrom start end name``) whose
    chromosome is listed in the bigWig chromosome tree, the per-base signal
    over ``[start, end)`` is passed to ``caculate_footprint`` together with
    the size/cutoff parameters.  Each reported footprint ``ft`` is written
    to ``out`` as ``chrom, start + ft[0], start + ft[1], name, ft[2]``.
    The elapsed scanning time is printed at the end.

    Args:
        bwfile: path of the bigWig file.
        bedfile: path of the input BED file.
        out: path of the output file (overwritten).
        central_max, central_min, flanking_max, flanking_min, cutoff:
            forwarded unchanged to ``caculate_footprint``.
    """
    chrom_len = {}
    for node in BwIO(bwfile).chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    with open(bwfile, 'rb') as bw_fh, open(bedfile) as inf, \
            open(out, 'w') as outf:
        bwHandle = BigWigFile(bw_fh)
        t = time.time()
        for line in inf:
            ll = line.split()
            if ll[0] not in chrom_len:
                continue
            start, end = int(ll[1]), int(ll[2])
            # Named "signal" rather than "summary" so the function does not
            # rebind its own name locally.
            signal = bwHandle.summarize(ll[0], start, end, end - start)
            digital = list(signal.sum_data)
            FT = caculate_footprint(digital, central_max, central_min,
                                    flanking_max, flanking_min, cutoff)
            for ft in FT:
                bed = "\t".join(
                    map(str, [
                        ll[0], start + ft[0], start + ft[1], ll[3], ft[2]
                    ])) + "\n"
                outf.write(bed)
    # Same text the Python 2 print statement produced (the literal already
    # ends with a space and print added another one).
    print("scaning 1st  %s" % (time.time() - t))

コード例 #30

0

ファイルを表示

ファイル: bigwig_tests.py プロジェクト: jeffhsu3/bx-python

class TestBigWig(unittest.TestCase):
    """Regression tests for ``BigWigFile.query`` and ``BigWigFile.summarize``.

    All tests read the fixture ``test_data/bbi_tests/test.bw``.
    """

    # Expected mean of each of the 10 bins covering chr1:10000-20000.
    EXPECTED_BIN_MEANS = [
        -0.17557571594973645, -0.054009292602539061, -0.056892242431640622,
        -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557,
        0.036949024200439454, 0.076638259887695306, 0.043518108367919923,
        0.01554749584197998
    ]

    def setUp(self):
        # bigWig is a binary format, so the fixture is opened in 'rb' mode;
        # addCleanup closes the handle after every test, pass or fail.
        f = open("test_data/bbi_tests/test.bw", "rb")
        self.addCleanup(f.close)
        self.bw = BigWigFile(file=f)

    def test_get_summary(self):
        data = self.bw.query("chr1", 10000, 20000, 10)
        means = [float(x['mean']) for x in data]
        self.assertTrue(numpy.allclose(means, self.EXPECTED_BIN_MEANS))

        # Summarize variant
        sd = self.bw.summarize("chr1", 10000, 20000, 10)
        self.assertTrue(
            numpy.allclose(sd.sum_data / sd.valid_count,
                           self.EXPECTED_BIN_MEANS))

        # Test min and max for this entire summary region
        data = self.bw.query("chr1", 10000, 20000, 1)
        maxs = [float(x['max']) for x in data]
        mins = [float(x['min']) for x in data]
        self.assertEqual(maxs, [0.289000004529953])
        self.assertEqual(mins, [-3.9100000858306885])

    def test_get_leaf(self):
        data = self.bw.query("chr1", 11000, 11005, 5)
        means = [float(x['mean']) for x in data]
        self.assertTrue(numpy.allclose(means, [
            0.050842501223087311, -2.4589500427246094, 0.050842501223087311,
            0.050842501223087311, 0.050842501223087311
        ]))

        # Test min and max for this entire leaf region
        data = self.bw.query("chr1", 11000, 11005, 1)
        maxs = [float(x['max']) for x in data]
        mins = [float(x['min']) for x in data]
        self.assertEqual(maxs, [0.050842501223087311])
        self.assertEqual(mins, [-2.4589500427246094])

    def test_wrong_nochrom(self):
        # A chromosome that is absent from the file yields None, not [].
        data = self.bw.query("chr2", 0, 10000, 10)
        self.assertIsNone(data)

コード例 #31

0

ファイルを表示

ファイル: scan_readscount_separate_cutseq_seqtypeonly_alluniqfrag.py プロジェクト: Tarela/RivanaCode

def get_regionLevel_reads(inbed, outputname, plusbw, minusbw, species, flank):
    """Count cut events per sequence context for every region of a BED file.

    Writes ``<outputname>_seqtype.bed``.  The first line is a header made of
    ``C0 .. C<n-1>`` (``n`` = number of columns of the first input line)
    followed by the sorted keys of ``make_nmer_dict(2 * flank)``.  Every
    following line holds the original fields of one region followed by the
    accumulated signal of each key, in the same sorted order.

    For each base of a region with a positive plus-strand value, the
    ``2 * flank`` bases starting ``flank`` bases upstream of it are taken
    from the genome and the value is added to that sequence's counter.  The
    minus strand is handled the same way on a window shifted by one base and
    reverse-complemented.  Windows containing ``N`` are ignored.  Regions
    for which either bigWig returns no summary keep all-zero counters.

    Args:
        inbed: path of the input BED-like file (``chrom start end ...``).
        outputname: prefix of the output file.
        plusbw: path of the plus-strand bigWig file.
        minusbw: path of the minus-strand bigWig file.
        species: genome name used to build the hard-coded 2bit path.
        flank: half-length of the sequence context.
    """
    # NOTE(review): the genome location is hard-coded to a site-specific
    # directory.
    genome = twobitreader.TwoBitFile("/scratch/sh8tv/Data/Genome/%s/%s.2bit" %
                                     (species, species))
    countdict_template = make_nmer_dict(2 * flank)
    #rddict_template = make_rd_dict(2*flank)

    plusBWH = BigWigFile(open(plusbw, 'rb'))
    minusBWH = BigWigFile(open(minusbw, 'rb'))

    # Only the commented-out "Rdict" code below draws random numbers.
    random.seed(1228)

    inf = open(inbed)
    outf = open(outputname + "_seqtype.bed", 'w')
    #outfRD = open(outputname + "_rd.bed",'w')
    # NOTE(review): seqtypes is not used below; the sorted keys are
    # recomputed where needed.
    seqtypes = sorted(countdict_template.keys())
    newll_seq = []
    #newll_rd = []
    # The header needs one generic column name per input column, so peek at
    # the first line (the file is rewound before the main loop).
    infileLen = len(inf.readline().split())
    for i in range(infileLen):
        newll_seq.append("C" + str(i))
    # newll_rd.append("C"+str(i))
    newll_seq += sorted(countdict_template.keys())
    #newll_rd += sorted(rddict_template.keys())

    outf.write("\t".join(newll_seq) + "\n")
    #outfRD.write("\t".join(newll_rd)+"\n")

    inf.seek(0)
    for line in inf:
        # Fresh zeroed counters for every region.
        Sdict = deepcopy(countdict_template)
        # Rdict = deepcopy(rddict_template)
        ll = line.split()
        chrm = ll[0]
        #center = (int(ll[1]) + int(ll[2]))/2
        start = int(ll[1])  #max(0,center-ext)
        end = int(ll[2])  #center + ext
        # One bin per base over [start, end).
        plusSig_obj = plusBWH.summarize(chrm, start, end,
                                        end - start)  #.sum_data
        minusSig_obj = minusBWH.summarize(chrm, start, end,
                                          end - start)  #.sum_data

        #newll_seq = ll + [Sdict[x] for x in sorted(Sdict.keys())]
        #outf.write("\t".join(map(str,newll_seq))+"\n")

        if plusSig_obj and minusSig_obj:
            plusSig = plusSig_obj.sum_data
            minusSig = minusSig_obj.sum_data

            # Sequence padded by `flank` on both sides so that index i maps
            # to the context window of base start + i; the minus-strand
            # window is shifted by one base.
            # NOTE(review): start - flank can be negative near a chromosome
            # start; behaviour then depends on the 2bit slicing -- confirm.
            plusSequence = genome[chrm][(start - flank):(end + flank)].upper()
            minusSequence = genome[chrm][(start - flank + 1):(end + flank +
                                                              1)].upper()

            for i in range(len(plusSig)):
                #position = start + i
                pcuts = plusSig[i]
                if pcuts > 0:
                    pseq = plusSequence[i:(i + 2 * flank)].upper()
                    #pseqRV = revcomp(plusSequence_reverse[i:(i+2*flank)]).upper()
                    if not "N" in pseq:  #and not 'N' in pseqRV:
                        #    p_out = seq2biasParm(pseq,B,simplex_code)
                        #    plus_data += pcuts*p_out
                        Sdict[pseq] += pcuts
                #   Rdict["rd"+str(random.randint(1,4**(2*flank)))] += 1#pcuts
                #plus_readscount += pcuts
                #plus_biassum += biasdict[pseq]*pcuts
                #plus_biasCB += (biasdict[pseq]+biasdict[pseqRV] ) *pcuts/2

                #print i,pcuts,plus_readscount
            for i in range(len(minusSig)):
                #position = start + i
                mcuts = minusSig[i]
                if mcuts > 0:
                    #                tmpseq = minusSequence[i:(i+2*flank)]
                    # Minus-strand context is read on the reverse strand.
                    mseq = revcomp(minusSequence[i:(i + 2 * flank)]).upper()
                    #mseqRV = minusSequence_reverse[i:(i+2*flank)].upper()
                    if not "N" in mseq:  #and not "N" in mseqRV:
                        #    m_out = seq2biasParm(mseq,B,simplex_code)
                        #    minus_data += mcuts*m_out
                        Sdict[mseq] += mcuts
                    #Rdict["rd"+str(random.randint(1,4**(2*flank)))] += 1#mcuts
                    #minus_readscount += mcuts
                    #minus_biassum += biasdict[mseq]*mcuts
                    #minus_biasCB += (biasdict[mseq]+biasdict[mseqRV] ) *mcuts/2
#                print chrm,start,end,i,mcuts,minus_biassum,minus_biasCB
#plus_biasave = plus_biassum / plus_readscount
#minus_biasave = minus_biassum / minus_readscount
#newll = ll + [plus_readscount,minus_readscount,plus_biassum,minus_biassum]#plus_biassum,minus_biassum,plus_biasCB,minus_biasCB] #+ list(plus_data) + list(minus_data)

        # Written for every region, including those without any summary.
        newll_seq = ll + [Sdict[x] for x in sorted(Sdict.keys())]
        #newll_rd = ll + [Rdict[x] for x in sorted(Rdict.keys())]
        outf.write("\t".join(map(str, newll_seq)) + "\n")
        #outfRD.write("\t".join(map(str,newll_rd))+"\n")

    inf.close()
    outf.close()

コード例 #32

0

ファイルを表示

ファイル: Sitepro_scan_multinomial.py プロジェクト: Tarela/RivanaCode

def sitepro_scan(pattern,peak,out,w_plus,w_minus,trunk):
    """Scan peaks for the best non-overlapping matches to a cleavage pattern.

    ``pattern`` holds two comma-separated lines: the plus-strand and the
    minus-strand pattern.  Both are normalised by their joint sum.  For each
    peak (``chrom start end ...``) whose chromosome is present in both
    bigWig files, every window of the pattern length is scored with
    ``match_pattern`` after clamping the per-base signal to ``trunk``.
    Windows scored ``"NA"`` are skipped.  Among overlapping windows only the
    highest-scoring one is kept; each kept window is written to ``out`` as
    ``chrom, window_start + 3, window_end - 3, score``.

    Peaks in which no window receives a score contribute no footprint
    (previously this case raised a TypeError).

    Args:
        pattern: path of the two-line pattern file.
        peak: path of the peak file.
        out: path of the output file (overwritten).
        w_plus: path of the plus-strand bigWig file.
        w_minus: path of the minus-strand bigWig file.
        trunk: upper bound applied to every per-base signal value.
    """
    with open(pattern) as pattern_fh:
        pattern_plus = [float(v) for v in
                        pattern_fh.readline().strip().split(",")]
        pattern_minus = [float(v) for v in
                         pattern_fh.readline().strip().split(",")]
    total = sum(pattern_plus) + sum(pattern_minus)
    p_plus = [v / total for v in pattern_plus]
    p_minus = [v / total for v in pattern_minus]
    l = len(pattern_plus)
    # Uniform background over both strands.
    p0 = [1.0 / (2 * l)] * l

    chrom_len1 = {}
    chrom_len2 = {}
    for node in BwIO(w_plus).chromosomeTree['nodes']:
        chrom_len1[node['key']] = node['chromSize']
    for node in BwIO(w_minus).chromosomeTree['nodes']:
        chrom_len2[node['key']] = node['chromSize']

    footprint = []
    t = time.time()
    with open(w_plus, 'rb') as plus_fh, open(w_minus, 'rb') as minus_fh, \
            open(peak) as inf:
        w_plus_H = BigWigFile(plus_fh)
        w_minus_H = BigWigFile(minus_fh)
        for count, line in enumerate(inf):
            ll = line.split()
            if ll[0] in chrom_len1 and ll[0] in chrom_len2:
                chrom, start, end = ll[0], int(ll[1]), int(ll[2])
                p_sum = list(
                    w_plus_H.summarize(chrom, start, end,
                                       end - start).sum_data)
                m_sum = list(
                    w_minus_H.summarize(chrom, start, end,
                                        end - start).sum_data)
                # Clamp once per peak instead of once per window.
                p_clamped = [min(float(v), trunk) for v in p_sum]
                m_clamped = [min(float(v), trunk) for v in m_sum]

                # Best window of the current overlapping cluster.
                last_start = last_end = last_value = None
                for i in range(len(p_sum) - l):
                    score = match_pattern(p_plus, p_minus, p0, p0,
                                          p_clamped[i:i + l],
                                          m_clamped[i:i + l], l)
                    if score == "NA":
                        continue
                    if last_start is None:
                        last_start, last_end, last_value = i, i + l, score
                    elif i >= last_end:
                        # Window no longer overlaps the cluster: flush it.
                        footprint.append([chrom, start + last_start + 3,
                                          start + last_end - 3, last_value])
                        last_start, last_end, last_value = i, i + l, score
                    elif score > last_value:
                        last_start, last_end, last_value = i, i + l, score
                if last_start is not None:
                    footprint.append([chrom, start + last_start + 3,
                                      start + last_end - 3, last_value])
            # Timing of every block of 100 input lines.
            if count % 100 == 0:
                print(time.time() - t)
                t = time.time()

    with open(out, 'w') as outf:
        for fp in footprint:
            outf.write("\t".join(map(str, fp)) + "\n")

コード例 #33

0

ファイルを表示

def sitepro_scan(peak,out_bed,w_plus,w_minus,bg0,span,gen,lflank,rflank,offset,bpshift,weight):
    """Write observed and bias-predicted per-base cleavage for each site.

    For every line of ``peak`` (``chrom start end ...``) the plus and minus
    bigWig signal is read per base over ``[start - span, end + span)``.
    Three kinds of per-base bias values are computed from the genome
    sequence around each position:

    * ``px`` / ``nx``: products of two look-ups in the background tables
      returned by ``readBG(bg0)``,
    * ``pnew`` / ``nnew``: ``e`` raised to the sum of two ``predict2``
      outputs using the parameters from ``paramest``,
    * ``pweight`` / ``nweight``: the same with the parameters returned by
      ``apply_weight``.

    Each bias value is multiplied by the total observed signal in the
    ``2 * span`` window starting ``span`` bases upstream of the position.
    The output line is the original fields, the observed plus and minus
    signal over ``[start, end)``, then the px, nx, pnew, nnew, pweight and
    nweight products.

    Sites are skipped when the required sequence contains ``N`` or would
    start before position 0.

    Args:
        peak: path of the input site file.
        out_bed: path of the output file (overwritten).
        w_plus, w_minus: paths of the plus/minus strand bigWig files.
        bg0: background file passed to ``readBG``.
        span: number of bases added on each side of a site.
        gen: path of the 2bit genome file.
        lflank, rflank: sequence context lengths left/right of a position.
        offset, bpshift: position shifts used when slicing the sequence.
        weight: forwarded to ``apply_weight``.
    """

    # NOTE(review): nmer and code (below) are not used later in this
    # function.
    nmer = lflank + rflank
    genome = twobitreader.TwoBitFile(gen)
    
    pBG,nBG = readBG(bg0)
    #p2BG,n2BG = readBG(bg2)

    code = encoding() 
    b0s0,b1s0,b2s0 = paramest(pBG)
    new_b0,forward_b1,forward_b2,reverse_b1,reverse_b2 = apply_weight( b0s0,b1s0,b2s0,weight)
    
    inf = open(peak)
    w_plus_H=BigWigFile(open(w_plus, 'rb'))
    w_minus_H=BigWigFile(open(w_minus, 'rb'))
    
    outf = open(out_bed,'w')
    for line in inf:### chr start end name motifscore strand FP DNase chip
        ll = line.split()#####  3 below is flanking length
        chrm = ll[0]
        start = int(ll[1])
        end = int(ll[2])

        # Observed per-base signal over the site extended by span on both
        # sides (one bin per base).
        p_sum = list(w_plus_H.summarize(chrm,start-span,end+span,end-start+2*span).sum_data)
        n_sum = list(w_minus_H.summarize(chrm,start-span,end+span,end-start+2*span).sum_data)

        praw=[]
        nraw=[]
        px = []
        nx = []
        pnew=[]
        nnew=[]
        # Skip sites whose surrounding sequence contains N.
        # NOTE(review): this slice runs before the seq_start < 0 guard
        # further down, so a negative lower bound is handed to the genome
        # object's slicing -- confirm that is harmless.
        if 'N' in genome[chrm][min((start-span+1-offset +bpshift-lflank),(start-span+1 -bpshift-rflank) ):max((end+span+offset+lflank-bpshift),(end+span + bpshift + rflank))].upper():
#            print line
#            print genome[chrm][min((start-span+1-offset +bpshift-lflank),(start-span+1 -bpshift-rflank) ):max((end+span+offset+lflank-bpshift),(end+span + bpshift + rflank))].upper()
            continue
        
        # Table-based bias: for every position, multiply the background
        # value of its own context with that of the context `offset` away.
        for bp1 in range(-span,end-start+span):
            loci = start + bp1
            pseq = genome[chrm][(loci + bpshift - lflank) : (loci + bpshift + rflank)].upper()
            pseq_apart = genome[chrm][(loci+offset -bpshift-rflank):(loci+offset -bpshift+lflank)].upper()
            nseq = genome[chrm][(loci+1 -bpshift-rflank) : (loci+1 -bpshift+lflank)].upper()
            nseq_apart = genome[chrm][(loci+1-offset +bpshift-lflank):(loci+1-offset +bpshift+rflank)].upper()
#            praw.append(pBG[pseq])
#            nraw.append(nBG[nseq])
            px.append(pBG[pseq] * nBG[pseq_apart])
            nx.append(nBG[nseq] * pBG[nseq_apart])
            
#            p_yf = predict2(pseq,b0s0,b1s0,b2s0,8)
#            p_yr = predict2(revcomp(pseq_apart),b0s0,b1s0,b2s0,8)[::-1]
#            pnew.append(pow(numpy.e,(p_yf+p_yr)[0]))
            
#            n_yr= predict2(nseq_apart,b0s0,b1s0,b2s0,8)
#            n_yf = predict2(revcomp(nseq),b0s0,b1s0,b2s0,8)[::-1]
#            nnew.append(pow(numpy.e,(n_yf+n_yr)[0]))
            
        ### new method
        ### 1. fetch seq
        seq_start_plus = start - span - offset + 1 + bpshift - lflank
        seq_start_minus = start - span + 1 - bpshift - rflank
        seq_end_plus = end + span -1 + bpshift + rflank
        seq_end_minus = end + span -1 + offset - bpshift + lflank
        
        seq_start = min(seq_start_plus,seq_start_minus)
        if seq_start < 0 :
            #print line
            continue
        seq_end = max(seq_end_plus,seq_end_minus)
        code_seq = genome[chrm][seq_start:seq_end].upper()
        
        if 'N' in code_seq :
            #print line 
            continue
        
        ### 2. fetch base
        # Forward and reverse sub-sequences, cut out of the single fetch.
        seq_f = code_seq[ (seq_start_plus - seq_start) : (seq_end_plus - seq_start) ]
        seq_r = code_seq[ (seq_start_minus - seq_start) : (seq_end_minus - seq_start) ]
        
        # Model-based bias with the unweighted parameters.
        yf  = predict2(seq_f,b0s0,b1s0,b2s0,8)
   #     print len(seq_f),len(yf)
        yr  = predict2(revcomp(seq_r),b0s0,b1s0,b2s0,8)[::-1]
        z   = yf + yr
        pnew = list(pow(numpy.e,z[(offset-1):]))
        nnew = list(pow(numpy.e,z[:(-offset+1)]))
       

        # Model-based bias with the weighted parameters; the plus and minus
        # strand swap the forward/reverse coefficient sets.
        yfp_weight = predict2(seq_f,new_b0,forward_b1,forward_b2,8)
        yrp_weight = predict2(revcomp(seq_r),0,reverse_b1,reverse_b2,8)[::-1]
        z_p = yfp_weight + yrp_weight
        
        yfn_weight = predict2(seq_f,new_b0,reverse_b1,reverse_b2,8)
        yrn_weight = predict2(revcomp(seq_r),0,forward_b1,forward_b2,8)[::-1]
        z_n = yfn_weight + yrn_weight
        pweight = list(pow(numpy.e,z_p[(offset-1):]))
        nweight = list(pow(numpy.e,z_n[:(-offset+1)]))
        ## get predicted seqbias
        praw_assign =[]
        nraw_assign =[]
        px_assign  = []
        nx_assign  = []
        pnew_assign =[]
        nnew_assign =[]
        pweight_assign=[]
        nweight_assign=[]

        # One value per base of the original [start, end) site: total
        # observed signal of the surrounding 2*span window times the bias
        # value of the central base.
        for bp in range(len(p_sum)- 2*span):
            
            ptotal = sum(p_sum[bp:(bp+2*span)])*1.0
            ntotal = sum(n_sum[bp:(bp+2*span)])*1.0
            
            px_assign.append(ptotal * px[bp+span])
            nx_assign.append(ntotal * nx[bp+span])
            pnew_assign.append(ptotal * pnew[bp+span])
            nnew_assign.append(ntotal * nnew[bp+span])
            pweight_assign.append(ptotal * pweight[bp+span])
            nweight_assign.append(ntotal * nweight[bp+span])
    #        pnewlin_assign.append(ptotal * pnew_linear[bp+span]/sum(pnew_linear[bp:(bp+2*span)]))
    #        nnewlin_assign.append(ntotal * nnew_linear[bp+span]/sum(nnew_linear[bp:(bp+2*span)]))
            #pf6_assign.append(ptotal * pnew_f6[bp+span]/sum(pnew_f6[bp:(bp+2*span)]))
            #pf8_assign.append(ptotal * pnew_f8[bp+span]/sum(pnew_f8[bp:(bp+2*span)]))
                
        #print type(pnew)
        #print type(pnew_assign)
        ### write  real cleavage , seqbias , seqbias predicted cleavage
        newll = ll +  p_sum[span:(len(p_sum)-span)] + n_sum[span:(len(n_sum)-span)] +px_assign +nx_assign+ pnew_assign + nnew_assign + pweight_assign + nweight_assign
#        newll = ll + p_sum[span:(len(p_sum)-span)] + n_sum[span:(len(n_sum)-span)] + praw_assign +nraw_assign +px_assign +nx_assign+  pnew_assign + nnew_assign
#        newll = ll + p_sum[span:(len(p_sum)-span)] + n_sum[span:(len(n_sum)-span)] + pnew[span:(len(n_sum)-span)] + nnew[span:(len(n_sum)-span)] + pnew_assign + nnew_assign + pf6_assign + pf8_assign
        outf.write("\t".join(map(str,newll))+"\n")
    #print "predict cut time :",time.time()-t

    outf.close()
    inf.close()

コード例 #34

0

ファイルを表示

ファイル: hilbert.py プロジェクト: arq5x/scurgen

class HilbertMatrixBigWig(HilbertMatrix):
    """
    HilbertMatrix whose cells are filled from a bigWig file.

    Only build() is overridden (plus the chrom2rc() helper it relies on);
    everything else is inherited.  The methods here read ``self.file``,
    ``self.chrom``, ``self.chromdict``, ``self.chrom_length``,
    ``self.matrix_dim`` and ``self.matrix`` and call ``self._cleanup()``,
    all of which are presumably set up by HilbertMatrix -- confirm against
    the parent class.
    """
    def __init__(self, *args, **kwargs):
        """
        Subclass of HilbertMatrix specifically for bigWig format files
        """
        super(HilbertMatrixBigWig, self).__init__(*args, **kwargs)

    def build(self):
        """
        Build the matrix.

        Since bigWig files are essentially pre-summarized, this just extracts
        the chrom/start/stop represented by each cell in the matrix and fills
        it with the mean value (sum / valid count) from the bigWig file.
        Bins without any valid data are set to 0.
        """
        # bigWig is a binary format: open in 'rb' so the bytes are never
        # decoded or newline-translated.  The handle is deliberately kept
        # open because BigWigFile reads from it on every summarize() call and
        # the object stays reachable as self.bigwig.
        self.bigwig = BigWigFile(open(self.file, 'rb'))

        chrom_rc, chrom_bins = self.chrom2rc()

        if self.chrom == 'genome':
            chroms = self.chromdict.keys()
        else:
            chroms = [self.chrom]

        for chrom in chroms:
            rc = chrom_rc[chrom]
            nbins = chrom_bins[chrom]

            # A chromosome too small to own a single cell has nothing to
            # fill, and asking summarize() for zero bins is not meaningful.
            if nbins == 0:
                continue

            start, stop = self.chromdict[chrom]
            results = self.bigwig.summarize(chrom, start, stop, nbins)

            # summarize() can return None (e.g. chromosome absent from the
            # file); leave those cells at their initial value instead of
            # failing with an AttributeError.
            if results is None:
                continue

            # Mean signal per bin; bins with no valid positions give 0/0 = nan
            # and are reported as 0.
            values = results.sum_data / results.valid_count
            values[np.isnan(values)] = 0

            self.matrix[rc[:, 0], rc[:, 1]] = values

        self._cleanup()

    def chrom2rc(self):
        """
        Return a dictionary of {chrom: (rows, cols)} and {chrom: nbins}

        The (row, col) coordinates come from the ``precomputed.npz`` file that
        sits next to this module, using the array stored under the key
        ``'_<matrix_dim>'``.  Each chromosome receives a contiguous run of
        those coordinates whose length is proportional to the chromosome's
        stop coordinate relative to ``self.chrom_length``.
        """
        precomputed = np.load(
            os.path.join(
                os.path.dirname(__file__),
                'precomputed.npz'))
        try:
            rc = precomputed['_%s' % self.matrix_dim]
        finally:
            # The array above is already read into memory; release the
            # archive's file handle instead of leaking it.
            precomputed.close()

        ncells = self.matrix_dim * self.matrix_dim
        d = {}
        bins = {}
        last_stop = 0
        # NOTE(review): runs are handed out in chromdict iteration order, so
        # the layout depends on that order being stable -- confirm chromdict
        # is an ordered mapping.
        for chrom, (_start, stop) in self.chromdict.items():
            frac = stop / float(self.chrom_length)
            nbins = int(frac * ncells)
            d[chrom] = rc[last_stop:last_stop + nbins, :]
            bins[chrom] = nbins
            last_stop += nbins
        return d, bins

コード例 #35

0

ファイルを表示

ファイル: bigwig_correlation.py プロジェクト: qinqian/coloncancer

def main():
    """
    Command-line entry point: draw a correlation plot for several bigWig files.

    Steps:
      1. parse and validate the command line (image format, >=2 input files,
         R output file, step and image size limits, input files exist);
      2. collect the chromosomes common to all files, taking chromosome
         lengths from the first file;
      3. for every file, summarize each common chromosome into bins of
         ``step`` kb and write the per-bin means as an R vector;
      4. append the R plotting code (heatmap or paired scatterplots) and try
         to run the script with ``Rscript``.

    Exits with status 1 on any validation error.
    """
    usage = "usage: %prog <-r rfile> [options] <bigwig files> ..."
    description = "Draw correlation plot for many bigwig files. Based on qc_chIP_whole.py"

    optparser = OptionParser(version="%prog 0.1",description=description,usage=usage,add_help_option=False)
    optparser.add_option("-h","--help",action="help",help="Show this help message and exit.")
    optparser.add_option("-r","--rfile",dest="rfile",
                         help="R output file. If not set, do not save R file.")
    optparser.add_option("-s","--step",dest="step",type="int",
                         help="sampling step in kbps. default: 100, minimal: 1",default=100)
    optparser.add_option("-z","--imgsize",dest="imgsize",type="int",
                         help="image size in inches, note the PNG dpi is 72. default: 10, minimal: 10",default=10)
    optparser.add_option("-f","--format",dest="imgformat",type="string",
                         help="image format. PDF or PNG",default='PDF')
    optparser.add_option("-l","--wig-label",dest="wiglabel",type="string",action="append",
                         help="the wiggle file labels in the figure. No space is allowed. This option should be used same times as wiggle files, and please input them in the same order as -w option. default: will use the wiggle file filename as labels.")
    optparser.add_option("--min-score",dest="minscore",type="float",default=-10000,
                         help="minimum score included in calculation. Points w/ score lower than this will be discarded.")
    optparser.add_option("--max-score",dest="maxscore",type="float",default=10000,
                         help="maximum score included in calculation. Points w/ score larger than this will be discarded.")
    optparser.add_option("-H","--heatmap",dest="heatmap",action="store_true",default=False,
                         help="If True, a heatmap image will be generated instead of paired scatterplot image.")

    (options, wigfiles) = optparser.parse_args()

    imgfmt = options.imgformat.upper()
    if imgfmt not in ('PDF', 'PNG'):
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("unrecognized format: %s" % imgfmt)
        sys.exit(1)

    wigfilenum = len(wigfiles)
    if wigfilenum < 2:
        error("must provide >=2 wiggle files")
        optparser.print_help()
        sys.exit(1)
    # The R script is written unconditionally below, so -r is mandatory;
    # report its absence with its own message rather than the file-count one.
    if not options.rfile:
        error("must provide an R output file with -r")
        optparser.print_help()
        sys.exit(1)

    # wig labels
    if options.wiglabel and len(options.wiglabel) == wigfilenum:
        wiglabel = options.wiglabel
    else:  # or use the filename
        wiglabel = [os.path.basename(f) for f in wigfiles]

    if options.step < 1:
        error("Step can not be lower than 1!")
        sys.exit(1)
    if options.imgsize < 10:
        error("Image size can not be lower than 10!")
        sys.exit(1)

    # check the files
    for f in wigfiles:
        if not os.path.isfile(f):
            error("%s is not valid!" % f)
            sys.exit(1)

    info("number of bigwig files: %d" % wigfilenum)

    # chromosome lengths are taken from the first bigwig file
    first = BwIO(wigfiles[0])
    chrom_len = {}
    for node in first.chromosomeTree['nodes']:
        chrom_len[node['key']] = node['chromSize']

    # keep only the chromosomes present in every file
    chrset = set(chrom_len)
    for path in wigfiles[1:]:
        other = BwIO(path)
        chrset &= set(t['key'] for t in other.chromosomeTree['nodes'])
    # sorted so the generated R file is identical from run to run
    chroms = sorted(chrset)

    if not chroms:
        error('No common chrom found')
        sys.exit(1)  # this is an error exit, so do not report success
    info("common chromosomes are %s." % ",".join(chroms))

    # step is given in kb; floor division keeps the bin count an int on
    # Python 3 as well
    bin_size = options.step * 1000
    labels = ",".join(['"' + x + '"' for x in wiglabel])

    # Start writing R file; the with-block closes it even if sampling fails.
    with open(options.rfile, "w") as rfhd:
        rfhd.write('''require("RColorBrewer") ## from CRAN\n''')

        # for each wig file, sample...
        for i, path in enumerate(wigfiles):
            info("read wiggle track from bigwig file #%d" % (i+1))
            profile = []
            with open(path, 'rb') as bwfhd:
                bw = BigWigFile(bwfhd)
                for chrom in chroms:
                    nbins = chrom_len[chrom] // bin_size

                    # The too-short chromosome will cause error in
                    # bw.summarize function below, so filter them out
                    if nbins == 0:
                        warn("A very-short chromosome (%s) found and skipped"%chrom)
                        continue

                    summary = bw.summarize(chrom, 0, chrom_len[chrom], nbins)
                    if not summary:
                        continue
                    # mean per bin; bins without data are nan -> NA in R
                    profile_chr = summary.sum_data / summary.valid_count
                    profile.extend(str(t).replace('nan', 'NA') for t in profile_chr)

            info("write values to r file")
            rfhd.write("p%d <- c(%s)\n" %(i, ','.join(profile)))

        # one column per input file
        rfhd.write("c <- cbind(p0")
        for i in range(1, wigfilenum):
            rfhd.write(",p%d" % i)
        rfhd.write(")\n")

        # drop rows where any column falls outside [minscore, maxscore]
        rfhd.write("c <- c[ c[,1]<=%f & c[,1]>=%f " % (options.maxscore,options.minscore))
        for col in range(2, wigfilenum + 1):
            rfhd.write("& c[,%d]<=%f & c[,%d]>=%f " % (col,options.maxscore,col,options.minscore))
        rfhd.write(",]\n")

        if imgfmt == 'PDF':
            rfhd.write("pdf(\"%s.pdf\",width=%d,height=%d)\n" % (options.rfile,options.imgsize,options.imgsize))
        elif imgfmt == 'PNG':
            rfhd.write("png(\"%s.png\",units=\"in\",res=150,width=%d,height=%d)\n" % (options.rfile,options.imgsize,options.imgsize))

        if options.heatmap:                 # heatmap
            rfhd.write('library(gplots)\n')
            rfhd.write('''
m <- cor(c, method="pearson", use="pairwise.complete.obs")
''')
            rfhd.write("rownames(m) <- c(%s)\n" % labels)
            rfhd.write("colnames(m) <- c(%s)\n" % labels)
            rfhd.write('# draw the heatmap using gplots heatmap.2\n')
            rfhd.write('mn <- -1\n')
            rfhd.write('mx <- 1\n')
            rfhd.write('n <- 98\n')
            rfhd.write('bias <- 1\n')
            rfhd.write('mc <- matrix(as.character(round(m, 2)), ncol=dim(m)[2])\n')
            rfhd.write('breaks <- seq(mn, mx, (mx-mn)/(n))\n')
            rfhd.write('cr <- colorRampPalette(colors = c("#2927FF","#FFFFFF","#DF5C5C"), bias=bias)\n')
            rfhd.write('heatmap.2(m, col = cr(n), breaks=breaks, trace="none", cellnote=mc, notecol="black", notecex=1.8, keysize=0.5, density.info="histogram", margins=c(27.0,27.0), cexRow=2.20, cexCol=2.20, revC=T, symm=T)\n')
        else:                               # scatterplot
            rfhd.write('''
panel.plot <- function( x,y, ... )
{
  par(new=TRUE)
  m <- cbind(x,y)
  plot(m,col=densCols(m),pch=20)
  lines(lowess(m[!is.na(m[,1])&!is.na(m[,2]),]),col="red")  
}
    
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y,use="complete.obs")
  txt <- format(round(r,2),width=5,nsmall=2)
  #format(c(r, 0.123456789), digits=digits)[1]
  txt <- paste(prefix, txt, sep="")
  if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
  #text(0.5, 0.5, txt, cex = cex.cor * abs(r))
  text(0.5, 0.5, txt, cex = cex.cor)
}
''')
            rfhd.write('''
pairs(c, lower.panel=panel.plot, upper.panel=panel.cor, labels=c(%s))
''' % (labels))

        rfhd.write("dev.off()\n")

    # try to call R
    try:
        status = subprocess.call(['Rscript',options.rfile])
    except OSError:
        # Rscript could not be started (e.g. not installed): the user has to
        # run the generated script by hand.
        info("Please check %s" % options.rfile)
    else:
        if status != 0:
            # Rscript ran but failed, so no image was produced
            info("Please check %s" % options.rfile)
        else:
            # the script writes a lower-case extension (.pdf / .png)
            info("Please check %s" % (options.rfile+'.'+imgfmt.lower()))