Example #1
0
def run(args):
    """Load the rows of ``args.input`` into a table of an SQLite database.

    The CREATE and INSERT statements come from ``schema_templates`` and
    ``insert_templates`` (both keyed by ``args.input_format``) with
    ``args.table_name`` substituted in.  Progress messages are written to
    ``args.output``.

    When ``args.db`` is the literal ``"guess"`` the database path is derived
    from the input path: a trailing ``.gz`` is removed and ``.db`` appended.
    """
    schema_template = schema_templates[args.input_format]
    insert_template = insert_templates[args.input_format]
    out = IO.fopen(args.output, "w")

    db_filename = args.db
    if db_filename == "guess":
        # str.strip(".gz") removes any of the characters '.', 'g' and 'z'
        # from BOTH ends (e.g. "genes.gz" -> "enes"); only the suffix should
        # be dropped.
        base = args.input
        if base.endswith(".gz"):
            base = base[:-len(".gz")]
        db_filename = base + ".db"
    db_is_new = not os.path.exists(db_filename)
    print("Database file : %s" % db_filename, file=out)

    # NOTE(review): the table name is substituted into the SQL text by the
    # templates; it must come from a trusted source.
    names = {"table_name": args.table_name}
    with sqlite3.connect(db_filename) as conn:
        cursor = conn.cursor()
        schema_sql = schema_template.substitute(names)
        if db_is_new:
            # The schema is only echoed for a database file that did not
            # exist yet; it is executed in either case.
            print('Creating table %s if not exists\n________________________________' % args.table_name, file=out)
            print(schema_sql, file=out)
            print("_______________________________", file=out)
        cursor.execute(schema_sql)

        insert_sql = insert_template.substitute(names)
        print(insert_sql, file=out)
        cursor.executemany(insert_sql, TableIO.parse(args.input, "simple"))
        conn.commit()
        print("loaded", file=out)
Example #2
0
def run(args):
    """Query the bigwig index for every input record and report the hits.

    For each record parsed from ``args.input`` (format ``args.format``) a
    ``QR`` line with the record and an ``HT`` line with the list of query
    results are written to ``args.output``.
    """
    bigwig = DBI.init(args.bw, "bigwig")
    out = IO.fopen(args.output, "w")
    records = TableIO.parse(IO.fopen(args.input, "r"), args.format)
    for record in records:
        hits = list(bigwig.query(record, method=args.method))
        print("QR", record, file=out)
        print("HT", hits, file=out)
Example #3
0
def run(args):
    """Write the sequence of every input interval as a FASTA record.

    Intervals are parsed from ``args.input`` as ``bed<args.bed_column_number>``
    and queried (``method=args.method``) against the index built from
    ``args.genome``.  Each record is named ``<interval id>_<args.method>``.
    """
    bedformat = "bed" + str(args.bed_column_number)
    dbi = DBI.init(args.genome, "genome")
    out = IO.fopen(args.output, "w")
    for bed in TableIO.parse(IO.fopen(args.input, "r"), bedformat):
        # Build the header as a single string: print(">", name) inserts the
        # separator space and yields "> name", i.e. an empty FASTA identifier.
        print(">" + bed.id + "_" + args.method, file=out)
        print(seq_wrapper(dbi.query(bed, method=args.method)), file=out)
Example #4
0
def run(args):
    """Convert a sectioned coverage table into a JSON document.

    The input is parsed with ``sep=","``.  A row with a single field is split
    on tabs; when that gives two parts, the second one names a new section.
    Every other row is read as ``start, value, length`` and belongs to the
    current section.  Only sections matching ``args.query`` (or all of them
    when it is ``"all"``) are kept.

    The output holds one ideogram per kept section (its length being
    ``start + length`` of the last kept row) and a single bedgraph track
    with all kept rows.
    """
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    values = []
    ideograms = []
    section = ""   # name of the section currently being read
    extent = 0     # start + length of the last kept row of that section

    def selected(name):
        return args.query == "all" or args.query == name

    for row in TableIO.parse(fin, sep=","):
        if len(row) == 1:
            header = row[0].split("\t")
            if len(header) == 2:
                if selected(section) and extent > 0:
                    ideograms.append({"id": section, "length": extent})
                section = header[1].strip()
                # Start the new section from scratch; otherwise a section
                # without rows would be reported with the previous length.
                extent = 0
        elif selected(section):
            values.append({"chr": section, "start": row[0], "length": row[2], "value": row[1]})
            extent = int(row[0]) + int(row[2])
    if selected(section) and extent > 0:
        ideograms.append({"id": section, "length": extent})

    document = {
        "ideograms": ideograms,
        "tracks": [
            {
                "name": args.input,
                "type": "bedgraph",
                "values": values,
            },
        ],
    }
    print(json.dumps(document, indent=4), file=out)
Example #5
0
File: read.py Project: HaoKuo/bam2x
def run(args):
    """Parse ``args.input`` and print every record to ``args.output``.

    When ``args.format`` is ``"guess"`` it is replaced (on ``args`` itself)
    by the result of ``IO.guess_format(args.input)``.
    """
    out = IO.fopen(args.output, "w")
    if args.format == "guess":
        args.format = IO.guess_format(args.input)
    # The parser is handed the input path directly; the extra handle that
    # used to be opened on the same path was never read from.
    for record in TableIO.parse(args.input, args.format):
        print(record, file=out)
Example #6
0
def parse_bed(f):
    """Parse ``f`` as bed12 and return a list with one dict per record.

    Each dict maps the field names of ``hclass["bed12"]`` to the
    corresponding values of the record.
    """
    return [
        dict((key, value) for value, key in izip(record, hclass["bed12"]._fields))
        for record in TableIO.parse(f, "bed12")
    ]
Example #7
0
def run(local_args):
    """Annotate each input record with the ids of the gene features it hits.

    Sets the module globals ``args`` and ``out``.  The genes of
    ``args.genetab`` are split into upstream/downstream flanks, exons,
    introns and 3'/5' UTRs, each collection being indexed separately.  Every
    record of ``args.input`` is then written to ``args.output`` followed by
    one column per index holding ``toIDs`` of the query result.
    """
    global args, out
    args = local_args
    out = IO.fopen(args.output, "w")
    fin = IO.fopen(args.input, "r")

    # Provenance header.
    print("# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION, file=out)
    print("# in bam2x ( https://github.com/nimezhu/bam2x )", file=out)
    print("# Date: ", time.asctime(), file=out)
    print("# The command line is :", file=out)
    print("#\t", " ".join(sys.argv), file=out)

    gene = DBI.init(args.genetab, "binindex", cls="bed12")
    flanks_up = []
    flanks_down = []
    exon_parts = []
    intron_parts = []
    three_prime = []
    five_prime = []
    for g in gene:
        flanks_up.append(g.upstream(args.upstream))
        flanks_down.append(g.downstream(args.downstream))
        exon_parts.extend(g.Exons())
        intron_parts.extend(g.Introns())
        if g.utr3() is not None:
            three_prime.append(g.utr3())
        if g.utr5() is not None:
            five_prime.append(g.utr5())

    # Column order of the report: gene, upstream, downstream, exon, intron,
    # utr3, utr5.
    indexes = [gene]
    for parts in (flanks_up, flanks_down, exon_parts, intron_parts, three_prime, five_prime):
        indexes.append(DBI.init(parts, "binindex", cls="bed6"))

    if args.format == "guess":
        args.format = IO.guess_format(args.input)

    first = True
    for record in TableIO.parse(fin, args.format):
        if first:
            # The column header depends on the type of the first record.
            first = False
            if isinstance(record, Bed12):
                print("#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5", file=out)
            else:
                print("#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5", file=out)

        print(record, file=out, end="")
        for index in indexes[:-1]:
            print("\t", toIDs(index.query(record)), file=out, end="")
        # The last column also terminates the line.
        print("\t", toIDs(indexes[-1].query(record)), file=out)
Example #8
0
def parse_orthologs(f):
    """Parse ``f`` and return a list with one dict per row.

    The columns of each row are paired, in order, with the ortholog field
    names listed below; pairing stops at the shorter of the two.
    """
    ortholog_fields = [
        "lnc", "lncGeneSymbol", "ortholog", "orthologGeneSymbol", "alignNo",
        "exonID", "locusID", "indelRate", "lncExonsAlinged",
        "orthExonsAligned", "category",
    ]
    return [
        dict((key, value) for value, key in izip(row, ortholog_fields))
        for row in TableIO.parse(f)
    ]
Example #9
0
def run(args):
    """Query the BAM index for every input interval and list the hits.

    Intervals are parsed from ``args.input`` as ``bed<args.bed_column_number>``.
    For each one a ``QR`` line, one ``HT`` line per query result and an
    empty separator line are written to ``args.output``.
    """
    bed_format = "bed" + str(args.bed_column_number)
    bam = DBI.init(args.bam, "bam")
    out = IO.fopen(args.output, "w")
    intervals = TableIO.parse(IO.fopen(args.input, "r"), bed_format)
    for interval in intervals:
        print("QR", interval, file=out)
        for hit in bam.query(interval, method=args.method):
            print("HT", hit, file=out)
        print("", file=out)
Example #10
0
def _generate_db(filename, db_filename, table_name):
    """Create ``table_name`` in ``db_filename`` and fill it from ``filename``.

    The schema and insert statements are produced by the ``schema_t`` and
    ``insert_t`` templates; the rows come from parsing ``filename`` in the
    "simple" format.
    """
    names = {"table_name": table_name}
    with sqlite3.connect(db_filename) as conn:
        cursor = conn.cursor()
        cursor.execute(schema_t.substitute(names))
        insert_sql = insert_t.substitute(names)
        rows = TableIO.parse(IO.fopen(filename, "r"), "simple")
        cursor.executemany(insert_sql, rows)
        conn.commit()
Example #11
0
# Annotation kinds that map to one or more accessor methods whose result is
# printed only when it is not None and has a positive cdna_length().
_REGION_GETTERS = {
    "cds": ("cds",),
    "utr5": ("utr5",),
    "utr3": ("utr3",),
    "utr": ("utr5", "utr3"),
}


def _annotation_features(bed, args):
    """Yield the parts of ``bed`` selected by ``args.annotation``."""
    kind = args.annotation
    if kind == "exon":
        for exon in bed.Exons():
            yield exon
    elif kind == "intron":
        for intron in bed.Introns():
            yield intron
    elif kind == "upstream":
        yield bed.upstream(args.bp)
    elif kind == "downstream":
        yield bed.downstream(args.bp)
    else:
        for getter in _REGION_GETTERS.get(kind, ()):
            region = getattr(bed, getter)()
            if region is not None and region.cdna_length() > 0:
                yield region


def run(args):
    """Print the requested sub-features of every bed12 record.

    ``args.annotation`` selects what is written to ``args.output``:
    ``exon``, ``intron``, ``cds``, ``utr5``, ``utr3``, ``utr`` (5' then 3'),
    ``upstream`` or ``downstream`` (the last two use ``args.bp``).  Any other
    value produces no output and the input is not read.
    """
    out = IO.fopen(args.output, "w")
    known = ("exon", "intron", "upstream", "downstream") + tuple(_REGION_GETTERS)
    if args.annotation not in known:
        return
    for bed in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
        for feature in _annotation_features(bed, args):
            print(feature, file=out)
Example #12
0
def run(args):
    """Write the sequence of every input interval as a FASTA record.

    Intervals are parsed from ``args.input`` as ``bed<args.bed_column_number>``
    and queried against the genome index; intervals whose query result has
    length zero are skipped.  The wrapped sequence is written without an
    extra trailing newline.
    """
    bed_format = "bed" + str(args.bed_column_number)
    genome = DBI.init(args.genome, "genome")
    out = IO.fopen(args.output, "w")
    intervals = TableIO.parse(IO.fopen(args.input, "r"), bed_format)
    for interval in intervals:
        sequence = genome.query(interval, method=args.method)
        if len(sequence) != 0:
            header = ">{}".format(interval.id + "_" + args.method)
            print(header, file=out)
            print(seq_wrapper(sequence), file=out, end="")
Example #13
0
def run(args):
    """Report, per bed12 transcript, the BAM reads found for it.

    For every transcript a ``QR`` line is written, then one line per read
    returned by the BAM query: ``HT`` with the translated read when
    ``compatible_with_transcript`` accepts it, otherwise ``OP`` with the raw
    read unless ``args.hit`` is set.  An empty line closes each transcript.
    """
    bam = DBI.init(args.bam, "bam")
    out = IO.fopen(args.output, "w")
    transcripts = TableIO.parse(IO.fopen(args.input, "r"), "bed12")
    for transcript in transcripts:
        print("QR\t", transcript, file=out)
        for read in bam.query(transcript, method="bam1", strand=args.strand):
            if compatible_with_transcript(read, transcript):
                line = "HT\t{}".format(_translate_to_meta(transcript, read))
            elif not args.hit:
                line = "OP\t{}".format(read)
            else:
                continue
            print(line, file=out)
        print("", file=out)
Example #14
0
def run(args):
    """Query a tabix index for every input record and number the hits.

    When ``args.type`` is a key of ``hclass`` the index is opened with that
    class (``cls=``); otherwise it is opened without one.  For each record
    of ``args.input`` a ``QR`` line is written, followed by one
    ``HT_<n>`` line per query result, numbered from 1.
    """
    out = IO.fopen(args.output, "w")
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if args.type in hclass:
        dbi = DBI.init(args.db, "tabix", cls=hclass[args.type])
    else:
        dbi = DBI.init(args.db, "tabix")
    for query in TableIO.parse(IO.fopen(args.input, "r"), args.format):
        print("QR", query, file=out)
        for rank, hit in enumerate(dbi.query(query), 1):
            print("HT_{k}\t{ht}".format(k=rank, ht=hit), file=out)
Example #15
0
def run(args):
    """Report, per bed12 transcript, the annotated records found for it.

    ``args.bed`` is opened as a tabix index when a ``.tbi`` file exists next
    to it and as a "binindex" otherwise.  For every transcript of
    ``args.input`` a ``QR`` line is written, then one line per query result:
    ``HT`` with the translated record when ``compatible_with_transcript``
    accepts it, otherwise ``OP`` with the raw record unless ``args.hit`` is
    set.  An empty line closes each transcript.
    """
    index_kind = "tabix" if os.path.isfile(args.bed + ".tbi") else "binindex"
    dbi = DBI.init(args.bed, index_kind, cls=BED12)
    out = IO.fopen(args.output, "w")
    transcripts = TableIO.parse(IO.fopen(args.input, "r"), "bed12")
    for transcript in transcripts:
        print("QR\t", transcript, file=out)
        for record in dbi.query(transcript):
            if compatible_with_transcript(record, transcript):
                line = "HT\t{}".format(_translate_to_meta(transcript, record))
            elif not args.hit:
                line = "OP\t{}".format(record)
            else:
                continue
            print(line, file=out)
        print("", file=out)
Example #16
0
def run(args):
    """Build a tabix index for ``args.input``, sorting it first if needed.

    Unless ``args.sorted`` is set, the records are parsed, sorted and
    written to ``<root>.sorted<ext>`` next to the input, and that file is
    indexed instead.  The tabix preset is ``args.format`` with all digits
    removed (e.g. ``bed12`` -> ``bed``).
    """
    outfile = args.input
    if not args.sorted:
        # The input only has to be opened when it must be re-sorted.
        fin = IO.fopen(args.input, "r")
        records = sorted(TableIO.parse(fin, args.format))
        root, ext = splitext(args.input)
        outfile = "{0}.sorted{1}".format(root, ext)
        out = IO.fopen(outfile, "w")
        try:
            for record in records:
                print(record, file=out)
        finally:
            # The file must be complete on disk before it is indexed.
            out.close()
    # str.translate(None, digits) only works on Python 2 byte strings;
    # filtering the characters behaves the same on every version.
    preset = "".join(ch for ch in args.format if ch not in digits)
    tabix_index(outfile, preset=preset)
Example #17
0
def run(args):
    """Cluster sorted bed12 records and print a three-level report.

    For each cluster from ``iter_cluster`` a ``REGION`` line is written
    (score = number of members), then one ``GROUP`` line per group from
    ``greedy_iter_compatible_group`` holding the merged bed, then the
    members of the group ordered by decreasing ``cdna_length()``.
    """
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    beds = sorted(TableIO.parse(fin, "bed12"))
    for cluster_no, cluster in enumerate(iter_cluster(beds), 1):
        members = cluster[1]
        consensus_id = find_prefix_consensus([member.id for member in members])
        consensus_strand = find_consensus_strand([member.strand for member in members])
        region = "REGION\tCL_{index}\t{chr}\t{start}\t{end}\t{id}\t{score}\t{strand}".format(
            index=str(cluster_no),
            chr=members[0].chr,
            start=members[0].start,
            end=cluster[0],
            id=consensus_id,
            score=len(members),
            strand=consensus_strand,
        )
        print(region, file=out)

        for group_no, group in enumerate(greedy_iter_compatible_group(members), 1):
            merged = merge_beds(group, id="CL.{i}_GP.{j}".format(i=cluster_no, j=group_no))
            print("\tGROUP{j}\t{bed}".format(j=group_no, bed=merged), file=out)
            by_length = sorted(group, key=lambda bed: bed.cdna_length(), reverse=True)
            for rank, transcript in enumerate(by_length, 1):
                print("\t\tCL.{i}_GP.{j}_TR.{k}\t{l}\t{z}".format(
                    i=cluster_no, j=group_no, k=rank,
                    l=transcript.cdna_length(), z=transcript), file=out)
Example #18
0
 def __init__(self,bamfiles,**dict):
     '''
     Collect the BAM handles this object works on in self.bamfiles.

     bamfiles is either a list, or the name of a file (parsed in the
     "simple" format) whose first column lists the entries.  String entries
     are opened with pysam.Samfile(name,"rb"); anything else is stored as
     given.
     '''
     if isinstance(bamfiles,str):
         listing=bamfiles
         bamfiles=[row[0] for row in TableIO.parse(listing,"simple")]
     self.bamfiles=[]
     for bamfile in bamfiles:
         if isinstance(bamfile,str):
             try:
                 bamfile=pysam.Samfile(bamfile,"rb")
             except Exception:
                 # Best effort: warn and carry on.  Catching Exception
                 # (not everything) keeps Ctrl-C and SystemExit working.
                 # NOTE(review): the unopened name is still stored below,
                 # as before - confirm that callers expect this.
                 sys.stderr.write("WARNING: Can't init the bam file %s\n"%bamfile)
         self.bamfiles.append(bamfile)
Example #19
0
def run(args):
    """Accumulate read coverage for every gene of the clustered input.

    The bed12 records of ``args.input`` are sorted and clustered with
    ``iter_cluster``.  For each gene of a cluster a coordinate system is
    built by ``up_down_coordinate(gene, args.bp, args.bp)`` and a vector of
    ``cdna_length()`` zeros is allocated.  Every read returned by the BAM
    query over the cluster (widened by ``args.bp`` on both sides) adds
    ``1.0/NC/NM`` to the positions it covers in each coordinate system it
    overlaps and is compatible with, where ``NC`` is the number of such
    coordinate systems and ``NM`` the value of ``getNM(read)``.

    The summary, the coordinates and the value vectors are printed to
    standard output.
    """
    logging.basicConfig(level=logging.DEBUG)
    fin = IO.fopen(args.input, "r")
    # NOTE(review): the output file is opened but every print below goes to
    # stdout - confirm whether the results were meant to be written here.
    out = IO.fopen(args.output, "w")
    bam = DBI.init(args.bam, "bam")
    beds = [i for i in TableIO.parse(fin, "bed12")]
    beds.sort()
    print("mapped:{}".format(bam.mapped))
    print("unmapped:{}".format(bam.unmapped))
    data = {}
    for i, x in enumerate(iter_cluster(beds)):
        print("{}\t{}:{}-{}".format(i + 1, x["chr"], x["start"] + 1, x["stop"]))

        coords = [up_down_coordinate(gene, args.bp, args.bp) for gene in x["beds"]]
        for y in coords:
            data[y.id] = {
                "coord": y,
                "values": [0.0 for l in range(y.cdna_length())],
            }
        reads = bam.query(method="bam1", chr=x["chr"], start=x["start"] - args.bp,
                          stop=x["stop"] + args.bp, strand=args.strand)
        for read in reads:
            NM = getNM(read)  # number of hits
            # Coordinate systems this read contributes to.
            hit_coords = [coord for coord in coords
                          if overlap(read, coord) and compatible(read, coord)]
            NC = len(hit_coords)  # number of compatible
            for coord in hit_coords:
                # Clip into a separate name: rebinding ``read`` here would
                # hand the already clipped read to the remaining
                # coordinates and under-count them.
                clipped = read
                if read.start < coord.start or read.stop > coord.stop:
                    clipped = read._slice(max(read.start, coord.start),
                                          min(read.stop, coord.stop))
                read_in_coord = _translate(coord, clipped)
                for l in xrange(read_in_coord.start, read_in_coord.stop):
                    data[coord.id]["values"][l] += 1.0 / NC / NM
        for y in coords:
            print(data[y.id]["coord"])
            print(data[y.id]["values"])

    '''
Example #20
0
File: sort.py Project: HaoKuo/bam2x
def run(args):
    """Read all records of ``args.input``, sort them and print them.

    When ``args.format`` is ``"guess"`` it is replaced (on ``args`` itself)
    by the result of ``IO.guess_format(args.input)``.  Progress is logged
    every 10000 records.
    """
    out = IO.fopen(args.output, "w")
    if args.format == "guess":
        args.format = IO.guess_format(args.input)
    # The parser is handed the input path directly; the extra handle that
    # used to be opened on the same path was never read from.
    records = []
    for i, x in enumerate(TableIO.parse(args.input, args.format)):
        # Modulo, not division: "i/10000==0" is true for each of the first
        # 10000 records and never again.
        if i % 10000 == 0:
            logging.info("reading %s entrys in %s", i, args.input)
        records.append(x)
    logging.info("begin sorting")
    records.sort()
    logging.info("sorting done")
    for record in records:
        print(record, file=out)
    logging.info("completed")
Example #21
0
def run(args):
    """Compute upstream, transcript and downstream profiles in parallel.

    Sets the module globals ``bam`` and ``out``.  The bed12 records of
    ``args.input`` are dealt round-robin into ``args.num_cpus`` lists, which
    are mapped over a process pool three times (``count_flank_star`` for
    the upstream flank, ``count_list_star`` for the transcript body and
    ``count_flank_star`` for the downstream flank); each result set is
    handed to ``output`` with the labels ``UP``, ``TR`` and ``DN``.

    An input without records is reported with a warning and nothing is
    computed.
    """
    logging.basicConfig(level=logging.INFO)
    global bam,out
    bam=DBI.init(args.bam,"bam")
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    # The pool is created after the globals above are set, as before.
    p=mp.Pool(processes=args.num_cpus)
    try:
        beds_list=[[] for i in xrange(args.num_cpus)]
        # Count explicitly: relying on the loop variable after the loop
        # raises NameError when the input holds no records.
        gene_num=0
        for bed in TableIO.parse(fin,"bed12"):
            beds_list[gene_num%args.num_cpus].append(bed)
            gene_num+=1
        if gene_num==0:
            logging.warning("no bed12 records found in %s",args.input)
            return
        print("bin_id\tmean\tentropy\treverse_strand_mean\treverse_strand_entropy",file=out)
        up_results=p.map(count_flank_star,itertools.izip(beds_list,itertools.repeat(args.bp),itertools.repeat(args.strand),itertools.repeat(True)))
        output(up_results,args.bp,gene_num,"UP")
        results=p.map(count_list_star,itertools.izip(beds_list,itertools.repeat(args.bin_num),itertools.repeat(args.strand)))
        output(results,args.bin_num,gene_num,"TR")
        down_results=p.map(count_flank_star,itertools.izip(beds_list,itertools.repeat(args.bp),itertools.repeat(args.strand),itertools.repeat(False)))
        output(down_results,args.bp,gene_num,"DN")
    finally:
        # Release the worker processes on every exit path.
        p.close()
        p.join()
Example #22
0
def run(local_args):
    '''
    Distribute the input records over worker processes and report the results.

    Sets the module globals listed in the ``global`` statement below.  A
    provenance header is written to ``args.output``; the records of
    ``args.input`` are dealt round-robin into ``args.num_cpus`` lists, each
    list is passed to ``querys`` through a process pool, and the collected
    results are handed to ``output``.  ``query_num`` ends up as the number
    of records read (0 for an empty input).
    '''
    logging.basicConfig(level=logging.WARNING)
    global args,out,dbi_bam,g, MIN_INTRON_LENGTH, MIN_SPLICING_SITES_SCORE, MIN_FPK_RATIO,query_num
    MIN_INTRON_LENGTH=10
    MIN_SPLICING_SITES_SCORE=2
    args=local_args
    MIN_FPK_RATIO=args.min_uniq_fpk_increase #TO TEST
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    reader=TableIO.parse(fin,args.format)
    query_lists=[[] for i in range(args.num_cpus)]
    # Count while distributing: deriving the total from the loop variable
    # after the loop raises NameError when the input holds no records.
    query_num=0
    for x in reader:
        query_lists[query_num%args.num_cpus].append(x)
        query_num+=1
    pool=Pool(processes=args.num_cpus)
    results=pool.map(querys,query_lists)
    output(results)
Example #23
0
def run(args):
    '''
    Reverse-translate the records of args.input using a gene table in SQLite.

    args.translator is used directly as the database when its extension is
    ".db".  Otherwise "<translator>.db" is used, and it is generated from
    the translator file with _generate_db if it does not exist yet.

    For every input record the query built from ``template`` (table name
    and the record's ``chr`` field as name) is run; the first row is turned
    into a Bed12 and ``reverse_translate(gene, record)`` is written to
    args.output.  Records without a matching row are skipped with a warning.
    '''
    logging.basicConfig(level=logging.DEBUG)
    db_filename=args.translator
    t_ext=splitext(args.translator)[1]
    # splitext keeps the leading dot (".db"); comparing against "db" never
    # matched, so a ready-made database was treated as a plain table.
    if t_ext!=".db":
        possible_db=args.translator+".db"
        print(possible_db)
        if not os.path.exists(possible_db):
            _generate_db(args.translator,possible_db,args.table_name)
        db_filename=possible_db

    out=IO.fopen(args.output,"w")
    with sqlite3.connect(db_filename) as conn:
        # Drop the first column of each row and build a Bed12 from the rest.
        conn.row_factory=lambda cursor,row: Bed12._make(Bed12._types(row[1:]))
        cursor=conn.cursor()
        for i in TableIO.parse(IO.fopen(args.input,"r"),"bed"):
            # NOTE(review): the name is substituted into the SQL text by the
            # template; input containing quotes would break the statement.
            s=template.substitute({"table_name":args.table_name,"name":i.chr.strip()})
            print(s)
            cursor.execute(s)
            gene=cursor.fetchone()
            if gene is None:
                # fetchone() returns None when the query matched no row.
                logging.warning("can't find gene %s"%i.chr)
                continue
            logging.debug(i)
            logging.debug(i.cdna_length())
            logging.debug(gene)
            logging.debug(gene.cdna_length())
            assert gene.cdna_length() > i.cdna_length()
            print(reverse_translate(gene,i),file=out)
Example #24
0
def iterate(fin):
    """Yield ``(query, hits, overlaps)`` for each query block read from ``fin``.

    The first line is taken as the first query: it is split on tabs and
    everything after the first field is turned into a BED12.  The following
    rows are parsed with ``TableIO.parse``; a ``QR`` row closes the current
    block and starts the next one, ``HT`` rows are collected as hits and
    ``OP`` rows as overlaps.  Nothing is yielded for an empty input.
    """
    # next(fin, None) works on Python 2 and 3 and lets an empty input end
    # the generator cleanly instead of leaking StopIteration out of it.
    first_line = next(fin, None)
    if first_line is None:
        return
    qr = BED12._make(BED12._types(first_line.split("\t")[1:]))
    hits = []
    overlap = []
    i = 0
    for row in TableIO.parse(fin):
        tag = row[0]
        if tag == "QR":
            if i % 100 == 0:
                logging.info("processing " + str(i) + "  genes")
            i += 1
            yield qr, hits, overlap
            qr = BED12._make(BED12._types(row[1:]))
            hits = []
            overlap = []
        elif tag == "HT":
            hits.append(BED12._make(BED12._types(row[1:])))
        elif tag == "OP":
            overlap.append(BED12._make(BED12._types(row[1:])))
    # Flush the block that was still open at end of input.
    yield qr, hits, overlap
Example #25
0
def run(args):
    """Write an RPKM value for every bed12 transcript of ``args.input``.

    Reads returned by the BAM query that pass ``compatible_with_transcript``
    are counted: each counts 1 when ``args.uniq`` is set, otherwise 1/nh
    where nh is the first of the three comma-separated values of the read's
    ``itemRgb``.  The sum is scaled by the number of mapped reads and the
    transcript's ``cdna_length()``.
    """
    bam = DBI.init(args.bam, "bam")
    mapped = bam.mapped
    out = IO.fopen(args.output, "w")
    print("Gene\tRPKM", file=out)
    transcripts = TableIO.parse(IO.fopen(args.input, "r"), "bed12")
    for transcript in transcripts:
        print(transcript.id, "\t", end="", file=out)
        weight = 0.0
        length = transcript.cdna_length()
        reads = bam.query(transcript, method="bam1", strand=args.strand, uniq=args.uniq)
        for read in reads:
            if not compatible_with_transcript(read, transcript):
                continue
            if args.uniq:
                weight += 1.0
            else:
                nh, _, _ = read.itemRgb.split(",")
                weight += 1.0 / int(nh)
        rpkm = float(weight) * (1000000.0 / mapped) * (1000.0 / float(length))
        print(rpkm, file=out)
Example #26
0
def test():
    """Exercise binindex on the bed12 file named on the command line.

    Records overlapping a fixed chr1 interval are moved from one index to a
    second one, then merge, ``+`` and ``uniq()`` are called and the results
    printed.  Without a file argument a usage line is written to stderr.

    Every print takes a single pre-formatted argument so the function
    behaves the same with the Python 2 print statement and the Python 3
    print function.
    """
    if len(sys.argv)==1:
        sys.stderr.write("Usage: binindex.py file.bed\n")
        sys.exit()
    a=TableIO.parse(sys.argv[1],'bed12')
    data=binindex(a)
    data2=binindex()
    bed=Bed("chr1",100000,2000000,".",".",".")
    # NOTE(review): data is modified while the result of data.query() is
    # being iterated - confirm that query() tolerates this.
    for i in data.query(bed):
        print("before remove: %s"%len(data))
        data.remove(i)
        print("after remove: %s"%len(data))
        data2.append(i)
        print(data2)
    for i in data2:
        print(i)
    print("data finalize:")
    data.merge(data2)
    print(data)
    print(data+data2)
    print(data)
    print(data.uniq())
    print(data)
Example #27
0
 def toBed12(self,chr="unknown_chr",strand="read2",**dict):
     '''
     Return self.reads parsed in the "bam2bed12" format as a list.

     chr is passed to the parser as ``references``; strand and any extra
     keyword arguments are passed through unchanged.
     '''
     from bam2x import TableIO
     parsed=TableIO.parse(self.reads,"bam2bed12",references=chr,strand=strand,**dict)
     return list(parsed)
Example #28
0
def run(args):
    """Aggregate read coverage around the TSS (or TTS) of every input record.

    For each record of ``args.input`` the position returned by ``tss()`` -
    or ``tts()`` when ``args.tts`` is set - is widened by ``args.up`` and
    ``args.down`` with ``get_flank_region`` and queried in the BAM index.
    The exons of every read, translated relative to the position, are
    counted per base.  For each offset the mean count over all records and
    the ``gini_coefficient`` of the per-record counts are written to
    ``args.output``; a plot is saved to ``<args.output>.png`` when possible.

    An input without records is reported with a warning and produces no
    table.
    """
    logging.basicConfig(level=logging.INFO)
    up=args.up
    down=args.down
    bp_num=up+down
    offset=-up
    bam=DBI.init(args.bam,"bam")
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    bin_sum=[0 for i in xrange(bp_num)]
    bin_e=[0.0 for i in xrange(bp_num)]
    bin_dis=[[] for i in xrange(bp_num)]
    # Count explicitly: relying on the loop variable after the loop raises
    # NameError when the input holds no records.
    bed_num=0
    for bed in TableIO.parse(fin,args.format):
        bed_num+=1
        bed_bin=[0 for i in xrange(bp_num)]
        if args.tts:
            pos=bed.tts()
        else:
            pos=bed.tss()
        pos_flank=get_flank_region(pos,up,down)
        for read in bam.query(pos_flank,"bam1",strand="read1"):
            a=translate_coordinates(pos,read)
            for e in a.Exons():
                # Shift into bin space and clamp to the window.
                start=max(e.start-offset,0)
                end=min(e.stop-offset,bp_num)
                for j in xrange(start,end):
                    bed_bin[j]+=1
        for i in xrange(bp_num):
            bin_sum[i]+=bed_bin[i]
            bin_dis[i].append(bed_bin[i])
    if bed_num==0:
        # Nothing to average over; the means below would divide by zero.
        logging.warning("no records found in %s",args.input)
        return
    for i in xrange(bp_num):
        bin_e[i]=gini_coefficient(bin_dis[i])
    if args.tts:
        print("pos_to_tts\taggregation_mean\tgini_coefficient",file=out)
    else:
        print("pos_to_tss\taggregation_mean\tgini_coefficient",file=out)
    for i in xrange(bp_num):
        print("{bin}\t{aggregation}\t{E}".format(bin=i+offset,aggregation=float(bin_sum[i])/bed_num,E=bin_e[i]),file=out)

    # Plotting is optional: the table above is complete without it.
    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        matplotlib.rcParams.update({'font.size':9})
        ax1=plt.subplot2grid((7,1),(6,0))
        plt.ylabel('gini coeffecient')
        plt.fill_between(range(-up,down),bin_e,color="r",alpha=0.2,y2=0)
        ax1.set_ylim(0,1)
        ax1.set_xlim(-up,down)
        ax1.axes.get_xaxis().set_visible(False)
        plt.axvline(x=0,linewidth=1, color='y')
        ax2=plt.subplot2grid((7,1),(0,0),rowspan=5)
        ax2.set_xlim(-up,down)
        plt.plot(range(-up,down),[float(i)/bed_num for i in bin_sum])
        plt.ylabel('mean coverage')
        if args.tts:
            plt.xlabel('pos to tts (bp)')
        else:
            plt.xlabel('pos to tss (bp)')
        plt.axvline(x=0,linewidth=1, color='y')
        plt.grid(True)
        plt.savefig(args.output+".png")
    except Exception as err:
        # Still best effort, but say why no figure was produced instead of
        # swallowing the error silently.
        logging.warning("plot not written: %s",err)
Example #29
0
def run(args):
    """Group nearby bed6 records into regions and score their peak shape.

    Consecutive records of ``args.input`` stay in one group while they are
    on the same chromosome and start no more than ``args.gap`` past the
    furthest stop seen so far.  Groups with a single record, or with a
    total score below ``args.min_reads_number``, are dropped.  For each
    remaining group the Gini coefficient of the score proportions and the
    highest-scoring record (the peak) are stored.

    A two-peak model is fitted to all Gini values with ``fit_two_peaks_EM``
    and ``bayes_p2`` is evaluated per group; a header and one bed12 line
    per group are written to ``args.output``.

    An input without records is reported with a warning and produces no
    output.
    """
    logging.basicConfig(level=logging.INFO)
    def process():
        # Turn the current buffer into a record; returns 1 if one was
        # stored and 0 if the group was dropped.
        if len(buff)==1: return 0
        total_score=0.0
        for i in buff:
            total_score+=i.score
        # Check the threshold before normalising: a rejected group needs no
        # further work, and a zero total no longer causes a division.
        if total_score < args.min_reads_number:
            return 0
        e=[i.score/total_score for i in buff]
        gini=gini_coefficient(e)
        record={}
        meta=BED6(buff[0].chr,buff[0].start,buff[-1].stop,args.prefix+"."+str(group_id),total_score,buff[0].strand)
        peak=max(buff,key=lambda x:x.score)
        record["peak"]=peak._replace(score=peak.score/total_score)
        record["meta"]=meta._replace(strand=peak.strand)
        record["gini"]=gini
        records.append(record)
        return 1


    def simple_output():
        # Alternative plain-text report; not called at present.
        print("# formats: bayes_prob_model2, gini, [ region bed, score is total reads], [peak bed , score is proportion ]",file=out)
        for i,x in enumerate(records):
            print("{p2}\t{gini}\t{meta}\t".format(p2=p2[i],meta=x["meta"],gini=x["gini"]),end="",file=out)
            print(x["peak"],file=out)
    def bed12_output():
        print("# formats: bed12 , [R,G,B] are corresponding to [ TTS_GINI_PVALUE*200, TSS_GINI_PALUE*200, PROPORTION_OF_PEAK*200 ]",file=out)
        for i,x in enumerate(records):
            if args.tts:
                g=0
                r=int(p2[i]*200)
            else:
                g=int(p2[i]*200)
                r=0
            b=int(x["gini"]*200)
            if p2[i]>0.5:
                meta=x["meta"]._replace(id=x["meta"].id+".end")
            else:
                meta=x["meta"]
            rgb="{r},{g},{b}".format(r=r,g=g,b=b)
            # NOTE(review): the peak is read through ".end" here while the
            # rest of the function uses ".stop" - confirm both exist.
            print("{bed6}\t{thickStart}\t{thickEnd}\t{itemRgb}\t{blockCount}\t{blockSizes}\t{blockStarts}".format(bed6=meta,thickStart=x["peak"].start,thickEnd=x["peak"].end,itemRgb=rgb,blockSizes=x["meta"].stop-x["meta"].start,blockCount=1,blockStarts=0),file=out)


    records=[]
    GAP=args.gap
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    iterator=TableIO.parse(fin,"bed6")
    # next(iterator, None) works on Python 2 and 3 and lets an empty input
    # be reported instead of failing with StopIteration.
    last=next(iterator,None)
    if last is None:
        logging.warning("no bed6 records found in %s",args.input)
        return
    last_stop=last.stop
    group_id=0
    buff=[last]
    last_chr=last.chr
    for x,i in enumerate(iterator):
        if x%10000==0: logging.info("processing {x} reads".format(x=x))
        if i.chr!=last_chr or i.start-last_stop > GAP:
            # Gap or chromosome change: close the current group.
            group_id+=process()
            buff=[i]
            last_chr=i.chr
            last_stop=i.stop
        else:
            buff.append(i)
            if i.stop>last_stop:
                last_stop=i.stop

    # Flush the group that was still open at end of input.
    process()
    gini=array([i["gini"] for i in records])
    model=fit_two_peaks_EM(gini)
    p2=bayes_p2(gini,model)
    print("# Date: ",time.asctime(),file=out)
    print("# Program Version ",VERSION,file=out)
    print("# The command line is :",file=out)
    print("#\t"," ".join(sys.argv),file=out)
    print("# learning model:",file=out)
    print("#",model_str(model),file=out)
    bed12_output()
Example #30
0
def run(args):
    """Print every bed12 record after ``remove_small_introns``.

    Each record of ``args.input`` is passed to ``remove_small_introns``
    together with ``args.cutoff`` and the result is written to
    ``args.output``.
    """
    source = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    for transcript in TableIO.parse(source, "bed12"):
        cleaned = remove_small_introns(transcript, args.cutoff)
        print(cleaned, file=out)