Ejemplo n.º 1
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    dbi=DBI.init(args.db,Tools.guess_format(args.db))
    references=dbi.bamfiles[0].references
    for i in TableIO.parse(fin,args.format):
        print i
        n=0
        c_count=0
        reads=dbi.query(i,args.method)
        for read in reads:
            compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand)
            print "HT:"
            for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)):
                print "READ"+str(i0)+"\t",r
            print "COMPATIBLE:",compatible,"\n\n"
            if compatible: c_count+=1
            n+=1
        print "COMPATIBLE / ALL OVERLAP READS =  ",c_count,"/",n
        print "RATIO\t%.4f"%float(c_count)/n
Ejemplo n.º 2
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    '''
    END OF IO TEMPLATE 
    '''
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    dbi = DBI.init(args.db, Tools.guess_format(args.db))
    references = dbi.bamfiles[0].references
    for i in TableIO.parse(fin, args.format):
        print i
        n = 0
        c_count = 0
        reads = dbi.query(i, args.method)
        for read in reads:
            compatible = Tools.compatible_with_transcript(
                read, i, references=references, strand=args.strand)
            print "HT:"
            for i0, r in enumerate(
                    TableIO.parse(read.reads,
                                  "bam2bed12",
                                  references=references)):
                print "READ" + str(i0) + "\t", r
            print "COMPATIBLE:", compatible, "\n\n"
            if compatible: c_count += 1
            n += 1
        print "COMPATIBLE / ALL OVERLAP READS =  ", c_count, "/", n
        print "RATIO\t%.4f" % float(c_count) / n
Ejemplo n.º 3
0
def Main():
    global args,out
    args=ParseArg()
    dict={}
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout
    argv=sys.argv
    argv[0]=argv[0].split("/")[-1]
    print >>out,"# This data was generated by program ",argv[0],"(version %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :\n#\t"," ".join(argv)
    if args.query_method:
        dict["method"]=args.query_method
    dbi=DBI.init(args.db,args.dbformat)
    hits=0
    query=0
    if args.input=="stdin":
        input=sys.stdin
    else:
        input=args.input

    query_length=0
    hits_number=0
    for (i0,x) in enumerate(TableIO.parse(input,args.input_format)):
        if i0%10==0:
            print >>sys.stderr,"query ",i0," entries\r",
        print >>out,"QR\t",x
        hit=0
        query+=1
        query_length+=len(x)
        results=dbi.query(x,**dict)
        compatible=0
        #print >>sys.stderr,type(results)
        if isinstance(results,numpy.ndarray) or isinstance(results,list):
            if not args.silence:
                print >>out,"HT\t",
                for value in results:
                    print >>out,str(value)+",",
                print >>out,""
            hit=1
            hits_number+=1
        elif isinstance(results,str):
            if not args.silence:
                print >>out,"HT\t",
                print >>out,results
            hit=1
            hits_number+=1

        else:
            this_query_hits=0
            for j in results:
                
                if not args.silence:
                    print >>out,"HT\t",j,
                hit=1
                hits_number+=1
                this_query_hits+=1
                if isinstance(j,xplib.Annotation.Bed12) and isinstance(x,xplib.Annotation.Bed12):
                    compatible_binary=Tools.compatible_with_transcript(j,x)
                    if not args.silence:
                        print >>out,"\tCompatible:",compatible_binary
                    if compatible_binary:
                        compatible+=1
                else:
                    if not args.silence:
                        print >>out,""
            print >>out,"HN\t",this_query_hits
            if compatible>0:
                print >>out,"CP\t",compatible

        if args.dbformat=="tabix":
            x.chr=x.chr.replace("chr","")
            for j in dbi.query(x,**dict):
                print >>out,"HT\t",j
                hit=1
                hits_number+=1
        hits+=hit
    print >>out,"# Query Number:",query,"\n# Query Have Hits:",hits
    print >>out,"# Query Length:",query_length
    print >>out,"# Hits Number:",hits_number
Ejemplo n.º 4
0
def compare_reads(isoforms):
    # global dbi,out
    isoforms_set=[]
    chr=isoforms[0].chr
    min_start=isoforms[0].start
    max_stop=isoforms[0].stop
    for i in isoforms:
        if i.start < min_start: min_start=i.start
        if i.stop > max_stop: max_stop=i.stop
        isoforms_set.append(i)
#        print >>sys.stderr,"debug",i
    transcript_region=Bed([chr,min_start,max_stop]);
    print >>out,"REGION\t",chr,"\t",min_start,"\t",max_stop
    print >>out,"ISOFORM_INPUT_NUMBER\t",len(isoforms_set)

    
    '''
    reading all the reads in this transcript region
    '''
    reads_set=[]
    reads_num=0
    for i in dbi.query(transcript_region,method="fetch12"):
        reads_set.append(i)
        reads_num+=1


    '''
    compare two sets
    '''
    l=len(isoforms_set)
    bincodes={}
    total=reads_num
    if total==0: total=0.001
    for i in reads_set:
        bincode=0
        for j in isoforms_set:
            k=Tools.compatible_with_transcript(i,j)
            if k:
                bincode = (bincode<<1)+1
            else:
                bincode = bincode<<1
        if  bincodes.has_key(bincode):
            bincodes[bincode]+=1
        else:
            bincodes[bincode]=1
        
    
    init=[ 1.0/l for i in range(l) ]
    proportion=init
    '''
    EM Initialize
    '''

    '''
    E step
    '''
    totals=[0.0 for i in range(l)]
    new_proportion=[0.0 for i in range(l)]
    iterate_time=0;
    while(1):
        totals=[0.0 for i in range(l)]
        for code in bincodes.keys():
            row_total=0.0
            for j in range(l):
                if get_bit_n(j,l,code):
                    row_total+=proportion[j]
            for j in range(l):
                if get_bit_n(j,l,code):
                    totals[j]+=bincodes[code] * proportion[j] / row_total

        for i in range(l):
            new_proportion[i]=totals[i]/total
        '''
        M step
        '''
        #print >>sys.stderr,"proportion",proportion #debug
        #print >>sys.stderr,"new_proportion",new_proportion #debug
        #print >>sys.stderr,"total",total #debug
        #print >>sys.stderr,"totals",totals #debug
        dis=distance(proportion,new_proportion)
        proportion=new_proportion
        iterate_time+=1
        if(dis<1e-05): break;
        if(args.BYY and iterate_time > 10): break;
   
    '''
    BYY Hard Cut Algorithm
    '''
    while(args.BYY):
        totals=[0.0 for i in range(l)]
        for code in bincodes.keys():
            maxj=-1
            for j in range(l):
                if get_bit_n(j,l,code):
                   # totals[j]+=bincodes[code] * proportion[j] / row_total
                   if maxj==-1: maxj=j
                   elif proportion[j] > proportion[maxj]: maxj=j
            if maxj!=-1: totals[maxj]+=bincodes[code]
        #new_proportion=[0.0 for i in range(l)]
        for i in range(l):
            new_proportion[i]=totals[i]/total
        '''
        M step
        '''
        dis=distance(proportion,new_proportion)
        if(dis<1e-05): break;
        proportion=new_proportion
    # print >>out,proportion

    '''
    print isoforms
    '''
    for i,x in enumerate(isoforms_set):
        if proportion[i] > args.threshold:  
            if x.score==0.0:
                x.score=proportion[i]
                print >>out,"HT\t",x,"\t",proportion[i]
            else:
                print >>out,"HT\t",x,"\t",proportion[i]
        else:
            if not args.hits_only:
                if x.score==0.0:
                    x.score=proportion[i]
                    print >>out,"NT\t",x,"\t",proportion[i]
                else:
                    print >>out,"NT\t",x,"\t",proportion[i]
    print >>out,"//"
Ejemplo n.º 5
0
def Main():
    global args, out
    args = ParseArg()
    dict = {}
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    argv = sys.argv
    argv[0] = argv[0].split("/")[-1]
    print >> out, "# This data was generated by program ", argv[
        0], "(version %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :\n#\t", " ".join(argv)
    if args.query_method:
        dict["method"] = args.query_method
    dbi = DBI.init(args.db, args.dbformat)
    hits = 0
    query = 0
    if args.input == "stdin":
        input = sys.stdin
    else:
        input = args.input

    query_length = 0
    hits_number = 0
    for (i0, x) in enumerate(TableIO.parse(input, args.input_format)):
        if i0 % 10 == 0:
            print >> sys.stderr, "query ", i0, " entries\r",
        print >> out, "QR\t", x
        hit = 0
        query += 1
        query_length += len(x)
        results = dbi.query(x, **dict)
        compatible = 0
        #print >>sys.stderr,type(results)
        if isinstance(results, numpy.ndarray) or isinstance(results, list):
            if not args.silence:
                print >> out, "HT\t",
                for value in results:
                    print >> out, str(value) + ",",
                print >> out, ""
            hit = 1
            hits_number += 1
        elif isinstance(results, str):
            if not args.silence:
                print >> out, "HT\t",
                print >> out, results
            hit = 1
            hits_number += 1

        else:
            this_query_hits = 0
            for j in results:

                if not args.silence:
                    print >> out, "HT\t", j,
                hit = 1
                hits_number += 1
                this_query_hits += 1
                if isinstance(j, xplib.Annotation.Bed12) and isinstance(
                        x, xplib.Annotation.Bed12):
                    compatible_binary = Tools.compatible_with_transcript(j, x)
                    if not args.silence:
                        print >> out, "\tCompatible:", compatible_binary
                    if compatible_binary:
                        compatible += 1
                else:
                    if not args.silence:
                        print >> out, ""
            print >> out, "HN\t", this_query_hits
            if compatible > 0:
                print >> out, "CP\t", compatible

        if args.dbformat == "tabix":
            x.chr = x.chr.replace("chr", "")
            for j in dbi.query(x, **dict):
                print >> out, "HT\t", j
                hit = 1
                hits_number += 1
        hits += hit
    print >> out, "# Query Number:", query, "\n# Query Have Hits:", hits
    print >> out, "# Query Length:", query_length
    print >> out, "# Hits Number:", hits_number