Beispiel #1
0
def Main():
    '''
    Annotate each VCF record of the input with query results from every BAM file.

    A "#"-prefixed header (program, version, date, command line, one line per
    BAM file and a column line) is written to the output first. Then, for each
    record parsed from the input as "vcf", args.chr_prefix is prepended to the
    record's chr, the record is written, and for every BAM handle a tab
    followed by the formatted results of querying that handle with the record
    is appended to the same line.
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")

    # Provenance header.
    print >>out, "# This data was generated by program ", sys.argv[0], \
        " (version: %s)" % VERSION, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out, "# Date: ", time.asctime()
    print >>out, "# The command line is :"
    print >>out, "#\t", " ".join(sys.argv)

    # One header line and one query handle per BAM file, numbered from 1.
    handles = []
    for number, bam_path in enumerate(args.bams, 1):
        print >>out, "# SAMPLE_%d BAM File:" % number, bam_path
        handles.append(DBI.init(bam_path, "bam"))

    # Column line: the VCF header followed by one column per sample.
    print >>out, "#", VCF.header(),
    for number, _ in enumerate(args.bams, 1):
        print >>out, "\t", "Sample_%d" % number,
    print >>out, ""

    for count, record in enumerate(TableIO.parse(fin, "vcf")):
        record.chr = args.chr_prefix + record.chr
        if not count % 100:
            # Progress indicator; "\r" keeps it on a single terminal line.
            print >>sys.stderr, "processing", count, "vcf\r",
        print >>out, record,
        for handle in handles:
            print >>out, "\t",
            for hit in handle.query(record):
                print >>out, format(hit),
        print >>out, ""
Beispiel #2
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    '''
    END OF IO TEMPLATE 
    '''
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)
    dbi=DBI.init(args.db,Tools.guess_format(args.db))
    references=dbi.bamfiles[0].references
    for i in TableIO.parse(fin,args.format):
        print i
        n=0
        c_count=0
        reads=dbi.query(i,args.method)
        for read in reads:
            compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand)
            print "HT:"
            for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)):
                print "READ"+str(i0)+"\t",r
            print "COMPATIBLE:",compatible,"\n\n"
            if compatible: c_count+=1
            n+=1
        print "COMPATIBLE / ALL OVERLAP READS =  ",c_count,"/",n
        print "RATIO\t%.4f"%float(c_count)/n
Beispiel #3
0
def Main():
    '''
    Exercise TableIO.parse on the input path and print every parsed item.

    When args.format is "guess", the format is first resolved with
    Tools.guess_format(args.input) and stored back on args. The parser is
    given the input path itself (args.input), not the opened input handle.
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")

    fmt = args.format
    if fmt == "guess":
        fmt = Tools.guess_format(args.input)
        args.format = fmt

    for entry in TableIO.parse(args.input, fmt):
        print >> out, entry
Beispiel #4
0
def Main():
    '''
    Write every record parsed from the input to the output, below a header.

    The header lines (program name, version, date and command line) each start
    with "#". The input is then parsed with TableIO.parse using args.format and
    each parsed item is printed on its own line.
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")

    # Provenance header.
    print >>out, "# This data was generated by program ", sys.argv[0], \
        " (version: %s)" % VERSION, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out, "# Date: ", time.asctime()
    print >>out, "# The command line is :"
    print >>out, "#\t", " ".join(sys.argv)

    for record in TableIO.parse(fin, args.format):
        print >>out, record
Beispiel #5
0
def Main():
    '''
    Parse the input with TableIO and echo each parsed item to the output.

    A "#"-prefixed header naming the program, its version, the current date
    and the full command line precedes the records.
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")

    # Header: the first line is assembled from four items in a single print.
    print >>out, "# This data was generated by program ", sys.argv[0], \
        " (version: %s)" % VERSION, \
        "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out, "# Date: ", time.asctime()
    print >>out, "# The command line is :"
    print >>out, "#\t", " ".join(sys.argv)

    records = TableIO.parse(fin, args.format)
    for record in records:
        print >>out, record
Beispiel #6
0
def Main():
    '''
    IO TEMPLATE
    '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    '''
    END OF IO TEMPLATE 
    '''
    print >> out, "# This data was generated by program ", sys.argv[
        0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)
    dbi = DBI.init(args.db, Tools.guess_format(args.db))
    references = dbi.bamfiles[0].references
    for i in TableIO.parse(fin, args.format):
        print i
        n = 0
        c_count = 0
        reads = dbi.query(i, args.method)
        for read in reads:
            compatible = Tools.compatible_with_transcript(
                read, i, references=references, strand=args.strand)
            print "HT:"
            for i0, r in enumerate(
                    TableIO.parse(read.reads,
                                  "bam2bed12",
                                  references=references)):
                print "READ" + str(i0) + "\t", r
            print "COMPATIBLE:", compatible, "\n\n"
            if compatible: c_count += 1
            n += 1
        print "COMPATIBLE / ALL OVERLAP READS =  ", c_count, "/", n
        print "RATIO\t%.4f" % float(c_count) / n
Beispiel #7
0
def Main():
    """
    Count splice donor and acceptor sites from the introns of the input reads.

    Reads are parsed from the input as "bam2bed12" records. For every intron
    whose length is at least args.intron_min_length, the intron's head() is
    tallied as a donor site and its tail() as an acceptor site, keyed by
    bedToID(). After a "#"-prefixed provenance header, the sorted donor sites
    and then the sorted acceptor sites are written to the output as Bed
    records carrying the tally.

    When args.genome is given, each line also carries a tab and the
    upper-cased result of genome.query() for the site; without a genome only
    the Bed record is written.
    """
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")

    # Provenance header.
    print >> out, "# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)

    donorSites = {}
    acceptorSites = {}
    if args.genome is not None:
        genome = DBI.init(args.genome, "genome")
    else:
        genome = None

    reads = TableIO.parse(fin, "bam2bed12", references=fin.references, strand=args.strand)
    for j, i in enumerate(reads):
        if j % 1000 == 0:
            # Progress indicator; "\r" keeps it on a single terminal line.
            print >>sys.stderr, "processing ", j, "reads               \r",
        for intron in i.Introns():
            if len(intron) < args.intron_min_length:
                continue
            donorID = bedToID(intron.head())
            donorSites[donorID] = donorSites.get(donorID, 0) + 1
            acceptorID = bedToID(intron.tail())
            acceptorSites[acceptorID] = acceptorSites.get(acceptorID, 0) + 1

    def report(sites, label):
        # Turn one tally dict into sorted, numbered Bed lines on the output.
        beds = []
        for key, count in sites.items():
            # The ID is tab-separated; presumably chr, start, stop, strand --
            # confirm against bedToID.
            a = key.split("\t")
            beds.append(Bed([a[0], a[1], a[2], "noname_" + label, count, a[3]]))
        beds.sort()
        for idx, x in enumerate(beds):
            x.id = label + "_" + str(idx)
            if genome is None:
                # No genome was supplied, so there is no sequence to append.
                print >> out, x
            else:
                print >> out, x, "\t", genome.query(x).upper()

    report(donorSites, "donor")
    report(acceptorSites, "acceptor")
Beispiel #8
0
def Main():
    '''
    Count splice donor and acceptor sites from the introns of the input reads.

    Reads are parsed from the input as "bam2bed12" records. For every intron
    whose length is at least args.intron_min_length, the intron's head() is
    tallied as a donor site and its tail() as an acceptor site, keyed by
    bedToID(). After a "#"-prefixed provenance header, the sorted donor sites
    and then the sorted acceptor sites are written to the output as Bed
    records carrying the tally.

    When args.genome is given, each line also carries a tab and the
    upper-cased result of genome.query() for the site; without a genome only
    the Bed record is written.
    '''
    global args,out
    args=ParseArg()
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    # Provenance header.
    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,
    print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)

    donorSites={}
    acceptorSites={}
    if args.genome is not None:
        genome=DBI.init(args.genome,"genome")
    else:
        genome=None

    for j,i in enumerate(TableIO.parse(fin,"bam2bed12",references=fin.references,strand=args.strand)):
        # Progress indicator; "\r" keeps it on a single terminal line.
        if j%1000==0: print >>sys.stderr,"processing ",j,"reads               \r",
        for intron in i.Introns():
            if len(intron)< args.intron_min_length: continue
            donorID=bedToID(intron.head())
            donorSites[donorID]=donorSites.get(donorID,0)+1
            acceptorID=bedToID(intron.tail())
            acceptorSites[acceptorID]=acceptorSites.get(acceptorID,0)+1

    def report(sites,label):
        # Turn one tally dict into sorted, numbered Bed lines on the output.
        beds=[]
        for key,count in sites.items():
            # The ID is tab-separated; presumably chr, start, stop, strand --
            # confirm against bedToID.
            a=key.split("\t")
            beds.append(Bed([a[0],a[1],a[2],"noname_"+label,count,a[3]]))
        beds.sort()
        for idx,x in enumerate(beds):
            x.id=label+"_"+str(idx)
            if genome is None:
                # No genome was supplied, so there is no sequence to append.
                print >>out,x
            else:
                print >>out,x,"\t",genome.query(x).upper()

    report(donorSites,"donor")
    report(acceptorSites,"acceptor")