def makeposavg_wf0(genelist, GFFfile, utrgfffile, seqwin, densityfile,
                   outfilestring, riboshift, thresh):
    """Load annotation and density inputs, run ``makeposavg`` and report counts.

    Args:
        genelist: Passed through unchanged to ``makeposavg``.
        GFFfile: Annotation file handed to ``GFF.parse``; the parsed records
            are converted with ``seqtools.makeGFFlist``.
        utrgfffile: Selects the UTR table:
            * a list -> its first element is parsed and the "doingUTR5"
              flag is set to 1;
            * the string ``"0"`` -> no UTR table is built (``0`` is passed on);
            * anything else -> parsed as a single UTR GFF file, flag stays 0.
        seqwin: Passed through unchanged to ``makeposavg``.
        densityfile: Prefix of the density files; ``"_plus_"`` and
            ``"_minus_"`` are appended to read the two count sets.
        outfilestring: Passed through unchanged to ``makeposavg``.
        riboshift: Passed through unchanged to ``makeposavg``.
        thresh: Passed through unchanged to ``makeposavg``.

    Returns:
        None. The first three items of the ``makeposavg`` result are printed.
    """
    gffgen = GFF.parse(GFFfile)
    GFFlist = seqtools.makeGFFlist(gffgen)

    # doingUTR5 is only switched on when the caller wraps the UTR file in a list.
    doingUTR5 = 0
    if isinstance(utrgfffile, list):
        utrgff = GFF.parse(utrgfffile[0])
        utrtable = seqtools.makeutrtable(utrgff)
        doingUTR5 = 1
    elif utrgfffile == "0":
        # Sentinel string: run without any UTR annotation.
        utrtable = 0
    else:
        utrgff = GFF.parse(utrgfffile)
        utrtable = seqtools.makeutrtable(utrgff)
    GFFlists = [GFFlist, utrtable, doingUTR5]

    # Read the two density count sets, in [plus, minus] order.
    counts1p = builddense.readcountsf(densityfile + "_plus_")
    counts1m = builddense.readcountsf(densityfile + "_minus_")
    readcounts = [counts1p, counts1m]

    genesinavg = makeposavg(genelist, GFFlists, seqwin, readcounts,
                            outfilestring, riboshift, thresh)

    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("positions in avg = " + str(genesinavg[0]))
    print("positions not in avg because zero count = " + str(genesinavg[1]))
    print("positions with zero ORF count = " + str(genesinavg[2]))
def motifavg_2_wf(GFFfile, utr5gfffile, utr3gfffile, counts_filestring,
                  motifsize, inframe, thresh, outfilestring, mismatches,
                  shift, windowsize, avgwindow):
    """Run ``motifavg_2_simple``, normalise its averages and write the results.

    For every motif returned by ``motifavg_2_simple`` the per-position list
    (entry index 0) is divided by entry index 5 (the column labelled
    "hitsincluded" in the output header) when that value is positive. A pause
    score is then computed as the mean over the ``2 * windowsize + 1``
    positions centred on index ``avgwindow[0]`` divided by the mean over the
    whole list, and stored in entry index 10.

    Outputs:
        * ``gentools.writelisttoexcel(outlist, outfilestring)`` - one row per
          motif (entries 1..8 plus the pause score) under a header row.
        * ``outfilestring + ".bin"`` - all per-motif average lists
          concatenated, written through ``gentools.writelistbinint``.

    Args:
        GFFfile: Annotation file handed to ``GFF.parse``.
        utr5gfffile: Not used by this function.
        utr3gfffile: Not used by this function.
        counts_filestring: Prefix of the count files; ``"_plus_"`` and
            ``"_minus_"`` are appended.
        motifsize, inframe, thresh, mismatches, shift: Passed through
            unchanged to ``motifavg_2_simple``.
        outfilestring: Base name for both output files.
        windowsize: Half-width of the pause-score window; also passed to
            ``motifavg_2_simple``.
        avgwindow: Sequence whose sum is the number of positions normalised
            and whose first element is the index the pause window is centred
            on; also passed to ``motifavg_2_simple``.

    Returns:
        None.
    """
    codons = {}  # Just a placeholder
    counts0 = builddense.readcountsf(counts_filestring + "_plus_")
    counts1 = builddense.readcountsf(counts_filestring + "_minus_")
    counts = [counts0, counts1]

    GFFgen = GFF.parse(GFFfile)
    GFFlist = seqtools.makeGFFlist(GFFgen)

    avglist = []
    outlist = [[
        "motif", "na", "na", "na", "na", "hitsincluded", "na", "na",
        "tothits", "Pausescore"
    ]]

    motifdata = motifavg_2_simple(GFFlist, motifsize, inframe, thresh,
                                  mismatches, codons, shift, counts,
                                  windowsize, avgwindow)

    for mm in motifdata.keys():
        print(mm)
        entry = motifdata[mm]
        avg = entry[0]

        entry[1] = 0  # Variable not used
        if entry[5] > 0:
            # Normalise in place by the number of included hits.
            hits = float(entry[5])
            for i in range(sum(avgwindow)):
                avg[i] /= hits
        entry[3] = 0  # Variable not used

        # Pause score: mean of the centred window over mean of the whole list.
        # NOTE(review): if avgwindow[0] < windowsize the slice start goes
        # negative and wraps from the end of the list - confirm callers never
        # do that.
        # NOTE(review): under Python 2 these divisions truncate when the
        # values are still integers (i.e. no normalisation happened) -
        # confirm that is acceptable.
        centre = avgwindow[0]
        numerator = sum(avg[centre - windowsize:centre + 1 + windowsize])
        denominator = sum(avg)
        denominator /= len(avg)
        numerator /= (2 * windowsize + 1)
        if denominator != 0:
            pause = numerator / denominator
        else:
            pause = 0
        entry[10] = pause

        outlist.append([mm] + entry[1:9] + [entry[10]])
        avglist.extend(avg)  # Concatenate average files.

    # Write out the summary table (includes the new pause scores).
    gentools.writelisttoexcel(outlist, outfilestring)

    # Write out the concatenated averages; the context manager guarantees the
    # handle is closed even if the writer raises.
    with open(outfilestring + ".bin", "wb") as favg:
        gentools.writelistbinint(avglist, favg)
def makeposstats_wf(genelist, GFFfile, utrgfffile, seqwin, pausewin,
                    densityfile, outfilestring, riboshift, motiflen):
    """Load annotation and density inputs, run ``makeposstats``, save the result.

    Args:
        genelist: Passed through unchanged to ``makeposstats``.
        GFFfile: Annotation file handed to ``GFF.parse``.
        utrgfffile: UTR GFF file to parse, or the string ``"0"`` to pass
            ``0`` in place of a UTR table.
        seqwin, pausewin, riboshift, motiflen: Passed through unchanged to
            ``makeposstats``.
        densityfile: Prefix of the density files; ``"_plus_"`` and
            ``"_minus_"`` are appended.
        outfilestring: Destination handed to ``listavg.writedicttoexcel``.

    Returns:
        None.
    """
    annotation = seqtools.makeGFFlist(GFF.parse(GFFfile))

    # "0" is the sentinel for "no UTR annotation".
    utr_lookup = (
        0 if utrgfffile == "0"
        else seqtools.makeutrtable(GFF.parse(utrgfffile))
    )

    # Read the two density count sets, in [plus, minus] order.
    strand_counts = [
        builddense.readcountsf(densityfile + suffix)
        for suffix in ("_plus_", "_minus_")
    ]

    stats = makeposstats(genelist, [annotation, utr_lookup], seqwin, pausewin,
                         strand_counts, riboshift, motiflen)
    listavg.writedicttoexcel(stats, outfilestring)
def totalquant_wf(filebase, counts_filestring, bp5, bp3, ignoreutr5,
                  ignoreutr3, shift, filtermodule, thresh, GFFgen_filename,
                  utrgfffilename, utr5gfffilename):
    """Load annotation and counts, run ``makegenelist`` and save its result.

    The result is written with ``writedicttoexcel`` to
    ``filebase + "_genelist"``.

    Args:
        filebase: Prefix of the output name.
        counts_filestring: Prefix of the count files; ``"_plus_"`` and
            ``"_minus_"`` are appended.
        bp5, bp3, ignoreutr5, ignoreutr3, shift, filtermodule, thresh:
            Passed through unchanged to ``makegenelist``.
        GFFgen_filename: Annotation file handed to ``GFF.parse``.
        utrgfffilename: GFF file used to build the first UTR table.
        utr5gfffilename: GFF file used to build the second UTR table.

    Returns:
        None.
    """
    annotation = seqtools.makeGFFlist(GFF.parse(GFFgen_filename))

    # Read the two count sets, in [plus, minus] order.
    strand_counts = [
        builddense.readcountsf(counts_filestring + suffix)
        for suffix in ("_plus_", "_minus_")
    ]

    utr_table = seqtools.makeutrtable(GFF.parse(utrgfffilename))
    utr5_table = seqtools.makeutrtable(GFF.parse(utr5gfffilename))

    # NOTE(review): filtermodule is forwarded as-is here; confirm whether a
    # '0' string needs converting to 0 before makegenelist sees it.
    quantified = makegenelist(strand_counts, annotation, utr_table,
                              utr5_table, bp5, bp3, ignoreutr5, ignoreutr3,
                              shift, filtermodule, thresh)
    writedicttoexcel(quantified, filebase + "_genelist")
def totalavg_wf(regionlength5, regionlength3, filebase, counts_filestring,
                bp5, bp3, ignoreutr5, ignoreutr3, shift, filtermodule, thresh,
                equalweight, GFFgen_filename, utrgfffilename, utr5gfffilename,
                alignpos, goodzone):
    """Run ``makeavggene`` and dump its result as packed floats.

    The output file ``filebase + "_avg_" + str(alignpos) + "_"`` is opened
    (and therefore truncated) before any input is read, and each element of
    the ``makeavggene`` result is written with ``struct.pack("f", ...)``.

    Args:
        regionlength5, regionlength3, bp5, bp3, ignoreutr5, ignoreutr3,
        shift, thresh, equalweight, goodzone: Passed through unchanged to
            ``makeavggene``.
        filebase: Prefix of the output file name.
        counts_filestring: Prefix of the count files; ``"_plus_"`` and
            ``"_minus_"`` are appended.
        filtermodule: Passed to ``makeavggene``; the string ``'0'`` is
            converted to the integer ``0`` first.
        GFFgen_filename: Annotation file handed to ``GFF.parse``.
        utrgfffilename: GFF file used to build the first UTR table.
        utr5gfffilename: GFF file used to build the second UTR table.
        alignpos: Passed to ``makeavggene`` and embedded in the output name.

    Returns:
        None.
    """
    # The context manager closes the output file even when parsing or
    # averaging raises; the file is still opened first, as before.
    with open(filebase + "_avg_" + str(alignpos) + "_", "wb") as f:
        GFFgen = GFF.parse(GFFgen_filename)
        GFFlist = seqtools.makeGFFlist(GFFgen)

        # Accept the string form of "no filter".
        if filtermodule == '0':
            filtermodule = 0

        counts0 = builddense.readcountsf(counts_filestring + "_plus_")
        counts1 = builddense.readcountsf(counts_filestring + "_minus_")
        counts = [counts0, counts1]

        utrgffgen = GFF.parse(utrgfffilename)
        utrtable = seqtools.makeutrtable(utrgffgen)
        utrgffgen = GFF.parse(utr5gfffilename)
        utrtable2 = seqtools.makeutrtable(utrgffgen)

        gene = makeavggene(regionlength5, regionlength3, counts, GFFlist,
                           utrtable, utrtable2, ignoreutr5, ignoreutr3, bp5,
                           bp3, shift, filtermodule, thresh, alignpos,
                           equalweight, goodzone)

        # One native-format float per element, in index order.
        for i in range(len(gene)):
            f.write(struct.pack("f", float(gene[i])))