Esempio n. 1
0
def makeposavg_wf0(genelist, GFFfile, utrgfffile, seqwin, densityfile,
                   outfilestring, riboshift, thresh):
    """Load annotation and density inputs, run makeposavg, print its tallies.

    Args:
        genelist: Passed through unchanged to makeposavg.
        GFFfile: Main annotation; handed to GFF.parse.
        utrgfffile: Selects the UTR table.
            * a list: its first element is parsed as UTR annotation and
              the doingUTR5 flag is set to 1;
            * the string "0": no UTR table is built (0 is passed instead);
            * anything else: parsed as UTR annotation, doingUTR5 stays 0.
        seqwin: Passed through unchanged to makeposavg.
        densityfile: Prefix of the density files; "_plus_" and "_minus_"
            are appended to it to locate the two count files.
        outfilestring: Passed through unchanged to makeposavg.
        riboshift: Passed through unchanged to makeposavg.
        thresh: Passed through unchanged to makeposavg.

    Returns:
        None. The first three elements of makeposavg's result are printed.
    """
    GFFlist = seqtools.makeGFFlist(GFF.parse(GFFfile))

    doingUTR5 = 0
    # isinstance (rather than an exact type comparison) also accepts
    # list subclasses.
    if isinstance(utrgfffile, list):
        utrtable = seqtools.makeutrtable(GFF.parse(utrgfffile[0]))
        doingUTR5 = 1
    elif utrgfffile == "0":
        utrtable = 0
    else:
        utrtable = seqtools.makeutrtable(GFF.parse(utrgfffile))
    GFFlists = [GFFlist, utrtable, doingUTR5]

    # Plus-strand counts first, minus-strand second.
    readcounts = [
        builddense.readcountsf(densityfile + "_plus_"),
        builddense.readcountsf(densityfile + "_minus_"),
    ]

    genesinavg = makeposavg(genelist, GFFlists, seqwin, readcounts,
                            outfilestring, riboshift, thresh)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("positions in avg = " + str(genesinavg[0]))
    print("positions not in avg because zero count = " + str(genesinavg[1]))
    print("positions with zero ORF count = " + str(genesinavg[2]))
Esempio n. 2
0
def motifavg_2_wf(GFFfile, utr5gfffile, utr3gfffile, counts_filestring,
                  motifsize, inframe, thresh, outfilestring, mismatches, shift,
                  windowsize, avgwindow):
    """Compute per-motif average profiles and pause scores, then write them.

    The per-motif records come from motifavg_2_simple. For every motif the
    profile (record element 0) is divided by the number of included hits
    (record element 5) when that number is positive, a pause score is
    stored in record element 10, and a summary row is collected.

    Two outputs are produced:
        * a table written through gentools.writelisttoexcel to
          ``outfilestring``;
        * all profiles concatenated, written through
          gentools.writelistbinint to ``outfilestring + ".bin"``.

    Args:
        GFFfile: Main annotation; handed to GFF.parse.
        utr5gfffile: Unused by this function.
        utr3gfffile: Unused by this function.
        counts_filestring: Prefix of the count files; "_plus_" and
            "_minus_" are appended to it.
        motifsize, inframe, thresh, mismatches, shift: Passed through
            unchanged to motifavg_2_simple.
        outfilestring: Base name for both output files.
        windowsize: Half-width of the pause window; also passed to
            motifavg_2_simple.
        avgwindow: Sequence whose sum is the number of profile positions
            normalized and whose first element is the index the pause
            window is centred on; also passed to motifavg_2_simple.

    Returns:
        None.
    """
    codons = {}  # Placeholder; motifavg_2_simple still takes the argument.
    counts = [
        builddense.readcountsf(counts_filestring + "_plus_"),
        builddense.readcountsf(counts_filestring + "_minus_"),
    ]
    GFFlist = seqtools.makeGFFlist(GFF.parse(GFFfile))

    avglist = []
    # Header row; "na" marks record slots that carry no meaningful value.
    outlist = [[
        "motif", "na", "na", "na", "na", "hitsincluded", "na", "na", "tothits",
        "Pausescore"
    ]]

    motifdata = motifavg_2_simple(GFFlist, motifsize, inframe, thresh,
                                  mismatches, codons, shift, counts,
                                  windowsize, avgwindow)

    profile_len = sum(avgwindow)  # Loop invariant, computed once.
    centre = avgwindow[0]

    for mm in motifdata:
        print(mm)
        record = motifdata[mm]
        profile = record[0]

        record[1] = 0  # Slot not used.
        if record[5] > 0:
            # Normalize the profile by the number of included hits.
            hits = float(record[5])
            for i in range(profile_len):
                profile[i] /= hits
        record[3] = 0  # Slot not used.

        # Pause score: mean over the window around the centre divided by
        # the mean over the whole profile.
        # NOTE(review): under Python 2 these divisions truncate if the
        # profile still holds integers (i.e. it was not normalized above)
        # - confirm that is intended.
        numerator = sum(profile[centre - windowsize:centre + 1 + windowsize])
        denominator = sum(profile)
        denominator /= len(profile)
        numerator /= (2 * windowsize + 1)
        pause = numerator / denominator if denominator != 0 else 0
        record[10] = pause

        outlist.append([mm] + record[1:9] + [record[10]])
        avglist += profile  # Concatenate the average profiles.

    # Write out the summary table (includes the new pause scores).
    gentools.writelisttoexcel(outlist, outfilestring)

    # Write out the concatenated averages; the context manager closes the
    # handle even if the writer raises.
    with open(outfilestring + ".bin", "wb") as favg:
        gentools.writelistbinint(avglist, favg)
Esempio n. 3
0
def makeposstats_wf(genelist, GFFfile, utrgfffile, seqwin, pausewin,
                    densityfile, outfilestring, riboshift, motiflen):
    """Gather inputs for makeposstats and write its result to a spreadsheet.

    Args:
        genelist: Passed through unchanged to makeposstats.
        GFFfile: Main annotation; handed to GFF.parse.
        utrgfffile: UTR annotation to parse, or the string "0" to skip it
            (0 is then passed in place of the UTR table).
        seqwin: Passed through unchanged to makeposstats.
        pausewin: Passed through unchanged to makeposstats.
        densityfile: Prefix of the density files; "_plus_" and "_minus_"
            are appended to it.
        outfilestring: Destination given to listavg.writedicttoexcel.
        riboshift: Passed through unchanged to makeposstats.
        motiflen: Passed through unchanged to makeposstats.

    Returns:
        None.
    """
    annotation = seqtools.makeGFFlist(GFF.parse(GFFfile))
    utr_lookup = (0 if utrgfffile == "0" else
                  seqtools.makeutrtable(GFF.parse(utrgfffile)))

    # Plus-strand counts are loaded before minus-strand counts.
    plus_counts = builddense.readcountsf(densityfile + "_plus_")
    minus_counts = builddense.readcountsf(densityfile + "_minus_")

    stats = makeposstats(genelist, [annotation, utr_lookup], seqwin, pausewin,
                         [plus_counts, minus_counts], riboshift, motiflen)
    listavg.writedicttoexcel(stats, outfilestring)
Esempio n. 4
0
def totalquant_wf(filebase, counts_filestring, bp5, bp3, ignoreutr5,
                  ignoreutr3, shift, filtermodule, thresh, GFFgen_filename,
                  utrgfffilename, utr5gfffilename):
    """Build the gene list with makegenelist and write it to a spreadsheet.

    Args:
        filebase: Output base name; "_genelist" is appended to it.
        counts_filestring: Prefix of the count files; "_plus_" and
            "_minus_" are appended to it.
        bp5, bp3, ignoreutr5, ignoreutr3, shift, filtermodule, thresh:
            Passed through unchanged to makegenelist.
        GFFgen_filename: Main annotation; handed to GFF.parse.
        utrgfffilename: Annotation used to build the first UTR table.
        utr5gfffilename: Annotation used to build the second UTR table.

    Returns:
        None.
    """
    annotation = seqtools.makeGFFlist(GFF.parse(GFFgen_filename))

    # Plus strand first, then minus strand.
    strand_counts = [
        builddense.readcountsf(counts_filestring + suffix)
        for suffix in ("_plus_", "_minus_")
    ]

    first_utr_table = seqtools.makeutrtable(GFF.parse(utrgfffilename))
    second_utr_table = seqtools.makeutrtable(GFF.parse(utr5gfffilename))

    gene_table = makegenelist(strand_counts, annotation, first_utr_table,
                              second_utr_table, bp5, bp3, ignoreutr5,
                              ignoreutr3, shift, filtermodule, thresh)
    writedicttoexcel(gene_table, filebase + "_genelist")
Esempio n. 5
0
def totalavg_wf(regionlength5, regionlength3, filebase, counts_filestring,
                bp5, bp3, ignoreutr5, ignoreutr3, shift, filtermodule, thresh,
                equalweight, GFFgen_filename, utrgfffilename, utr5gfffilename,
                alignpos, goodzone):
    """Compute the average gene with makeavggene and write it as floats.

    The output file is ``filebase + "_avg_" + str(alignpos) + "_"``. Each
    element of the result is converted to float and packed with struct
    format "f" (native byte order).

    The file is opened only after the computation has finished, so a
    failure while loading inputs or averaging does not leave a truncated
    output file behind, and the handle is closed even if writing raises.

    Args:
        regionlength5, regionlength3: Passed through to makeavggene.
        filebase: Output base name.
        counts_filestring: Prefix of the count files; "_plus_" and
            "_minus_" are appended to it.
        bp5, bp3, ignoreutr5, ignoreutr3, shift, thresh, equalweight,
            goodzone: Passed through unchanged to makeavggene.
        filtermodule: Passed to makeavggene; the string '0' is converted
            to the integer 0 first.
        GFFgen_filename: Main annotation; handed to GFF.parse.
        utrgfffilename: Annotation used to build the first UTR table.
        utr5gfffilename: Annotation used to build the second UTR table.
        alignpos: Passed to makeavggene and embedded in the output name.

    Returns:
        None.
    """
    GFFlist = seqtools.makeGFFlist(GFF.parse(GFFgen_filename))

    if filtermodule == '0':
        filtermodule = 0

    counts = [
        builddense.readcountsf(counts_filestring + "_plus_"),
        builddense.readcountsf(counts_filestring + "_minus_"),
    ]

    utrtable = seqtools.makeutrtable(GFF.parse(utrgfffilename))
    utrtable2 = seqtools.makeutrtable(GFF.parse(utr5gfffilename))

    gene = makeavggene(regionlength5, regionlength3, counts, GFFlist,
                       utrtable, utrtable2, ignoreutr5, ignoreutr3, bp5, bp3,
                       shift, filtermodule, thresh, alignpos, equalweight,
                       goodzone)

    with open(filebase + "_avg_" + str(alignpos) + "_", "wb") as f:
        for value in gene:
            f.write(struct.pack("f", float(value)))