seqs.append(allSeqs[gene])
                        memeFile.write('>'+str(gene)+'\n'+str(allSeqs[gene])+'\n')
            memeFile.close()

        # Run this using all cores available
        cpus = cpu_count()
        print 'There are', cpus,'CPUs avialable.' 
        pool = Pool(processes=cpus)
        pool.map(runMeme,[i for i in range(len(clusterMembership))])
        print 'Done with clusters.\n'

        # Score the original motif against the motifs compiled from the
        # cluster MEME runs with TOMTOM, then count how many comparisons have
        # a p-value at or below pValueThreshold
        similar = 0
        pssms = compilePssms(clusterMemeRuns,maxEValue)
        if len(pssms) != 0:
            tomtomUp = tomtom([orig],pssms.values(),c1.getNucFreqsUpstream(),'+',minOverlap=6)
            # Map each PSSM name to its position in pssms.keys()
            putMeUp = dict((name, idx) for idx, name in enumerate(pssms.keys()))
            pValues = getPValues(tomtomUp)
            similar = sum(1 for pValue in pValues if float(pValue) <= float(pValueThreshold))

        # Append one comma-separated record for this motif: name, region,
        # E-value, consensus, number of compiled PSSMs, number of similar
        # motifs, number of permutations, and the similar/permutations ratio
        record = [
            str(orig.getName()),
            region,
            str(orig.getEValue()),
            str(orig.getConsensusMotif()),
            str(len(pssms)),
            str(similar),
            str(permutations),
            str(float(similar)/float(permutations)),
        ]
        outFile.write('\n'+','.join(record))

        # Remove the temporary FASTA directory before the next run
        shutil.rmtree('tmp/meme/fasta')

# Close the output file now that all result records have been written
outFile.close()
Example #2
0
# Build one CSV row per post-processed entry: the 1-based entry number
# followed by that entry's value for every column named in names2
postOut = []
for entryNum in range(1, len(postProcessed)+1):
    postOut.append([str(entryNum)] + [postProcessed[entryNum][nm1] for nm1 in names2])
# Header line comes from names; the data rows follow one per line, with no
# newline after the last row
postFinal = open('postProcessedV2.csv','w')
postFinal.write('%s\n%s' % (','.join(names), '\n'.join(map(','.join, postOut))))
postFinal.close()

#################################
# Making SIF File for Cytoscape #
#################################
# Retrieve the upstream PSSMs from c1 using the configured cut-off arguments
pssms = c1.getPssmsUpstream(maxScore=maxScore,maxEValue=maxEValue,maxSurv=maxSurv, maxNormResid=maxResidual)
# Corrected significance threshold: 0.05 divided by half the squared PSSM
# count. Divide by 2.0 so Python 2 does not floor the denominator, which
# rounded it down for odd counts and made it 0 (ZeroDivisionError) for a
# single PSSM.
numComparisons = (len(pssms)**2)/2.0
if numComparisons > 0:
    pValueThreshold = 0.05/numComparisons
else:
    # No PSSMs means no comparisons; fall back to the uncorrected threshold
    pValueThreshold = 0.05
from tomtom import tomtom
# Compare every PSSM against every PSSM with TOMTOM
tomtomUp = tomtom(pssms.values(),pssms.values(),c1.getNucFreqsUpstream(),'+ -',minOverlap=6)
# Map each PSSM name to its row/column index in the p-value matrix
putMeUp = dict(zip(pssms.keys(),range(len(pssms))))
pValues = []
pssmPssmPairs = []
# Pre-size a square matrix; every placeholder cell is overwritten below
for i in range(len(pssms)):
    pValues.append(list(range(len(pssms))))
for i in pssms:
    for j in pssms:
        pValue = tomtomUp.getScore(i,j)['pValue']
        pValues[putMeUp[i]][putMeUp[j]] = pValue
        # Record an 'mm' link between two distinct PSSMs whose p-value is
        # at or below the threshold (presumably consumed when the SIF file
        # is written -- confirm against the code that follows)
        if i != j and float(pValue) <= float(pValueThreshold):
            pssmPssmPairs.append([i+'_Up','mm',j+'_Up'])
# Start the CSV export of the p-value matrix (for import into R): the header
# row is an empty leading cell followed by the PSSM names
pssmsNames = pssms.keys()
outFile = open('tmp/tomtom_pValuesUpstream.csv','w')
outFile.write(','+','.join(pssmsNames))