seqs.append(allSeqs[gene]) memeFile.write('>'+str(gene)+'\n'+str(allSeqs[gene])+'\n') memeFile.close() # Run this using all cores available cpus = cpu_count() print 'There are', cpus,'CPUs avialable.' pool = Pool(processes=cpus) pool.map(runMeme,[i for i in range(len(clusterMembership))]) print 'Done with clusters.\n' # Compare the random motifs to the original motif in TOMTOM similar = 0 pssms = compilePssms(clusterMemeRuns,maxEValue) if len(pssms)>0: tomtomUp = tomtom([orig],pssms.values(),c1.getNucFreqsUpstream(),'+',minOverlap=6) putMeUp = dict(zip(pssms.keys(),range(len(pssms)))) pValues = getPValues(tomtomUp) for pValue in pValues: if float(pValue) <= float(pValueThreshold): similar += 1 # Write out the results outFile.write('\n'+str(orig.getName())+','+region+','+str(orig.getEValue())+','+str(orig.getConsensusMotif())+','+str(len(pssms))+','+str(similar)+','+str(permutations)+','+str(float(similar)/float(permutations))) # Clean-up and prepare for next run shutil.rmtree('tmp/meme/fasta') # Close the output file outFile.close()
# Flatten postProcessed into CSV rows: a 1-based row number followed by the
# value of every column named in names2.
# NOTE(review): assumes postProcessed is keyed 1..len(postProcessed) and that
# its values are already strings (','.join below requires that) -- confirm.
postOut = []
for i in range(len(postProcessed)):
    postOut.append([str(i+1)] + [postProcessed[i+1][nm1] for nm1 in names2])

# Header line comes from names, then one comma-joined line per row. The
# with-block guarantees the file is closed even if the write raises.
with open('postProcessedV2.csv','w') as postFinal:
    postFinal.write(','.join(names)+'\n'+'\n'.join([','.join(row) for row in postOut]))

#################################
# Making SIF File for Cytoscape #
#################################
pssms = c1.getPssmsUpstream(maxScore=maxScore,maxEValue=maxEValue,maxSurv=maxSurv, maxNormResid=maxResidual)

# Snapshot the key order once so the matrix indices, the header row and any
# later use of pssmsNames all agree.
pssmsNames = list(pssms.keys())
numPssms = len(pssmsNames)

# Significance cut-off: 0.05 divided by half the squared number of PSSMs
# (floored, as the original integer division did under Python 2). The divisor
# is clamped to 1 because with zero or one PSSM the floored value is 0 and the
# original expression raised ZeroDivisionError; no pair is tested in that case
# anyway, since self-comparisons are excluded below.
pValueThreshold = 0.05/max(1, (numPssms**2)//2)

# Imported here, as in the original script, rather than at the top of the file.
from tomtom import tomtom
# Compare every upstream PSSM against every other upstream PSSM.
tomtomUp = tomtom(pssms.values(),pssms.values(),c1.getNucFreqsUpstream(),'+ -',minOverlap=6)

# Map each PSSM name to its row/column position in the p-value matrix.
putMeUp = dict(zip(pssmsNames,range(numPssms)))

# Square matrix of placeholders; every cell is overwritten in the loop below.
pValues = []
pssmPssmPairs = []
for i in range(numPssms):
    pValues.append(list(range(numPssms)))

for i in pssms:
    for j in pssms:
        pValue = tomtomUp.getScore(i,j)['pValue']
        pValues[putMeUp[i]][putMeUp[j]] = pValue
        # Record an edge for each ordered pair of distinct PSSMs at or below
        # the cut-off ('mm' is the edge label written between the two nodes).
        if not i==j and float(pValue) <= pValueThreshold:
            pssmPssmPairs.append([i+'_Up','mm',j+'_Up'])

# Now write out the matrix to a csv file for import into R
# Only the header row is written here; the file is intentionally left open
# because it is not closed anywhere in this section (presumably the matrix
# rows are written by the code that follows -- confirm).
outFile = open('tmp/tomtom_pValuesUpstream.csv','w')
outFile.write(','+','.join(pssmsNames))