pValues.append(allScores[o1][i]['pValue']) return pValues # Parameters for a run permutations = 200 # Number of times to run meme, get nMotifs per permutation pValueThreshold = 0.05 # Threshold for TOMTOM similarity # TODO!!! These parameters could be gotten from the cMonkey Run nMotifs = 2 regions = ['upstream', '3pUTR'] motifWidth = { 'upstream': [6, 12], '3pUTR': [4, 9] } revComp = { 'upstream': True, '3pUTR': None } # Load up the cMonkey run to get permuted pValues from cMonkeyWrapper import cMonkeyWrapper c1 = cMonkeyWrapper('cmonkey-run-hsa.RData') # To pick the important clusters maxEValue = 10 maxScore = None # 0 maxSurv = None # 0.05/len(c1.getBiclusterNames()) allPssms = {} if maxScore==None and maxSurv==None: allPssms['upstream'] = c1.getPssmsUpstream(maxEValue=maxEValue) allPssms['3pUTR'] = c1.getPssms3pUTR(maxEValue=maxEValue) elif maxSurv==None: allPssms['upstream'] = c1.getPssmsUpstream(maxEValue=maxEValue,maxScore=maxScore) allPssms['3pUTR'] = c1.getPssms3pUTR(maxEValue=maxEValue,maxScore=maxScore) else: allPssms['upstream'] = c1.getPssmsUpstream(maxEValue=maxEValue,maxScore=maxScore,maxSurv=maxSurv) allPssms['3pUTR'] = c1.getPssms3pUTR(maxEValue=maxEValue,maxScore=maxScore,maxSurv=maxSurv)
# First sort the results sorted1 = qsortBasedOn(dict1.keys(), dict1.values())[0] # Then control based on FDR res1 = [] alpha = float(alpha) #res1 = [sorted1[i] for i in range(len(sorted1)) if dict1[sorted1[i]] <= alpha/float(tests-i)] for i in range(len(sorted1)): if dict1[sorted1[i]] <= alpha/float(tests-i): res1.append(sorted1[i]) else: break return res1 # Load up the cMonkey run to get permuted pValues from cMonkeyWrapper import cMonkeyWrapper c1 = cMonkeyWrapper('iter3000.RData') # Getting allSeqs = c1.getSeqsUpstream() bgFile = 'tmp/meme/bgFile.meme' seed = None print 'Not using a seed.' # Get Sequences for the run if not allSeqs==None: allSeqs = allSeqs else: allSeqs = {} asf = open(allSeqsFile,'r') for line in asf.readlines(): splitUp = line.strip().split(',')