def _customRExecution(self, resDictKey, xlab, main):
    """Plot a kernel density estimate of the raw data for resDictKey.

    The raw values are converted to floats and handed to R's density() and
    plot(). For the 'p-value' key the density is evaluated on [0, 1] only.
    """
    # Restrict the evaluation range of the density only for p-values.
    if resDictKey == 'p-value':
        densityRange = ',from=0,to=1'
    else:
        densityRange = ''
    plotSource = ''.join([
        'plotFunc <- function(ourList, xlab, main) {vec <- unlist(ourList); plot(density(vec',
        densityRange,
        '), xlab=xlab, main=main)}',
    ])
    values = [float(value) for value in self._getRawData(resDictKey)]

    from proto.RSetup import r
    plotFunc = r(plotSource)
    plotFunc(values, xlab, main)
def storeState():
    """Snapshot the Python, numpy and R random generator states.

    The three states are kept, in that order, as a list in
    random._storedStates.
    """
    from proto.RSetup import r

    # One uniform draw in R before reading .Random.seed -- presumably to make
    # sure the seed vector exists; note that it advances R's generator.
    r('runif(1)')

    pythonState = random.getstate()
    numpyState = numpy.random.get_state()
    rState = r('.Random.seed')
    random._storedStates = [pythonState, numpyState, rState]
def _compute(self):
    """Return the polychoric correlation of the 2x2 table from the first child.

    The child result is read under the keys 'Neither', 'Only1', 'Only2' and
    'Both'; the four values fill a two-row R matrix that is passed to
    polychor() from the R package "polycor".
    """
    from proto.RSetup import r, robjects

    child = self._children[0]
    cellKeys = ['Neither', 'Only1', 'Only2', 'Both']
    cellValues = [child.getResult()[cellKey] for cellKey in cellKeys]
    summaryStats = robjects.FloatVector(cellValues)

    r('library("polycor")')
    polychoric = r('function(vec){polychor(matrix(vec, nrow=2))}')
    return polychoric(summaryStats)
def returnToStoredState():
    """Restore the random generator states kept in random._storedStates.

    random._storedStates is read as [python state, numpy state, R seed]
    (the layout written by storeState) and each state is reinstated.

    Raises:
        ShouldNotOccurError: if random._storedStates is None.
    """
    if random._storedStates is None:
        # This exception used to be returned instead of raised, which made a
        # missing snapshot pass silently.
        raise ShouldNotOccurError(
            'Tried to return to previous random state without a stored state.')

    pythonState = random._storedStates[0]
    numpyState = random._storedStates[1]
    rState = random._storedStates[2]

    random.setstate(pythonState)
    numpy.random.set_state(numpyState)

    from proto.RSetup import r
    # R reads .Random.seed from the global environment. A plain `<-` inside a
    # function only creates a local variable, so the seed has to be assigned
    # into .GlobalEnv explicitly for the restore to take effect.
    restoreRSeed = r(
        'function(state) {assign(".Random.seed", state, envir=.GlobalEnv)}')
    restoreRSeed(rState)
def _customRExecution(self, resDictKey, xlab, main):
    """Plot a solid curve with two dashed companion curves on shared axes.

    The raw data for resDictKey is unpacked into three series, named mean,
    plus-SD and minus-SD in the R code. The y-range covers all three series.
    The incoming xlab is ignored: the x-axis is always labelled
    'Relative bin-position', and the y-axis uses the result label.
    """
    plotSource = (
        'plotFunc <- function(meanList, plusSdList, minusSdList, xlab,ylab, main, ymin,ymax) '
        '{vec1 <- unlist(meanList); vec2 <- unlist(plusSdList); vec3 <- unlist(minusSdList); '
        'plot(vec1, type="l", xlab=xlab,ylab=ylab, main=main,ylim=c(ymin,ymax)); '
        'lines(vec2,type="l",lty="dashed"); lines(vec3,type="l",lty="dashed");}'
    )
    meanCurve, upperCurve, lowerCurve = self._getRawData(resDictKey)

    # A common y-range so that none of the three curves is clipped.
    allValues = numpy.concatenate((meanCurve, upperCurve, lowerCurve))
    ymin = allValues.min()
    ymax = allValues.max()

    xlab = 'Relative bin-position'
    ylab = self._results.getLabelHelpPair(resDictKey)[0]

    from proto.RSetup import r
    plotFunc = r(plotSource)
    plotFunc(meanCurve, upperCurve, lowerCurve, xlab, ylab, main, ymin, ymax)
def setManualSeed(seed):
    """Seed Python's, numpy's and R's random generators with the same value.

    The seed as passed in (possibly None) is remembered in random._seed.
    When it is None, the value used for seeding comes from getRandomSeed().
    """
    random._seed = seed
    effectiveSeed = getRandomSeed() if seed is None else seed

    random.seed(effectiveSeed)
    numpy.random.seed(effectiveSeed)

    from proto.RSetup import r
    seedR = r('function(seed) {set.seed(seed)}')
    seedR(effectiveSeed)
def returnToStoredFullState(self):
    """Restore the random states kept in self._storedFullState.

    self._storedFullState is read as [python state, numpy state, R seed].
    After the three states have been reinstated the stored snapshot is
    cleared (set to None).

    Raises:
        ShouldNotOccurError: if self._storedFullState is None.
    """
    if self._storedFullState is None:
        # This exception used to be returned instead of raised, which made a
        # missing snapshot pass silently.
        raise ShouldNotOccurError(
            'Tried to return to previous random state without a stored state.')

    pythonState = self._storedFullState[0]
    numpyState = self._storedFullState[1]
    rState = self._storedFullState[2]

    self.setstate(pythonState)
    numpy.random.set_state(numpyState)

    from proto.RSetup import r
    # R reads .Random.seed from the global environment. A plain `<-` inside a
    # function only creates a local variable, so the seed has to be assigned
    # into .GlobalEnv explicitly for the restore to take effect.
    restoreRSeed = r(
        'function(state) {assign(".Random.seed", state, envir=.GlobalEnv)}')
    restoreRSeed(rState)

    self._storedFullState = None
def _seedAll(self): if DebugConfig.VERBOSE: print 'Seeding all randomization algorithms with: {} '.format( self._seed) self.seed(self._seed) numpy.random.seed(self._seed) from proto.RSetup import r r('function(seed) { set.seed(seed) }')(self._seed) r( 'runif(1)' ) # to harmonize with integration test results (based on earlier logic).
def _customRExecution(self, resDictKey, xlab, main):
    """Draw the raw (x, y) data for resDictKey as a line plot.

    The raw data supplies both coordinate lists and both axis labels, so the
    incoming xlab is not used. The plotted vectors are also kept, as an R
    list with elements "x" and "y", in self._plotResultObject.
    """
    from proto.RSetup import r, robjects

    rawData = self._getRawData(resDictKey)
    xValues, yValues, xLabel, yLabel = rawData
    xVec = robjects.FloatVector(xValues)
    yVec = robjects.FloatVector(yValues)

    plotFunc = r('plotFunc <- function(xVec, yVec, xlab, ylab, main) {plot(xVec, yVec, type="l", xlab=xlab, ylab=ylab, main=main)}')
    plotFunc(xVec, yVec, xLabel, yLabel, main)

    dataFunc = r('dataFunc <- function(xVec, yVec) {list("x"=xVec, "y"=yVec)}')
    self._plotResultObject = dataFunc(xVec, yVec)
def execute(choices, galaxyFn=None, username=''): from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile from proto.RSetup import r from quick.application.ExternalTrackManager import ExternalTrackManager from proto.hyperbrowser.HtmlCore import HtmlCore dataFn = ExternalTrackManager.extractFnFromGalaxyTN(choices[0]) sf = GalaxyRunSpecificFile(['fig1.png'], galaxyFn) sf.openRFigure() r(PlotFigure1Tool.rCode)(dataFn) sf.closeRFigure() core = HtmlCore() core.begin() core.image(sf.getURL()) core.end() print str(core)
def _customRExecution(self, resDictKey, xlab, main):
    """Draw a blue histogram of the raw data for resDictKey.

    The title is wrapped at 60 characters on the R side. The number of
    breaks is a fifth of the data point count, but never less than 10.
    The value returned by the R function is stored in
    self._plotResultObject.
    """
    from proto.RSetup import r, robjects

    histSource = (
        'ourHist <- function(vec, xlab, main, numBins) '
        '{main = paste(strwrap(main, width=60), collapse="\n"); '
        'hist(vec, col="blue", breaks=numBins, xlab=xlab, main=main)}'
    )
    values = robjects.FloatVector(self._getRawData(resDictKey))
    numBins = max(10, self._getDataPointCount(resDictKey)/5)

    ourHist = r(histSource)
    self._plotResultObject = ourHist(values, xlab, main, numBins)
def countHist(self, newResList):
    """Return [bin midpoint, density] pairs for a histogram of log10 values.

    Every non-zero element of newResList is replaced by its base-10
    logarithm, while zeros are kept as 0. The binning is done by R's
    hist(vec, prob=T); each bin contributes the midpoint of its two breaks
    together with its density.

    Returns:
        A list of [midpoint, density] lists; empty when newResList is empty.
    """
    if len(newResList) == 0:
        return []

    from proto.RSetup import r, robjects

    logValues = [0 if el == 0 else math.log(el, 10) for el in newResList]

    # , breaks=ceiling(max(vec)) - floor((min(vec)))
    rCode = 'dataRPois <- function(vec) {' \
            'hist(vec, prob=T)' \
            '}'
    histResult = r(rCode)(robjects.FloatVector(logValues))

    breaks = list(histResult.rx2('breaks'))
    density = histResult.rx2('density')
    try:
        counts = list(density)
    except TypeError:
        # Only a non-iterable (scalar) density is wrapped; the former bare
        # `except:` would also have hidden unrelated errors.
        counts = [density]

    return [[(breaks[pos] + breaks[pos + 1]) / 2, count]
            for pos, count in enumerate(counts)]
def getPlotDimensions(self, resDictKey): tableData = self._getRawData(resDictKey) assert isinstance(tableData, TableData) colNames = tableData.columnNamesAsNumpyArray rowNames = tableData.rowNamesAsNumpyArray colClust = tableData.colClust rowClust = tableData.rowClust self._cex = 1.0 * self.LABEL_TEXT_SIZE / self.POINT_SIZE from proto.RSetup import r charWidthHeightRatio = r("par('cin')[1]/par('cin')[2]") marginBottom = int(colNames.dtype.itemsize * charWidthHeightRatio * self.LABEL_TEXT_SIZE) marginRight = int(rowNames.dtype.itemsize * charWidthHeightRatio * self.LABEL_TEXT_SIZE) self._marginBottom = self.RATIO_BOTTOM_RIGHT_MARGIN_ADJUST * marginBottom self._marginRight = self.RATIO_BOTTOM_RIGHT_MARGIN_ADJUST * marginRight blockSize = self.BLOCK_SIZE # maxRatio = self.MAX_RATIO_OF_HEATMAP_VS_BR_MARGIN # if blockSize * len(rowNames) < self._marginBottom * maxRatio: # blockSize = 1.0 * self._marginBottom * maxRatio / len(rowNames) # if blockSize * len(colNames) < self._marginRight * maxRatio: # blockSize = 1.0 * self._marginRight * maxRatio / len(colNames) self._mapHeight = blockSize * len(rowNames) + self._marginBottom self._mapWidth = blockSize * len(colNames) + self._marginRight smallestBottomRightMargin = min(self._marginBottom, self._marginRight) marginTop = self.RATIO_OF_TOP_LEFT_TO_SMALLEST_BR_MARGIN * smallestBottomRightMargin marginLeft = self.RATIO_OF_TOP_LEFT_TO_SMALLEST_BR_MARGIN * smallestBottomRightMargin marginTop = self._adjustMarginByDendSize(marginTop, colClust, self._mapHeight) marginLeft = self._adjustMarginByDendSize(marginLeft, rowClust, self._mapWidth) smallestTopLeftMargin = min(marginTop, marginLeft) topLeftMargin = max(smallestTopLeftMargin, self.MIN_TOP_LEFT_MARGIN) self._marginTop = topLeftMargin self._marginLeft = topLeftMargin ret = self._marginLeft + self._mapWidth, \ self._marginTop + self._mapHeight if self._printDimensions: from proto.hyperbrowser.HtmlCore import HtmlCore print str(HtmlCore().styleInfoBegin(styleClass='debug')) 
print self._marginLeft, self._mapWidth, \ self._marginTop, self._mapHeight print ret print str(HtmlCore().styleInfoEnd()) return ret
def _compute(self):
    """Evaluate the edge-corrected K-function of the points in the bin.

    Children supply, in order: the track whose start positions are used as
    points, the bin size (upper end b of the interval [0, b]) and the global
    point count n. The R function is evaluated at distance self._bpWindow.

    Returns:
        None when there are no points, otherwise the value returned by R.
    """
    # The child result is fetched once; it used to be fetched a second time
    # into a local that was never read.
    points = self._children[0].getResult().startsAsNumpyArray()
    binSize = self._children[1].getResult()
    globalPointCount = self._children[2].getResult()

    rCode = '''
k<-function(x,r,a,b,n) {
  if (min(x)<a || max(x)>b) stop("Points must be in interval [a,b]!")
  dmat<-as.matrix(dist(x)) ## calculate distanced between all pairs of points
  diag(dmat)<-Inf ## distance of point to itself (zero) should not be counted, so set this to infinite (i.e., never less than r)
  wmat<-outer(x,x,function(x,y) (pmin(b,pmax(x+abs(x-y),a))-pmin(b,pmax(x-abs(x-y),a)))/(2*abs(x-y))) ## calculate edge correction weights
  iwmat<-1/wmat ## inverse of edge correction weights
  diag(iwmat)<-0 ## distance of point to itself (zero) should not be counted, so set this to zero (i.e., drops out of sum
  k<-sum(iwmat*(dmat<r))/n^2 ## final K-function: (sum of inverse weights iw_ij where d_ij<r)/n^2
  k/(2*r)
}
'''
    if len(points) == 0:
        return None

    from proto.RSetup import r, robjects
    kFunction = r(rCode)
    return kFunction(robjects.FloatVector(points), self._bpWindow, 0,
                     binSize, globalPointCount)
def _customRExecution(self, resDictKey, xlab, main):
    """Scatter-plot three min-max scaled series pairwise against each other.

    The second element of the raw data must hold exactly three value lists.
    Each list is scaled to [0, 1] and the pairs (1, 2), (1, 3) and (2, 3)
    are drawn in red, green and blue on axes fixed to [0, 1]. Axis labels
    and title are hard-coded in the R code; xlab and main are not used.
    The value returned by the R function is stored in
    self._plotResultObject.
    """
    rCode = '''
ourPlotter <- function(l1,l2,l3) {
v1 = unlist(l1)
v2 = unlist(l2)
v3 = unlist(l3)
#genes2[genes2==0]=NaN
#par(mfrow=c(1,2))
plot(v1, v2,xlim=c(0,1),col='red',ylim=c(0,1),xlab='Inside/Outside', ylab='Outside/Coverage',main='Comparison of scaled mean values inside and outside, as well as coverage')
lines(c(0,1),c(0,1),lty='dashed')
points(v1, v3,xlim=c(0,1),col='green',ylim=c(0,1))
points(v2, v3,xlim=c(0,1),col='blue',ylim=c(0,1))
#lines(c(0,1),c(0,1),lty='dashed')
legend('topleft',c('Inside vs Outside','Inside vs Coverage','Outside vs Coverage'),col=c('red','green','blue'),lty=1)
}
'''

    def scaleToUnitInterval(values):
        # Min-max scaling; a constant series has no spread, so it is mapped
        # to 0.0 instead of dividing by zero.
        minY, maxY = min(values), max(values)
        span = maxY - minY
        if span == 0:
            return [0.0 for _ in values]
        return [1.0 * (y - minY) / span for y in values]

    rawData = self._getRawData(resDictKey)
    yLists = rawData[1]
    assert len(yLists) == 3
    scaledYLists = [scaleToUnitInterval(yl) for yl in yLists]

    from proto.RSetup import r
    # NOTE(review): the scaled lists used to be computed and then discarded,
    # with the unscaled data being plotted. The axes are fixed to [0, 1] and
    # the title says "scaled", so the scaled values are passed on here --
    # confirm that this is the intended picture.
    self._plotResultObject = r(rCode)(scaledYLists[0], scaledYLists[1],
                                      scaledYLists[2])
def _compute(self):
    """Test whether two relative frequencies c1/n1 and c2/n2 differ.

    Children supply n1, n2, c1 and c2, in that order. When both counts reach
    MIN_POP_FOR_GAUSSIAN_APPROXIMATION a z-test with pooled proportion is
    used; otherwise, for the 'different' tail and a large enough c1 + c2,
    R's fisher.test on the 2x2 table. In all remaining cases P-value, test
    statistic and SE are None.

    Returns:
        OrderedDict with 'P-value', 'Test statistic: Z-score', 'EstDiff',
        'SEDiff', 'CountTrack1' and 'CountTrack2'.
    """
    n1, n2, c1, c2 = [self._children[pos].getResult() for pos in range(4)]

    p = 1.0 * c1 / n1
    q = 1.0 * c2 / n2
    pooled = 1.0 * (n1 * p + n2 * q) / (n1 + n2)
    tail = self._tail

    minGaussianPop = DiffRelFreqPValStat.MIN_POP_FOR_GAUSSIAN_APPROXIMATION
    if c1 >= minGaussianPop and c2 >= minGaussianPop:
        se = math.sqrt(pooled * (1 - pooled) / n1 + pooled * (1 - pooled) / n2)
        zScore = (p - q) / se
        if tail == 'more':
            pval = 1.0 - stats.zprob(zScore)
        elif tail == 'less':
            pval = stats.zprob(zScore)
        elif tail == 'different':
            # Twice the smaller tail probability, capped at 1.
            lowerTail = stats.zprob(zScore)
            pval = min(1.0, 2.0 * min(1.0 - lowerTail, lowerTail))
    elif c1 + c2 >= DiffRelFreqPValStat.MIN_SUM_OF_POP_FOR_FISHER_TEST \
            and tail == 'different':
        # 2x2 table: (hits, misses) for track 1, then for track 2.
        a = int(c1)
        b = int(n1 - c1)
        c = int(c2)
        d = int(n2 - c2)

        from proto.RSetup import r, robjects
        twoByTwo = r.matrix(robjects.IntVector([a, b, c, d]), nrow=2)
        fisherResult = r('fisher.test')(twoByTwo)
        pval = r('function(x){x$p.value}')(fisherResult)
        se = None
        zScore = '(2x2=%i,%i,%i,%i)' % (a, b, c, d)
    else:
        zScore = pval = se = None

    return OrderedDict([
        ('P-value', pval),
        ('Test statistic: Z-score', zScore),
        ('EstDiff', p - q),
        ('SEDiff', se),
        ('CountTrack1', c1),
        ('CountTrack2', c2),
    ])
def _getCluster(self, matrix, transpose):
    """Cluster the rows of matrix hierarchically in R and return the result.

    Works on a copy of matrix (transposed first when transpose is set), in
    which NaN entries are replaced by the mean of the finite entries.
    self._distMethod may carry a '_positive' or '_min_<value>' suffix; the
    matrix is then passed through self._filterMatrix and only the part
    before the first underscore is used as distance method. For
    'inversedotproduct', 'correlation' and 'absolutecorrelation' the
    distance matrix is computed here; otherwise R computes it.
    self._clustMethod selects diana or a flashClust method.
    """
    rFunc = '''
function(matrix, distMethod, clustMethod, distMatrix){
    if (distMethod == "spearman") {
        library(bioDist);
        distMatrix = spearman.dist(matrix, abs=FALSE);
    } else if (distMethod == "inversedotproduct" | distMethod == "correlation" | distMethod == "absolutecorrelation") {
        distMatrix = as.dist(distMatrix)
    } else {
        distMatrix = dist(matrix, method=distMethod, p=3);
    }
    if (clustMethod == "diana") {
        library(cluster);
        return(as.hclust(diana(distMatrix)));
    } else {
        library(flashClust);
        return(flashClust(distMatrix, method=clustMethod));
    }
}
'''
    matrix = copy(matrix)
    if transpose:
        matrix = matrix.transpose()
    matrix[numpy.isnan(matrix)] = matrix[numpy.isfinite(matrix)].mean()

    # Work out an optional filter threshold from the method name suffix.
    method = self._distMethod
    minMarker = '_min_'
    threshold = None
    if method.endswith('_positive'):
        threshold = 0
    elif minMarker in method:
        threshold = float(method[method.find(minMarker) + len(minMarker):])

    if threshold is not None:
        matrix = self._filterMatrix(matrix, threshold)
        method = method.split('_')[0]

    if method == 'inversedotproduct':
        numVectors = matrix.shape[0]
        distMatrix = numpy.zeros(shape=[numVectors, numVectors])
        assert (matrix < 0).sum() == 0, \
            'Inverse dot product does not work correctly on negative numbers.'
        # Only the entries below the diagonal are filled in.
        for col in xrange(numVectors):
            for row in xrange(col + 1, numVectors):
                dotProduct = numpy.dot(matrix[row, :], matrix[col, :])
                distMatrix[row, col] = 1.0 / (dotProduct + 1)
    elif method == 'correlation':
        distMatrix = 1 - matrix
    elif method == 'absolutecorrelation':
        distMatrix = 1 - numpy.absolute(matrix)
    else:
        distMatrix = None

    from proto.RSetup import r
    clusterFunc = r(rFunc)
    return clusterFunc(matrix, method, self._clustMethod, distMatrix)
def gwPlotting(inFn, outFn):
    """Create a genome-wide plot of the BED file inFn and store it as outFn.

    The R scripts plotBed.r and GenomePlot.r are sourced from the 'rCode'
    directory of the source tree. plot.genome() writes into the directory
    of outFn; the file named '.pdf' found there afterwards is moved to
    outFn.
    """
    def _sourceTreePath(*parts):
        return os.sep.join([HB_SOURCE_CODE_BASE_DIR] + list(parts))

    outDir = os.path.split(outFn)[0]
    plotBedFn = _sourceTreePath('rCode', 'plotBed.r')
    plotGenomeFn = _sourceTreePath('rCode', 'GenomePlot.r')
    cytobandsFn = _sourceTreePath('data', 'cytoband_mm8.txt')

    from proto.RSetup import r
    rStatements = [
        'source("%s")' % plotBedFn,
        'source("%s")' % plotGenomeFn,
        'cytoband = read.table("%s", header=TRUE)' % cytobandsFn,
        'loadedBedData <- plot.bed("%s")' % inFn,
        'plot.genome(segments=loadedBedData, unit="bp", dir.print="%s")' % outDir,
    ]
    for statement in rStatements:
        r(statement)

    shutil.move(outDir + os.sep + '.pdf', outFn)
def _customRExecution(self, resDictKey, xlab, main):
    """Draw a scatter plot of the raw (x, y) data with a red lowess curve.

    The incoming xlab is replaced: the axes are always labelled
    'Stat-values on track1' and 'Stat-values on track2'. The plotted vectors
    are also kept, as an R list with elements "x" and "y", in
    self._plotResultObject.
    """
    from proto.RSetup import r, robjects

    xValues, yValues = self._getRawData(resDictKey)
    xVec = robjects.FloatVector(xValues)
    yVec = robjects.FloatVector(yValues)

    plotFunc = r('plotFunc <- function(xVec, yVec, xlab, ylab, main) {plot(xVec, yVec, xlab=xlab, ylab=ylab, main=main); lines(lowess(xVec, yVec),col="red")}')
    # Fixed axis labels for the scatter-plot case.
    plotFunc(xVec, yVec, 'Stat-values on track1', 'Stat-values on track2', main)

    dataFunc = r('dataFunc <- function(xVec, yVec) {list("x"=xVec, "y"=yVec)}')
    self._plotResultObject = dataFunc(xVec, yVec)
def adjustPvalues(cls, pvals, estimationMethod='Pounds&Cheng', verbose=True): #from sys import stderr #stderr.write('McFdr.adjustPvalues starting') from proto.RSetup import r pvals = [x if x is not None else numpy.nan for x in pvals] #notNoneIndices = [i for i in range(len(pvals)) if pvals[i] is not None] #notNonePvals = #print 'PVALS: ',pvals nonNanPvals = [p for p in pvals if not numpy.isnan(p)] if len(nonNanPvals)==0: #stderr.write('McFdr.adjustPvalues ending early.') return pvals #either empty or only nan-values.. if len(nonNanPvals)<cls.MIN_NUM_TESTS_FOR_PI0_ESTIMATION: pi0 = 1.0 else: if estimationMethod == 'Convest': pi0 = r.convest(pvals) elif estimationMethod == 'Histf1': pi0 = r.histf1(pvals) elif estimationMethod == 'Pounds&Cheng': pi0 = min(1.0, mean( nonNanPvals )*2.0) #r('histf1SeqPerm <- function(p) {histf1(p,seq.perm=TRUE)}') #pi0 = r.histf1SeqPerm(pvals) else: raise Exception('Invalid estimationMethod: ' + str(estimationMethod)) if IS_EXPERIMENTAL_INSTALLATION and verbose: print 'Estimated pi0: ',pi0 #fdrVals = r.fdr(pvals, pi0) nonNanFdrVals = r('fdrFunc <- function(pv,pi0){ vec1 <- unlist(pv); fdr(vec1,pi0)}')(nonNanPvals,pi0) #r('fdrFunc <- function(pv,pi0){ vec1 <- unlist(pv); vec1}')(pvals,pi0) if len(nonNanPvals)==1: nonNanFdrVals = [nonNanFdrVals] #if not type(fdrVals) in (list,tuple): #print 'type(fdrVals): ',type(fdrVals) #fdrVals = [fdrVals] if len(nonNanPvals) != len(pvals): nonNanFdrVals = numpy.array(list(nonNanFdrVals)) #from R vector to python list to numpy array (to avoid numpy being confused by direct conversion from rpy.FloatVector) nonNanPvalIndicator = (numpy.isnan(pvals)==False) fdrVals = numpy.zeros( len(pvals)) fdrVals[:] = numpy.NaN assert sum(nonNanPvalIndicator) == len(nonNanFdrVals) #print repr(nonNanPvalIndicator), repr(nonNanFdrVals) #print 'Types: ', nonNanPvalIndicator.dtype, nonNanFdrVals.dtype fdrVals[nonNanPvalIndicator] = nonNanFdrVals fdrVals = list(fdrVals) #back to python list else: fdrVals = nonNanFdrVals 
#stderr.write('McFdr.adjustPvalues ending') return fdrVals
def _writeContent(self, resDictKey, fn):
    """Write the plot for resDictKey to the file fn.

    A provisional 100x100 output device is opened while the plot dimensions
    are computed, then closed and replaced by a device of the final size on
    which self._customRExecution draws. Both devices are closed with
    dev.off() even when sizing or plotting raises, so that a failed plot
    does not leave an R device open.
    """
    from proto.RSetup import r
    ensurePathExists(fn)
    silenceRWarnings()

    # Provisional device -- presumably needed because getPlotDimensions
    # queries R graphics parameters; confirm against the subclasses.
    self._setOutputDevice(fn, height=100, width=100)
    try:
        width, height = self.getPlotDimensions(resDictKey)
    finally:
        r('dev.off()')

    self._setOutputDevice(fn, height=height, width=width)
    try:
        if resDictKey is not None:
            xlab = self._results.getLabelHelpPair(resDictKey)[0]
        else:
            xlab = None
        main = self._header
        self._customRExecution(resDictKey, xlab, main)
    finally:
        r('dev.off()')
def _customRExecution(self, resDictKey, xlab, main):
    """Draw a placeholder plot of the single point (2, 2).

    The raw data for resDictKey is fetched but not used by the plot; xlab
    and main are ignored as well. The value returned by R is stored in
    self._plotResultObject.
    """
    from proto.RSetup import r, robjects

    placeholderSource = '''
    plot(2,2)
    '''
    # Fetched (a python list of values) although the placeholder plot does
    # not read it; the call is kept for any effect it may have.
    rawData = self._getRawData(resDictKey)

    self._plotResultObject = r(placeholderSource)
def execute(cls, choices, galaxyFn=None, username=''): '''Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' import datetime print 'Analysis initiated at time: ',datetime.datetime.now() print 'Corresponding batch command line:<br>', '$Tool[mc_fdr_simulation_tool](%s)' % '|'.join(choices) #maxNumSamples, h, fdrThreshold, totalNumTests, stepSize, numReplications,a,b totalNumTests, stepSize, numReplications,a,b, h, maxNumSamples, fdrThreshold,estimatePi0, fdrStoppingCriterion, samplesPerChunk, samplesInitially = choices assert not any([x in ['',None] for x in choices]), 'choices: ' + str(choices) from test.sandbox.extra.McFdr import Experiment, MultipleTestCollection, Simulator if estimatePi0.lower().startswith('no'): MultipleTestCollection.ESTIMATE_PI0 = False else: MultipleTestCollection.ESTIMATE_PI0 = estimatePi0[3:] assert fdrStoppingCriterion in ['Simultaneous','Individual'] MultipleTestCollection.SIMULTANOUS_FDR_STOPPING_CRITERION = (fdrStoppingCriterion == 'Simultaneous') Simulator.NUM_SAMPLES_PER_CHUNK = int(samplesPerChunk) Simulator.NUM_SAMPLES_INITIALLY = int(samplesInitially) print 'Estimate Pi0: ',estimatePi0 print 'FDR Stopping Criterion: ', fdrStoppingCriterion print 'NumSamplesPerChunk: ',samplesPerChunk print 'NumSamplesInitially: ',samplesInitially from proto.RSetup import r if estimatePi0 == 'By Convest': r.source("http://www.nr.no/~egil/convest.R") #elif estimatePi0 == 'By Histf1': r('library(pi0)') if ',' in stepSize: stepSize = [int(x) for x in stepSize.split(',')] else: stepSize = int(stepSize) Experiment.compareCutoffSchemes(int(maxNumSamples), int(h), float(fdrThreshold),\ int(totalNumTests), stepSize, int(numReplications)\ ,float(a),float(b), galaxyFn)
def _combineResults(self):
    """Add proportions, per-bin correlation and obs/exp ratios to the result.

    Runs the parent class' combination first, then derives from the summed
    values: the coverage proportions of both tracks, the expected overlap
    proportions, the correlation (and its p-value) between the per-bin
    coverage proportions of the two tracks, the observed overlap proportion
    and its ratio to both expectations. Correlation values are None when
    there are fewer than two child results or the correlation is NaN;
    ratios are None when their expectation is not positive.
    """
    StatisticDictSumResSplittable._combineResults(self)

    res = self._result
    numBps = res['NumBps']
    res['t1prop'] = float(res['t1coverage']) / numBps
    res['t2prop'] = float(res['t2coverage']) / numBps
    res['ExpBothBpProportionGivenIndividualBinCoverage'] = \
        res['ExpBothBpCoverageGivenIndividualBinCoverage'] / numBps
    res['ExpBothBpProportionFromGlobalIndividualCoverage'] = \
        res['t1prop'] * res['t2prop']

    from proto.RSetup import robjects, r
    t1Props = [child['t1prop'] for child in self._childResults]
    t2Props = [child['t2prop'] for child in self._childResults]

    correlation = correlationPval = None
    if len(t1Props) > 1:
        t1Vec = robjects.FloatVector(t1Props)
        t2Vec = robjects.FloatVector(t2Props)
        corValue = float(r.cor(t1Vec, t2Vec))
        import numpy
        if not numpy.isnan(corValue):
            correlation = corValue
            corTest = r('function(x,y){res=cor.test(x,y); return(res$p.value)}')
            correlationPval = corTest(t1Vec, t2Vec)

    res['IndividualCoveragePerBinCorrelation'] = correlation
    res['IndividualCoveragePerBinCorrelationPvalue'] = correlationPval

    if numBps > 0:
        res['ObsBpProportionOverlap'] = float(res['ObsBpOverlap']) / numBps
    else:
        res['ObsBpProportionOverlap'] = None

    ratioToExpectation = [
        ('RatioOfObsToExpGivenGlobalCoverages',
         'ExpBothBpProportionFromGlobalIndividualCoverage'),
        ('RatioOfObsToExpGivenIndividualBinCoverages',
         'ExpBothBpProportionGivenIndividualBinCoverage'),
    ]
    for ratioKey, expectedKey in ratioToExpectation:
        expected = res[expectedKey]
        if expected > 0:
            res[ratioKey] = res['ObsBpProportionOverlap'] / expected
        else:
            res[ratioKey] = None
def _getClustFromPickledResult(self, id, clustKey, assertLen):
    """Load a clustering from a pickled run result and return it as hclust.

    The pickle file belonging to the Galaxy id built from the number id is
    loaded, the entry clustKey is taken from the global result of its first
    element and converted with R's as.hclust(). The length of the 'order'
    component of the clustering must equal assertLen.
    """
    from proto.RSetup import r
    from proto.hyperbrowser.StaticFile import RunSpecificPickleFile
    from quick.util.CommonFunctions import createFullGalaxyIdFromNumber

    galaxyId = createFullGalaxyIdFromNumber(id)
    pickledResults = RunSpecificPickleFile(galaxyId).loadPickledObject()
    clust = pickledResults[0].getGlobalResult()['Result'][clustKey]

    toHclust = r('function(clust){as.hclust(clust)}')
    hclust = toHclust(clust)
    assert len(hclust.rx2('order')) == assertLen
    return hclust
def _writeContent(self, resDictKey, fn):
    """Create a chromosome plot of the history file for resDictKey as fn.

    The R scripts plotBed.r and ChromosomePlot.r are sourced from the
    'rCode' directory of the source tree. plot.chrom() writes into the
    directory of fn, with the result label as y-axis label; the file named
    '.pdf' found there afterwards is moved to fn.
    """
    rCodeDir = [HB_SOURCE_CODE_BASE_DIR, 'rCode']
    plotBedFn = os.sep.join(rCodeDir + ['plotBed.r'])
    plotChrFn = os.sep.join(rCodeDir + ['ChromosomePlot.r'])

    historyFn = self._historyFilePresenter._getFn(resDictKey)
    outDir = os.path.split(fn)[0]

    from proto.RSetup import r
    for scriptFn in (plotBedFn, plotChrFn):
        r('source("%s")' % scriptFn)
    r('loadedBedData <- plot.bed("%s")' % historyFn)

    resultLabel = self._results.getLabelHelpPair(resDictKey)[0]
    r('plot.chrom(segments=loadedBedData, unit="bp", dir.print="%s", ylab="%s")'
      % (outDir, resultLabel))

    shutil.move(outDir + os.sep + '.pdf', fn)
def _compute(self):
    """Return the edge weight matrix of the graph, optionally normalized.

    self._normalizationMethod selects the transformation of res['Matrix']:
      'none'      -- unchanged
      'log'       -- natural logarithm
      'log+1'     -- natural logarithm of (value + 1)
      'p_inverse' -- 1 - value
      'p_to_normal_onesided' -- qnorm(1 - value)
      anything else (p_to_normal_twosided) -- qnorm(1 - value / 2)
    For the qnorm variants, probabilities of exactly 0 or 1 are moved by the
    machine epsilon first, to avoid -Inf and Inf values.

    Returns:
        {'Result': res}, where res is the matrix representation as returned
        by the graph, with its 'Matrix' entry replaced.
    """
    graph = self._graphStat.getResult()
    res = graph.getEdgeWeightMatrixRepresentation(
        completeMatrix=self._complete,
        rowsAsFromNodes=self._rowsAsFromNodes,
        missingEdgeWeight=np.nan)

    method = self._normalizationMethod
    # A single elif chain: 'log' used to be tested in an `if` of its own and
    # then fell through to the final `else`, so the logged matrix was pushed
    # through the p-value-to-normal transformation as well.
    if method == 'none':
        pass
    elif method == 'log':
        res['Matrix'] = np.log(res['Matrix'])
    elif method == 'log+1':
        res['Matrix'] = np.log(res['Matrix'] + 1)
    elif method == 'p_inverse':
        res['Matrix'] = 1 - res['Matrix']
    else:
        # R is only needed for the qnorm-based variants, so it is imported
        # here instead of at the top of the method.
        from proto.RSetup import robjects, r

        origShape = res['Matrix'].shape
        if method == 'p_to_normal_onesided':
            intermed = 1 - res['Matrix']
        else:  # p_to_normal_twosided
            intermed = 1 - res['Matrix'] / 2

        vec = robjects.FloatVector(intermed.flatten())
        # To remove -Inf and Inf values
        vec = r(
            'f <- function(vec){vec[vec==0] = .Machine$double.eps; vec[vec==1] = 1-.Machine$double.eps;vec}'
        )(vec)
        res['Matrix'] = np.array(list(r.qnorm(vec)))
        res['Matrix'].shape = origShape

    return {'Result': res}
def _compute(self):
    """Test the association between point counts and marks of segments.

    Runs R's cor.test with method "kendall" on the number of points per
    segment against the mark of each segment. Marks are passed as floats
    for mark type 'number' and as ints for mark type 'tc'.

    Returns:
        None when there are no segments, no points at all, or fewer than
        self.MIN_NUM_SEGS_FOR_TEST segments; otherwise an OrderedDict with
        'P-value', 'Test statistic: ObservedTau', 'NumberOfSegments' and
        'AverageNumberOfPointsInSegments'.
    """
    numPoints = self._numPointsPerSeg.getResult()
    marks = self._markPerSeg.getResult()
    assert len(numPoints) == len(marks)

    numSegments = len(marks)
    if numSegments == 0 or numPoints.sum() == 0 \
            or numSegments < self.MIN_NUM_SEGS_FOR_TEST:
        return None

    rVecCor = ''.join([
        'ourCor <- function(ourList1, ourList2) ',
        '{ vec1 <- unlist(ourList1); vec2 <- unlist(ourList2); ',
        'res <- cor.test(vec1,vec2,method="kendall");',
        "return(c(res[['p.value']], res[['estimate']][['tau']]))}",
    ])

    # Plain python numbers are handed over to R.
    if self._markType == 'number':
        compatibleMarks = [float(mark) for mark in marks]
    elif self._markType == 'tc':
        compatibleMarks = [int(mark) for mark in marks]
    compatibleNumPoints = [int(count) for count in numPoints]

    from proto.RSetup import r
    pval, tau = r(rVecCor)(compatibleNumPoints, compatibleMarks)

    return OrderedDict([
        ('P-value', pval),
        ('Test statistic: ObservedTau', tau),
        ('NumberOfSegments', len(marks)),
        ('AverageNumberOfPointsInSegments', 1.0 * numPoints.sum() / len(marks)),
    ])
def _compute(self):
    """Run R's cor.test on the results of the two children.

    self._method is passed on as the correlation method and self._rTail as
    the alternative hypothesis.

    Returns:
        OrderedDict with 'P-value', 'Test statistic: <method>' and
        'Correlation, <method>'. All three values are None when either
        child result holds fewer than two values.
    """
    x = self._children[0].getResult()
    y = self._children[1].getResult()

    # `correlation` used to be left unassigned for too-short input, so that
    # building the return value raised an UnboundLocalError.
    pval = testStat = correlation = None

    if len(x) >= 2 and len(y) >= 2:
        from proto.RSetup import r
        xAsR = r.unlist([float(num) for num in x])
        yAsR = r.unlist([float(num) for num in y])
        corTest = r('function(xAsR, yAsR, alternative,method){res = cor.test(xAsR, yAsR, alternative=alternative,method=method); return(list(res$p.value,res$statistic,res$estimate))}')
        pval, testStat, correlation = corTest(
            xAsR, yAsR, alternative=self._rTail, method=self._method)

    return OrderedDict([
        ('P-value', pval),
        ('Test statistic: ' + self._method, testStat),
        ('Correlation, ' + self._method, correlation),
    ])
def plotRHist(self, vals, breaks, main, saveRawData=True, alsoOpenAndClose=True, **kwArgs):
    """Draw a histogram of vals with R's hist() and optionally dump the data.

    Args:
        vals: the values to plot.
        breaks: a list/tuple of break points, or any other value accepted
            by the breaks argument of R's hist().
        main: the plot title.
        saveRawData: when True, a small R script reproducing the histogram
            (vals, breaks, the hist call and the intensities read from the
            result) is written to '<disk path>.raw.txt'.
        alsoOpenAndClose: when True, the R figure of this object is opened
            before and closed after plotting.
        **kwArgs: passed on to hist(); 'xlab' defaults to 'Values'.
    """
    from proto.RSetup import r, robjects

    rvals = robjects.FloatVector(vals)
    breaksIsSequence = isinstance(breaks, (list, tuple))
    if breaksIsSequence:
        rbreaks = robjects.FloatVector(breaks)
    else:
        rbreaks = breaks

    if 'xlab' not in kwArgs:
        kwArgs['xlab'] = 'Values'

    if alsoOpenAndClose:
        self.openRFigure()

    histRes = r.hist(rvals, breaks=rbreaks, main=main, **kwArgs)

    if saveRawData:
        rawFn = self.getDiskPath() + '.raw.txt'
        # The with-block closes the file even if one of the writes fails.
        with open(rawFn, 'w') as f:
            f.write('vals <- c(%s)' % ','.join(str(val) for val in vals) + '\n')
            if breaksIsSequence:
                f.write('breaks <- c(%s)' % ','.join(str(b) for b in breaks) + '\n')
            else:
                # The newline was missing here, which glued the following
                # hist(...) statement onto the same line of the script.
                f.write('breaks <- %s' % breaks + '\n')
            f.write('hist(vals, breaks=breaks) \n')
            intensities = r('function(r){r$intensities}')(histRes)
            f.write('intensities = c(%s)' % ','.join([str(x) for x in intensities]) + '\n')

    if alsoOpenAndClose:
        self.closeRFigure()