def inferValType(valList, shapeOffset=0):
    """Infer the GTrack value-type string for a value column.

    valList may be None, a list/tuple, or a numpy.ndarray/SmartMemmap.
    shapeOffset shifts which axes are inspected (e.g. when a leading axis
    is prepended per bin).

    Returns one of 'number', 'number (integer)', 'mean_sd', 'population',
    'tc', 'tc_vector', 'char', 'char_vector', 'category', 'category_vector',
    'unsupported list' - or False when valList is None.

    Raises ShouldNotOccurError for unrecognized shapes/dtypes/types.
    """
    if valList is None:
        return False
    elif type(valList) in [list, tuple]:
        return 'number'
    elif isinstance(valList, numpy.ndarray) or isinstance(
            valList, SmartMemmap):
        # float128 pairs along the vector axis encode (mean, sd)
        if len(valList.shape) == 2 + shapeOffset and valList.shape[
                1 +
                shapeOffset] == 2 and valList.dtype == numpy.dtype('float128'):
            return 'mean_sd'
        elif any(valList.dtype == numpy.dtype(x)
                 for x in ['float32', 'float64', 'float128']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'number'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        if any(valList.dtype == numpy.dtype(x) for x in ['int32', 'int64']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'number (integer)'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        elif any(valList.dtype == numpy.dtype(x) for x in ['int8', 'bool8']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'tc'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'tc_vector'
        elif valList.dtype == numpy.dtype('S1'):
            if len(valList.shape) == 1 + shapeOffset:
                return 'char'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'char_vector'
        elif _dtypeIsStringLongerThanOne(valList.dtype):
            if len(valList.shape) == 1 + shapeOffset:
                return 'category'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'category_vector'

        # Bugfix: guard the axis lookup. A 1-D array with an unrecognized
        # dtype previously raised IndexError here instead of reaching the
        # intended ShouldNotOccurError below.
        if len(valList.shape) > 1 + shapeOffset and \
                valList.shape[1 + shapeOffset] == 0:
            return 'unsupported list'

        logMessage('Shape or dtype not recognized: ' + str(valList.shape) +
                   ' and ' + str(valList.dtype))
        raise ShouldNotOccurError()

    else:
        logMessage('Type of valList not recognized: ' + str(type(valList)))
        raise ShouldNotOccurError()
Ejemplo n.º 2
0
    def __init__(self, region, trackStructure, *args, **kwArgs):
        """Store the analysis region and track structure, validate the
        optional 'isExperimental' and 'assumptions' keywords, then delegate
        remaining setup to self._init(**kwArgs)."""
        from config.Config import IS_EXPERIMENTAL_INSTALLATION  # @UnresolvedImport

        if 'isExperimental' in kwArgs:
            experimentalFlag = kwArgs['isExperimental'].lower()
            if experimentalFlag not in ['false', 'true']:
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')
            if experimentalFlag == 'true':
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region = region
        self._trackStructure = trackStructure

        #TODO:boris 20150924, Code for checking if query and reference (track and track2) are the same track.
        #We should decide if we will allow this in the future.

        #TODO: This should probably instead happen in the default _init method, so that when this is
        # overridden, one needs to explicitly store kwArgs if desired.
        #As it is now, parameters will be handled explicitly in _init while still becoming part of self_kwArgs
        self._kwArgs = kwArgs
        self._init(**kwArgs)

        self._trace('__init__')
    def __init__(self, region, trackStructure, *args, **kwArgs):
        """Initialize with an analysis region and a TrackStructure.

        Validates the optional 'isExperimental' keyword ('true'/'false'
        only) and any 'assumptions' before storing state and forwarding
        kwArgs to self._init."""
        from config.Config import IS_EXPERIMENTAL_INSTALLATION  # @UnresolvedImport

        if 'isExperimental' in kwArgs:
            flagValue = kwArgs['isExperimental'].lower()
            if flagValue not in ['false', 'true']:
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')
            if flagValue == 'true':
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region, self._trackStructure = region, trackStructure

        #TODO:boris 20150924, Code for checking if query and reference (track and track2) are the same track.
        #We should decide if we will allow this in the future.
        self._kwArgs = kwArgs
        self._init(**kwArgs)

        self._trace('__init__')
Ejemplo n.º 4
0
 def _track(self):
     """Return the first track of the query track list.

     Raises ShouldNotOccurError when the track structure has no query
     entry or its query track list is empty."""
     hasQueryEntry = TrackStructure.QUERY_KEY in self._trackStructure
     if not hasQueryEntry or not self._trackStructure.getQueryTrackList():
         raise ShouldNotOccurError(
             'Track structure must contain a query list of at least one track'
         )
     return self._trackStructure.getQueryTrackList()[0]
Ejemplo n.º 5
0
 def _adjustComplementaryEdgeWeightDict(complementEdgeWeightDict, id, edges, weights):
     """Register the edges of node `id`, verifying undirectedness.

     For each edge id -> edgeId:
     - if the reverse edge was recorded earlier, check that the weights are
       equal in both directions (treating NaN == NaN as equal) and remove
       the matched entry;
     - self-loops (id == edgeId) are skipped;
     - otherwise record the weight so the reverse edge can verify it later.

     Raises InvalidFormatError when the two directions carry different
     weights, and ShouldNotOccurError if the same complementary edge is
     registered twice.
     """
     for index, edgeId in enumerate(edges):
         weight = weights[index] if weights is not None else ''

         if id in complementEdgeWeightDict and edgeId in complementEdgeWeightDict[id]:
             complWeight = complementEdgeWeightDict[id][edgeId]
             try:
                 # Bugfix: parenthesize the comparisons. '|' and '&' bind
                 # tighter than '==', so the old expression computed
                 # complWeight == (weight | (isnan(...) & isnan(...)))
                 # instead of OR-ing the equality mask with the both-NaN
                 # mask, wrongly rejecting equal NaN-containing weights.
                 equal = numpy.all((complWeight == weight) |
                                   (numpy.isnan(complWeight) & numpy.isnan(weight)))
             except TypeError:
                 try:
                     equal = (complWeight == weight) or (numpy.isnan(complWeight) and numpy.isnan(weight))
                 except (TypeError, ValueError):
                     equal = numpy.all(complWeight == weight)
             if not equal:
                 raise InvalidFormatError("Error: edge ('%s' <-> '%s') is not undirected. The weight must be equal in both directions (%s != %s)" % (edgeId, id, complementEdgeWeightDict[id][edgeId], weights[index]))
             del complementEdgeWeightDict[id][edgeId]
             if len(complementEdgeWeightDict[id]) == 0:
                 del complementEdgeWeightDict[id]

         elif id == edgeId:
             continue

         elif edgeId in complementEdgeWeightDict:
             if id in complementEdgeWeightDict[edgeId]:
                 raise ShouldNotOccurError('Error: the complementary edge(%s) has already been added to complementEdgeWeightDict["%s"] ... ' % (id, edgeId))
             complementEdgeWeightDict[edgeId][id] = weight
         else:
             complementEdgeWeightDict[edgeId] = {id: weight}
Ejemplo n.º 6
0
    def _determineStatClass(self, flushMemoized=True):
        """Trial-run each candidate statistic class on a minimal dummy bin
        source to find one compatible with the current track pair.

        NOTE(review): the method appears truncated in this excerpt -
        selection/return of the validated statClass presumably follows the
        loop; confirm against the full file.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        # Cheap single-bin source: only compatibility is tested, not results
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            # if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(statClass.__name__, self.getDefAfterChoices()))

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                # Minimal trial run; incompatibility surfaces as an exception
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getAllChoices(filterByActivation=True)).run(False, flushMemoized=flushMemoized)

            except IncompatibleTracksError, e:  # Python 2 except syntax
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                # Only propagate validation failures when debugging demands it
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
 def _resolveFunction(self, summaryFunc):
     """Map summaryFunc to its callable via self.functionDict.

     Raises ShouldNotOccurError listing the valid keys when summaryFunc
     is not a known function name."""
     if summaryFunc in self.functionDict:
         return self.functionDict[summaryFunc]
     raise ShouldNotOccurError(
         str(summaryFunc) + ' not in list, must be one of ' +
         str(sorted(self.functionDict.keys())))
Ejemplo n.º 8
0
    def __init__(self, region, track, track2=None, *args, **kwArgs):
        """Store the region and track(s), validate the optional
        'isExperimental' and 'assumptions' keywords, then delegate the rest
        of the setup to self._init(**kwArgs)."""
        from config.Config import IS_EXPERIMENTAL_INSTALLATION

        if 'isExperimental' in kwArgs:
            experimental = kwArgs['isExperimental'].lower()
            if experimental not in ['false', 'true']:
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')
            if experimental == 'true':
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION
        #else:
        #    assert IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region = region
        self._track = track
        # Only set _track2 when a non-empty second track is supplied
        if track2 not in [None, []]:
            self._track2 = track2
        self._kwArgs = kwArgs
        self._init(**kwArgs)

        self._trace('__init__')
Ejemplo n.º 9
0
    def _getValInCorrectType(self,
                             val,
                             valueOrEdgeWeight='value',
                             isEmptyElement=False):
        """Coerce val to the most specific GTrack value type still consistent
        with all values seen so far for this column.

        Tries the types in listed order, skipping those already ruled out
        for this column (tracked via self._valTypeIndexDict, which only
        moves towards later/more general types). On the first match, the
        type index is recorded, any '<col> type'/'<col> dimension' headers
        not explicitly present in the file are auto-filled, and conversion
        is delegated to the base class implementation.

        Raises ShouldNotOccurError if no value type matches.
        """
        headerDictInFile = self.getHeaderDictInFile()

        valTypeList = ['binary', 'number', 'category', 'character']
        for i, valueType in enumerate(valTypeList):
            # Skip types already eliminated for this column in earlier calls
            if valueOrEdgeWeight in self._valTypeIndexDict and self._valTypeIndexDict[
                    valueOrEdgeWeight] > i:
                continue

            valTypeInfo = GtrackGenomeElementSource.VAL_TYPE_DICT[valueType]

            if self._isValOfParticularType(val, valTypeInfo):
                self._noteIfAllValuesAreMissing(valueOrEdgeWeight, val,
                                                valTypeInfo)
                self._valTypeIndexDict[valueOrEdgeWeight] = i

                valueDim = self._getGtrackValueDim(val, valTypeInfo,
                                                   valueOrEdgeWeight)

                # Only auto-fill headers the file did not specify explicitly
                if not '%s type' % valueOrEdgeWeight in headerDictInFile:
                    self._headerDict['%s type' %
                                     valueOrEdgeWeight] = valTypeList[i]
                if not '%s dimension' % valueOrEdgeWeight in headerDictInFile:
                    self._headerDict['%s dimension' %
                                     valueOrEdgeWeight] = valueDim

                return GtrackGenomeElementSource._getValInCorrectType(
                    self, val, valueOrEdgeWeight, isEmptyElement)
        raise ShouldNotOccurError()
 def _compute(self):
     """Collect all child statistic results and reduce them with the
     configured summary function.

     Raises ShouldNotOccurError when no summary function is set."""
     if not self._summaryFunction:
         raise ShouldNotOccurError('The summary function is not defined')
     childResults = [child.getResult() for child in self._children]
     return self._summaryFunction(childResults)
Ejemplo n.º 11
0
 def _compute(self):
     """Reduce child results with self._multitrackSummaryFunc; the special
     value 'RawResults' returns the list of child results unreduced.

     Raises ShouldNotOccurError when no summary function is configured."""
     summaryFunc = self._multitrackSummaryFunc
     if not summaryFunc:
         raise ShouldNotOccurError('The summary function is not defined')
     childResults = [child.getResult() for child in self._children]
     return childResults if summaryFunc == 'RawResults' else summaryFunc(childResults)
Ejemplo n.º 12
0
def returnToStoredState():
    """Restore the Python, numpy and R random states previously saved in
    random._storedStates.

    Raises ShouldNotOccurError if no state has been stored.
    """
    if random._storedStates is None:
        # Bugfix: the exception was returned as a value instead of being
        # raised, so calling this without a stored state silently did
        # nothing and the error went unnoticed.
        raise ShouldNotOccurError(
            'Tried to return to previous random state without a stored state.')

    random.setstate(random._storedStates[0])
    numpy.random.set_state(random._storedStates[1])
    from proto.RSetup import r
    r('function(state) {.Random.seed <- state}')(random._storedStates[2])
Ejemplo n.º 13
0
 def createChildren(self):
     """Create the child statistic for the first bin, unless a result or a
     current child already exists (then this is a no-op)."""
     if self.hasResult() or self._curChild is not None:
         return
     self._trace('_createChildren')
     #logMessage(str(self._bins))
     try:
         # Python 2 iterator protocol: self._bins.next()
         self._curChild = self._getChildObject(self._bins.next())
     except StopIteration,e:
         # Zero bins: a splittable statistic must have at least one bin
         logException(e)
         raise ShouldNotOccurError('Splittable statistic should not have zero bins!')
Ejemplo n.º 14
0
 def _compute(self):
     """Reduce child results with self._summaryFunction; the special value
     'RawResults' returns the list of child results unreduced.

     Raises ShouldNotOccurError when no summary function is defined."""
     if self._summaryFunction:
         if self._summaryFunction == 'RawResults':
             resultList = [child.getResult() for child in self._children]
             return resultList
         else:
             childrenResList = [child.getResult() for child in self._children]
             return self._summaryFunction(childrenResList)
     else:
         # Bugfix: the format spec was a bare '%' (no conversion type), so
         # building this message raised ValueError('incomplete format')
         # instead of the intended ShouldNotOccurError.
         raise ShouldNotOccurError('The summary function is not defined. Must be one of %s' % str(sorted(self.functionDict.keys())))
Ejemplo n.º 15
0
    def returnToStoredFullState(self):
        """Restore the Python, numpy and R random states saved by the
        matching store call, then clear the stored state.

        Raises ShouldNotOccurError if no full state has been stored.
        """
        if self._storedFullState is None:
            # Bugfix: the exception was returned as a value instead of being
            # raised, silently doing nothing on a missing stored state.
            raise ShouldNotOccurError(
                'Tried to return to previous random state without a stored state.'
            )

        self.setstate(self._storedFullState[0])
        numpy.random.set_state(self._storedFullState[1])
        from proto.RSetup import r
        r('function(state) {.Random.seed <- state}')(self._storedFullState[2])
        self._storedFullState = None
Ejemplo n.º 16
0
 def _commonGetBpLevelArray(self, vals):
     """Expand per-element values to a bp-level array.

     Dense representations are returned unchanged (overlaps are illegal
     there); sparse ones are expanded by adding the values at element
     starts, subtracting them at element ends, and cumulative-summing."""
     if self.trackFormat.reprIsDense():
         if self.allowOverlaps:
             raise ShouldNotOccurError()
         return vals

     bpLevelArray = numpy.zeros(self._bpSize() + 1)
     if self.getNumElements() > 0:
         bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
         bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
         # float64 accumulator avoids precision loss in the running sum
         bpLevelArray = bpLevelArray.cumsum(dtype='float64')
     return bpLevelArray[:-1]
Ejemplo n.º 17
0
 def __new__(cls, sortedGeSource):
     """Dispatch to the overlap-clusterer variant matching the element kind
     implied by which of 'start'/'end' the source provides:
     neither -> Function, both -> Segment, start only -> Point,
     end only -> Partition."""
     hasStart = 'start' in sortedGeSource.getPrefixList()
     hasEnd = 'end' in sortedGeSource.getPrefixList()

     if hasStart:
         clustererCls = GEOverlapClusterer_Segment if hasEnd else GEOverlapClusterer_Point
     elif hasEnd:
         clustererCls = GEOverlapClusterer_Partition
     else:
         clustererCls = GEOverlapClusterer_Function
     return clustererCls(sortedGeSource)
Ejemplo n.º 18
0
 def createBoundingRegionShelve(genome, trackName, allowOverlaps):
     """Build and persist the bounding-region shelve for a preprocessed
     track; non-dense tracks get their bounding regions sorted first.

     Raises ShouldNotOccurError when the stored element count disagrees
     with the collector's count (sanity check)."""
     collector = PreProcMetaDataCollector(genome, trackName)
     isDense = collector.getTrackFormat().reprIsDense()

     boundingRegionTuples = collector.getBoundingRegionTuples(allowOverlaps)
     if not isDense:
         boundingRegionTuples = sorted(boundingRegionTuples)

     brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
     brShelve.storeBoundingRegions(boundingRegionTuples,
                                   collector.getPreProcessedChrs(allowOverlaps),
                                   not isDense)

     #Sanity check
     if brShelve.getTotalElementCount() != collector.getNumElements(allowOverlaps):
         raise ShouldNotOccurError("Error: The total element count for all bounding regions is not equal to the total number of genome elements. %s != %s" % \
                                   (brShelve.getTotalElementCount(), collector.getNumElements(allowOverlaps)) )
    def _getArchiveReader(cls, choices):
        """Return a Tar or Zip archive reader matching the suffix of the
        selected Galaxy archive dataset.

        Raises ShouldNotOccurError for any other suffix."""
        from gold.gsuite.GSuiteArchiver import TarArchiveReader, ZipArchiveReader
        from quick.application.ExternalTrackManager import ExternalTrackManager

        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN\
            (choices.archive, allowUnsupportedSuffixes=True)
        fn = ExternalTrackManager.extractFnFromGalaxyTN(choices.archive)

        readerClsBySuffix = {'gsuite.tar': TarArchiveReader,
                             'gsuite.zip': ZipArchiveReader}
        if suffix not in readerClsBySuffix:
            raise ShouldNotOccurError()
        return readerClsBySuffix[suffix](fn)
def findEmptyVal(valDataType):
    """Return the 'missing value' placeholder for a dtype-like string.

    String dtypes ('str'/'S...') map to '', integer dtypes to
    BINARY_MISSING_VAL, float dtypes to NaN, and bool dtypes to False.

    Raises ShouldNotOccurError for any other dtype string.
    """
    if 'str' in valDataType or 'S' in valDataType:
        return ''
    if 'int' in valDataType:
        from gold.util.CommonConstants import BINARY_MISSING_VAL
        return BINARY_MISSING_VAL
    if 'float' in valDataType:
        return numpy.nan
    if 'bool' in valDataType:
        return False
    from gold.util.CustomExceptions import ShouldNotOccurError
    raise ShouldNotOccurError('Error: valDataType (%s) not supported.' % valDataType)
Ejemplo n.º 21
0
 def _compute(self):
     """Aggregate the child track's float values with sum, min or max.

     Returns numpy.nan when the value array is empty. The sum uses a
     float64 accumulator to avoid rounding errors on float32 input.

     Raises ShouldNotOccurError for an unknown aggregate operation."""
     valueArray = self._children[0].getResult().valsAsNumpyArray()
     if len(valueArray) == 0:
         return numpy.nan
     assert valueArray.dtype == "float32" or valueArray.dtype == "float64"

     operation = self._aggregateOperation
     if operation == 'sum':
         #accumulator must be 64-bit or rounding errors occur
         return float(valueArray.sum(dtype="float64"))
     if operation == 'min':
         return float(valueArray.min())
     if operation == 'max':
         return float(valueArray.max())
     raise ShouldNotOccurError()
Ejemplo n.º 22
0
def returns(sometype):
    "Return type checking decorator"

    # convert decorator argument into a checker

    checker = Checker.create(sometype)
    if checker is None:
        # Unsupported decorator parameter: raise immediately or log once,
        # depending on the RAISE_DEVIANCES policy flag
        if RAISE_DEVIANCES:
            raise ShouldNotOccurError(
                "@returns decorator got parameter of unsupported "
                "type %s" % type_name(sometype))
        else:
            logMessageOnce("@returns decorator got parameter of unsupported "
                           "type %s" % type_name(sometype),
                           level=5,
                           logger=SIGNATURE_DEVIANCE_LOGGER)

    if NO_CHECK:  # no type checking is performed, return decorated method itself

        def returns_proxy(method):
            return method

    else:

        def returns_proxy(method):
            # Wrap the method so its return value is checked on every call
            def returns_invocation_proxy(*args, **kwargs):

                result = method(*args, **kwargs)

                if not checker.check(result):
                    # Deviating return value: raise or log once, per policy
                    if RAISE_DEVIANCES:
                        raise ReturnValueError(
                            "%s() has returned an invalid "
                            "value of type %s" %
                            (method.__name__, type_name(result)))
                    else:
                        logMessageOnce("%s() has returned an invalid "
                                       "value of type %s" %
                                       (method.__name__, type_name(result)),
                                       level=5,
                                       logger=SIGNATURE_DEVIANCE_LOGGER)

                return result

            # Preserve the wrapped method's name for tracebacks and logs
            returns_invocation_proxy.__name__ = method.__name__
            return returns_invocation_proxy

    return returns_proxy
    def _getGenome(cls, choices):
        """Resolve the genome for the analysis.

        Prefers an explicit genome choice; otherwise derives a single
        common genome from the selected GSuites. Raises ShouldNotOccurError
        when the GSuites disagree or carry no genome information. (Returns
        None when no GSuites are selected, as in the original contract.)"""
        if hasattr(choices, 'genome'):
            return choices.genome

        gsuites = cls._getAllSelectedGsuites(choices)
        if len(gsuites) > 0:
            uniqueGenomes = set(gsuite.genome for gsuite in gsuites)
            if len(uniqueGenomes) == 1:
                commonGenome = uniqueGenomes.pop()
                if commonGenome:
                    return commonGenome

            raise ShouldNotOccurError(
                'Genome information is not provided in the selected genomes. '
                'Subclass of UserBinMixin should add a genome choice box using GenomeMixin, '
                'or override the cls._getGenome method')
Ejemplo n.º 24
0
    def _compute(self):
        """Build a TSResult mirroring the track structure from the children's
        results, then set its summary value: the raw child results for
        'RawResults', otherwise self._multitrackSummaryFunc applied to them.

        Raises ShouldNotOccurError when no summary function is defined."""
        tsResult = TSResult(self._computeTrackStructure)
        rawResults = []
        for key, child in self._childrenDict.iteritems():
            childResult = child.getResult()
            tsResult[key] = childResult
            rawResults.append(childResult.getResult())

        summaryFunc = self._multitrackSummaryFunc
        if not summaryFunc:
            raise ShouldNotOccurError('The summary function is not defined')
        tsResult.setResult(rawResults if summaryFunc == 'RawResults'
                           else summaryFunc(rawResults))
        return tsResult
 def getGlobalSource(globalSourceStr, genome, minimal):
     """Return the bin source identified by globalSourceStr ('test', 'chrs',
     'chrarms', 'ensembl', 'userbins'), or a minimal dummy source when
     minimal is True.

     Raises ShouldNotOccurError for unrecognized identifiers."""
     if minimal == True:
         return MinimalBinSource(genome)

     if globalSourceStr == 'test':
         return UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')
     if globalSourceStr == 'chrs':
         return GenomeInfo.getChrRegs(genome)
     if globalSourceStr == 'chrarms':
         return GenomeInfo.getChrArmRegs(genome)
     if globalSourceStr == 'ensembl':
         return GenomeInfo.getStdGeneRegs(genome)
     if globalSourceStr == 'userbins':
         from gold.application.StatRunner import StatJob
         assert StatJob.USER_BIN_SOURCE is not None
         return StatJob.USER_BIN_SOURCE
         #return kwArgs['userBins']
     raise ShouldNotOccurError('globalSource not recognized')
Ejemplo n.º 26
0
 def _storeMinimalMemoResult(cls, stat, minimalMemoResult):
     """Store a minimal-run memoized result (or its captured error) for a
     statistic, keyed by its minimal memo key.

     Raises ShouldNotOccurError if a result for the same key is already
     present: an existing result should have been loaded and reused, never
     overwritten.
     """
     minimalMemoDict = cls.memoDataCollection[cls.MINIMAL_MEMO_PATH]
     minimalMemoKey = cls._createMinimalMemoKey(stat)
     if minimalMemoKey in minimalMemoDict:
         # Bugfix: added the missing space before 'when' - the two
         # concatenated message fragments previously ran together.
         raise ShouldNotOccurError(
             'Trying to store minimal memo result for "{}" '.format(
                 minimalMemoKey) +
             'when already present in minimal memoized result dict. The existing result '
             'should have been loaded and used, and no result should subsequently be stored.'
         )
     else:
         if minimalMemoResult.error:
             if DebugConfig.VERBOSE:
                 logMessage('Storing exception "{}" for "{}"'.format(
                     minimalMemoResult.error.exc_value, minimalMemoKey))
         else:
             if DebugConfig.VERBOSE:
                 logMessage('Storing result "{}" for "{}"'.format(
                     minimalMemoResult.result, minimalMemoKey))
         minimalMemoDict[minimalMemoKey] = minimalMemoResult
Ejemplo n.º 27
0
    def parseAndStoreProfile(stdout, testName, revision, diskMemo):
        """Parse profiler output embedded in captured stdout and store the
        extracted stats per test/revision/diskMemo in the profiling storage.

        Python 2 code (uses dict.has_key). Assumes disk memoization prepends
        two extra profile sections before the relevant ones, hence the
        offset of 2 - TODO confirm against Profiler's output layout.
        """
        offset = 0
        if diskMemo:
            offset = 2

        splittedStdout = stdout.split(Profiler.PROFILE_HEADER + os.linesep)

        # First line of the section holds the numeric totals (call counts
        # and CPU time), extracted as decimal-number substrings
        totStats = re.findall('([0-9\.]+)',
                              splittedStdout[offset + 1].splitlines()[0])
        funcCalls = totStats[0]

        if len(totStats) == 2:
            # No separate primitive-call count reported in the totals line
            primCalls = funcCalls
            cpuTime = totStats[1]
        elif len(totStats) == 3:
            primCalls = totStats[1]
            cpuTime = totStats[2]
        else:
            raise ShouldNotOccurError()

        # Cumulative and internal profiles are the section bodies up to the
        # profile footer marker
        cumProfile = splittedStdout[offset + 1].split(
            Profiler.PROFILE_FOOTER)[0]
        intProfile = splittedStdout[offset + 2].split(
            Profiler.PROFILE_FOOTER)[0]

        storage = ProfilingStorage._getStorage('c')

        if not storage.has_key(testName):
            storage[testName] = {}

        if not storage[testName].has_key(str(revision)):
            storage[testName][str(revision)] = {}

        storage[testName][str(revision)][str(diskMemo)] = {'funcCalls': funcCalls, \
                                                           'primCalls': primCalls, \
                                                           'cpuTime': cpuTime, \
                                                           'cumProfile': cumProfile, \
                                                           'intProfile': intProfile}

        storage.close()
    def __iter__(self):
        """Yield elements from the wrapped GESource up to the next boundary
        in self._countList, then stop; marks self._finished when the last
        boundary is reached. (Python 2: uses .next().)

        NOTE(review): the element counter restarts at 0 on each call, so
        countList entries appear to be per-invocation counts rather than
        cumulative positions - confirm against callers.

        Raises ShouldNotOccurError if the source is exhausted before the
        sum of countList is consumed.
        """
        try:
            i = 0

            while not self._finished:
                # Stop at the current count boundary and advance to the next
                if len(self._countList) > self._curCountListIdx and \
                    i == self._countList[self._curCountListIdx]:
                    self._curCountListIdx += 1
                    if len(self._countList) == self._curCountListIdx:
                        self._finished = True
                    break

                yield self._geIter.next()
                i += 1

        except StopIteration:
            if self._finished:
                # Normal exhaustion after the last boundary: propagate
                raise
            else:
                raise ShouldNotOccurError(
                    'Premature stop. GESource was shorter than sum of countList.'
                )
Ejemplo n.º 29
0
    def _validateAllTracksRead(self):
        """Verify that every track of this statistic was actually read
        (flagged as read) during computation.

        Raises ShouldNotOccurError when called before a result exists, and
        IncompatibleTracksError for a track that was created but never
        touched - unless a duplicate of it (same unique key) exists among
        the other tracks, which is an expected memoization effect.
        """
        if not self.hasResult():
            raise ShouldNotOccurError("At this stage, statistic should either have result, "
                                      "or exception should have been raised")

        tracks = self.getAllTracks()
        trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self.getGenome()) for tr in tracks]

        for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(trackUniqueKeys))):
            track = tracks[trackIndex]
            if track is not None and not track.hasBeenFlaggedAsRead():
                uniqueKeyForRestTracks = \
                    set(trackUniqueKeys[i] for i in restTrackIndexes)

                # If several tracks are the same, memory memoization will only result
                # in one RawDataStat being created, for one Track object. This is a
                # wanted optimization. In other cases, something is probably wrong if
                # a track has not been touched. However, this rule may be revisited
                # when track structure functionality is implemented.
                if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                    raise IncompatibleTracksError(
                        'Track ' + prettyPrintTrackName(track.trackName) +
                        ' was created, but not touched by statistic')
Ejemplo n.º 30
0
 def getZeroBinsValidationMessage(self, regSpec, binSpec):
     """Always raises ShouldNotOccurError: invoked when a region
     specification yields zero bins, which should have been caught earlier.
     """
     # Bugfix: removed the doubled space ('not  describe') caused by both
     # concatenated fragments contributing a space.
     raise ShouldNotOccurError('The region specification "%s" does not ' %
                               regSpec + 'describe any real regions')