Beispiel #1
0
 def getTrackView(self, region):
     """
     Build and return a randomized TrackView for ``region``.

     The original track view for the region is fetched from
     ``self._origTrack`` (after registering ``self._trackFormatReq`` on it).
     Its numpy arrays are handed to ``self._createRandomizedNumpyArrays`` and
     the returned arrays are wrapped in a new TrackView on the same genome
     anchor. The new view is stored in ``self._cachedTV`` and returned.

     Asserts that the original view disallows overlaps, uses 'crop' border
     handling, is anchored exactly at ``region``, and that the resulting
     track format is compatible with ``self._trackFormatReq``.
     """
     if Config.USE_SLOW_DEFENSIVE_ASSERTS:
         # The requested region has to be (one of) the region(s) stored in
         # self._origRegion.
         if isIter(self._origRegion):
             assert region in self._origRegion
         else:
             assert self._origRegion == region

     self._origTrack.addFormatReq(self._trackFormatReq)
     sourceTV = self._origTrack.getTrackView(region)
     self._checkTrackFormat(sourceTV)
     assert not sourceTV.allowOverlaps
     assert sourceTV.borderHandling == 'crop'
     assert region == sourceTV.genomeAnchor

     anchor = sourceTV.genomeAnchor
     randomized = self._createRandomizedNumpyArrays(
         len(anchor),
         sourceTV.startsAsNumpyArray(),
         sourceTV.endsAsNumpyArray(),
         sourceTV.valsAsNumpyArray(),
         sourceTV.strandsAsNumpyArray(),
         sourceTV.idsAsNumpyArray(),
         sourceTV.edgesAsNumpyArray(),
         sourceTV.weightsAsNumpyArray(),
         sourceTV.allExtrasAsDictOfNumpyArrays(),
         sourceTV.trackFormat,
         region)
     starts, ends, vals, strands, ids, edges, weights, extras = randomized

     # NOTE(review): getClassName is not used in this method; the import is
     # kept unchanged.
     from gtrackcore.util.CommonFunctions import getClassName

     # Offset the returned coordinates by the anchor start; arrays that are
     # None stay None.
     shiftedStarts = None if starts is None else starts + anchor.start
     shiftedEnds = None if ends is None else ends + anchor.start

     randomizedTV = TrackView(anchor, shiftedStarts, shiftedEnds, vals, strands, ids, edges, weights,
                              sourceTV.borderHandling, sourceTV.allowOverlaps, extraLists=extras)
     self._cachedTV = randomizedTV

     assert self._trackFormatReq.isCompatibleWith(randomizedTV.trackFormat), \
         'Incompatible track-format: ' + str(self._trackFormatReq) + ' VS ' + str(randomizedTV.trackFormat)
     return randomizedTV
    def _getNewContentsOfPrev(self, x, y, prefix):
        if x is not None and y is not None:
            if isinstance(x, numpy.ndarray):
                x = x.tolist()
            if isinstance(y, numpy.ndarray):
                y = y.tolist()

            if isIter(x):
                if x != y:
                    if len(x) != len(y):
                        return []
                    else:
                        return [
                            self._getMissingValue(prefix)
                            for i, item in enumerate(x)
                        ]
            else:
                valDataType = self._geSource.getValDataType()
                if prefix == 'val' and valDataType[
                        0] == 'S' and valDataType != 'S1':
                    return self._getConcatContent(x, y, prefix, sorted=True)
                elif prefix in ['id'] + self._prevEl.orderedExtraKeys:
                    return self._getConcatContent(x, y, prefix, sorted=False)
                else:
                    if x != y:
                        return self._getMissingValue(prefix)
Beispiel #3
0
 def isCompBin(region):
     """
     Tell whether ``region`` is exactly one comp bin.

     Returns False for iterables of regions. Otherwise the region qualifies
     when its start has offset 0 within its bin (per CompBinManager) and its
     length equals the comp bin size, or the remaining length of the
     chromosome from ``region.start`` if that is smaller.
     """
     if isIter(region):
         return False

     binNumber = CompBinManager.getBinNumber(region.start)
     startsOnBinBoundary = CompBinManager.getOffset(region.start, binNumber) == 0

     regionLen = len(region)
     compBinSize = CompBinManager.getCompBinSize()
     remainingChrLen = GenomeInfo.getChrLen(region.genome, region.chr) - region.start
     hasExpectedLen = regionLen == min(compBinSize, remainingChrLen)

     return startsOnBinBoundary and hasExpectedLen
Beispiel #4
0
    def isCompBin(region):
        """
        Tell whether ``region`` is exactly one comp bin.

        Returns False for iterables of regions. Otherwise the region qualifies
        when its start has offset 0 within its bin (per CompBinManager) and its
        length equals the comp bin size, or the remaining length of the
        chromosome from ``region.start`` if that is smaller.
        """
        if isIter(region):
            return False

        binNumber = CompBinManager.getBinNumber(region.start)
        startsOnBinBoundary = CompBinManager.getOffset(region.start, binNumber) == 0

        regionLen = len(region)
        compBinSize = CompBinManager.getCompBinSize()
        remainingChrLen = GenomeInfo.getChrLen(region.genome, region.chr) - region.start
        hasExpectedLen = regionLen == min(compBinSize, remainingChrLen)

        return startsOnBinBoundary and hasExpectedLen
Beispiel #5
0
 def _createExtraLists(target, dtype, size):
    if type(target) == bool:
       if target == True:
          return OrderedDict([('extra1', getRandExtraList(size)), ('extra2', getRandExtraList(size))])
       else:
          return OrderedDict()
    elif isinstance(target, dict):
       return OrderedDict([(name, array(content, dtype=dtype)) for name, content in target.items()])
    elif isIter(target):
       return OrderedDict([(name, getRandExtraList(size)) for name in target])
    else:
       raise ShouldNotOccurError
Beispiel #6
0
 def _createExtraLists(target, dtype, size):
     if type(target) == bool:
         if target == True:
             return OrderedDict([('extra1', getRandExtraList(size)),
                                 ('extra2', getRandExtraList(size))])
         else:
             return OrderedDict()
     elif isinstance(target, dict):
         return OrderedDict([(name, array(content, dtype=dtype))
                             for name, content in target.items()])
     elif isIter(target):
         return OrderedDict([(name, getRandExtraList(size))
                             for name in target])
     else:
         raise ShouldNotOccurError
 def _getNewContentsOfPrev(self, x, y, prefix):
     if x is not None and y is not None:
         if isinstance(x, numpy.ndarray):
             x = x.tolist()
         if isinstance(y, numpy.ndarray):
             y = y.tolist()
         
         if isIter(x):
             if x != y:
                 if len(x) != len(y):
                     return []
                 else:
                     return [self._getMissingValue(prefix) for i,item in enumerate(x)]
         else:
             valDataType = self._geSource.getValDataType()
             if prefix == 'val' and valDataType[0] == 'S' and valDataType != 'S1':
                 return self._getConcatContent(x, y, prefix, sorted=True)
             elif prefix in ['id'] + self._prevEl.orderedExtraKeys:
                 return self._getConcatContent(x, y, prefix, sorted=False)
             else:
                 if x != y:
                     return self._getMissingValue(prefix)
Beispiel #8
0
   def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False, \
                extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
       """
       Build a TrackView on 'TestGenome'/'chr21' from explicit and/or generated columns.

       Parameters:
         segments     -- optional iterable of (start, end) pairs. When given it
                         replaces ``starts`` and ``ends``, which must then both
                         be left at True.
         starts, ends, vals, strands, ids, edges, weights
                      -- each either a bool or an explicit sequence. The value
                         is handed to ``self._createList`` together with
                         generated content; the result may be None (presumably
                         for False -- see _createList). If only one of
                         ``starts``/``ends`` is given explicitly, the other is
                         switched off.
         extras       -- bool, dict or iterable of names; converted by
                         ``self._createExtraLists`` with dtype 'S'.
         anchor       -- [start, end] of the region; defaults to [10, 1000].
         numElements  -- element count. Required when no explicit content is
                         given; otherwise taken from the first explicit column.
         valDType     -- dtype used for ``vals``.
         borderHandling, allowOverlaps
                      -- passed through to ``TrackView.__init__``.

       Note: explicit ``starts``/``ends`` sequences are shifted by
       ``anchor[0]`` in place, i.e. the caller's objects are modified.
       """
       if type(starts) != bool and ends == True:
           ends = False
       if type(ends) != bool and starts == True:
           starts = False

       assert not (starts == False and ends == False)
       assert segments != False and segments != True
       assert starts is not None and ends is not None and vals is not None and strands is not None
       assert segments is None or (starts == True and ends == True)
       assert not (isIter(weights) and not isIter(edges))

       # Either some explicit content is given (and numElements is inferred),
       # or numElements has to be supplied.
       assert (any(type(x) not in [bool, type(None)] for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras]) and numElements is None) \
              or numElements is not None

       if anchor is None:
           anchor = [10, 1000]

       if segments is not None:
           starts = []
           ends = []
           for seg in segments:
               starts.append(seg[0])
               ends.append(seg[1])

       if isIter(edges):
           # Pad every edge list (and the matching weight list) to the same length.
           maxNumEdges = self._findMaxNumEls(edges)
           edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
           if isIter(weights):
               weights = self._appendEmptyToEnd(weights, numpy.nan, maxNumEdges)

       # All explicit columns must agree on the number of elements.
       extraColumns = list(extras.values()) if isinstance(extras, dict) else []
       for column in [starts, ends, vals, strands, ids, edges, weights] + extraColumns:
           if type(column) != bool and numElements is None:
               numElements = len(column)
           assert type(column) == bool or len(column) == numElements

       # Explicit coordinates are shifted by the anchor start (in place).
       for coordList in [starts, ends]:
           if type(coordList) != bool:
               for j in range(len(coordList)):
                   coordList[j] += anchor[0]

       randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
       starts = self._createList(starts, randSegmentLists[0], 'int32')
       ends = self._createList(ends, randSegmentLists[1], 'int32')

       vals = self._createList(vals, getRandValList(numElements, valDType), valDType)
       strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

       randIds, randEdges, randWeights = getRandGraphLists(numElements)
       ids = self._createList(ids, randIds, randIds.dtype)
       edges = self._createList(edges, randEdges, randEdges.dtype)
       weights = self._createList(weights, randWeights, 'float64')

       if weights is not None and len(weights.shape) == 1:
           weights = weights.reshape(weights.shape + (0,))

       extras = self._createExtraLists(extras, 'S', numElements)

       # Identity checks on purpose: these may be numpy arrays, for which
       # ``== None`` / ``!= None`` compares elementwise.
       if starts is None:
           # Without starts: prepend anchor[0] to the ends unless the first
           # end is 0 (with a matching leading val/strand), and force the last
           # end to anchor[1].
           if ends[0] != 0:
               ends = numpy.append([anchor[0]], ends)
               if vals is not None:
                   vals = numpy.append([nan], vals)
               if strands is not None:
                   strands = numpy.append([True], strands)
           if ends[-1] != anchor[1]:
               ends[-1] = anchor[1]

       TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals, \
                          strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
Beispiel #9
0
    def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False, \
                 extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
        """
        Build a TrackView on 'TestGenome'/'chr21' from explicit and/or generated columns.

        Parameters:
          segments     -- optional iterable of (start, end) pairs. When given it
                          replaces ``starts`` and ``ends``, which must then both
                          be left at True.
          starts, ends, vals, strands, ids, edges, weights
                       -- each either a bool or an explicit sequence. The value
                          is handed to ``self._createList`` together with
                          generated content; the result may be None (presumably
                          for False -- see _createList). If only one of
                          ``starts``/``ends`` is given explicitly, the other is
                          switched off.
          extras       -- bool, dict or iterable of names; converted by
                          ``self._createExtraLists`` with dtype 'S'.
          anchor       -- [start, end] of the region; defaults to [10, 1000].
          numElements  -- element count. Required when no explicit content is
                          given; otherwise taken from the first explicit column.
          valDType     -- dtype used for ``vals``.
          borderHandling, allowOverlaps
                       -- passed through to ``TrackView.__init__``.

        Note: explicit ``starts``/``ends`` sequences are shifted by
        ``anchor[0]`` in place, i.e. the caller's objects are modified.
        """
        if type(starts) != bool and ends == True:
            ends = False
        if type(ends) != bool and starts == True:
            starts = False

        assert not (starts == False and ends == False)
        assert segments != False and segments != True
        assert starts is not None and ends is not None and vals is not None and strands is not None
        assert segments is None or (starts == True and ends == True)
        assert not (isIter(weights) and not isIter(edges))

        # Either some explicit content is given (and numElements is inferred),
        # or numElements has to be supplied.
        assert (any(type(x) not in [bool, type(None)] for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras]) and numElements is None) \
               or numElements is not None

        if anchor is None:
            anchor = [10, 1000]

        if segments is not None:
            starts = []
            ends = []
            for seg in segments:
                starts.append(seg[0])
                ends.append(seg[1])

        if isIter(edges):
            # Pad every edge list (and the matching weight list) to the same length.
            maxNumEdges = self._findMaxNumEls(edges)
            edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
            if isIter(weights):
                weights = self._appendEmptyToEnd(weights, numpy.nan,
                                                 maxNumEdges)

        # All explicit columns must agree on the number of elements.
        extraColumns = list(extras.values()) if isinstance(extras, dict) else []
        for column in [starts, ends, vals, strands, ids, edges, weights] + extraColumns:
            if type(column) != bool and numElements is None:
                numElements = len(column)
            assert type(column) == bool or len(column) == numElements

        # Explicit coordinates are shifted by the anchor start (in place).
        for coordList in [starts, ends]:
            if type(coordList) != bool:
                for j in range(len(coordList)):
                    coordList[j] += anchor[0]

        randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
        starts = self._createList(starts, randSegmentLists[0], 'int32')
        ends = self._createList(ends, randSegmentLists[1], 'int32')

        vals = self._createList(vals, getRandValList(numElements, valDType),
                                valDType)
        strands = self._createList(strands, getRandStrandList(numElements),
                                   'bool8')

        randIds, randEdges, randWeights = getRandGraphLists(numElements)
        ids = self._createList(ids, randIds, randIds.dtype)
        edges = self._createList(edges, randEdges, randEdges.dtype)
        weights = self._createList(weights, randWeights, 'float64')

        if weights is not None and len(weights.shape) == 1:
            weights = weights.reshape(weights.shape + (0, ))

        extras = self._createExtraLists(extras, 'S', numElements)

        # Identity checks on purpose: these may be numpy arrays, for which
        # ``== None`` / ``!= None`` compares elementwise.
        if starts is None:
            # Without starts: prepend anchor[0] to the ends unless the first
            # end is 0 (with a matching leading val/strand), and force the last
            # end to anchor[1].
            if ends[0] != 0:
                ends = numpy.append([anchor[0]], ends)
                if vals is not None:
                    vals = numpy.append([nan], vals)
                if strands is not None:
                    strands = numpy.append([True], strands)
            if ends[-1] != anchor[1]:
                ends[-1] = anchor[1]

        TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals, \
                           strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)