Example #1
0
    def _compute(self):
        if self._region.strand == False and self._distDirection != 'both':
            raise NotSupportedError(
            )  #support by switching between left/right-computation..

        tv1 = self._children[0].getResult()
        tv2 = self._children[1].getResult()
        dists = []
        tv2Iter = tv2.__iter__()

        try:
            p2 = tv2Iter.next()
            p2Pos = p2.start()
            prevP2 = None

        except:
            #no points in track2
            return [None for el in tv1]

        emptyP2 = False

        for p1 in tv1:
            #p1 = el1
            try:
                while p2Pos < p1.start():
                    if not emptyP2:
                        prevP2 = AutonomousTrackElement(trackEl=p2)
                    p2 = tv2Iter.next()
                    p2Pos = p2.start()
                self._appendToDists(dists, p1, p2, prevP2)
            except StopIteration:
                emptyP2 = True
                self._appendToDists(dists, p1, None, prevP2)
        return dists
Example #2
0
    def __init__(self, internalFn):
        head, tail = os.path.split(internalFn)
        dirs = head.split(os.path.sep)

        self.path = internalFn

        self.directories = dirs if dirs != [''] else []
        if '' in self.directories:
            raise NotSupportedError('Absolute paths (starting with '/') or empty directory ' \
                                    'names not supported for internal file names in archive.')

        self.baseFileName = tail
Example #3
0
 def _init(self,
           method='sum_of_sum',
           kernelType=None,
           spreadParam=None,
           **kwArgs):
     self._method = method
     if method != 'sum_of_sum':
         errorMsg = 'KernelWeightedSumInsideStat only supports "sum_of_sum".'
         logLackOfSupport(errorMsg)
         raise NotSupportedError(errorMsg)
     self._kernelType = kernelType
     self._spreadParam = int(spreadParam)
Example #4
0
    def getTrackView(self, region):
        """Return the original track's view for region after validating it.

        The view is obtained through RawDataStat with a neutral format
        requirement, then checked against the track randomizer: both its
        track format and its overlap mode must be supported.

        Raises:
            NotSupportedError: if the randomizer rejects the track format
                or the allowOverlaps setting of the view.
            AssertionError: if the view's border handling is not 'crop'.
        """
        # To make sure that the origTrack is only read once across randomizations
        origTv = RawDataStat(region, self._origTrack,
                             NeutralTrackFormatReq()).getResult()

        formatOk = self._trackRandomizer.supportsTrackFormat(origTv.trackFormat)
        if not formatOk:
            template = ('The original track "{}" has format "{}", '
                        'which is not supported by "{}".')
            raise NotSupportedError(template.format(
                prettyPrintTrackName(self.trackName),
                str(origTv.trackFormat),
                self._trackRandomizer.getDescription()))

        overlapOk = self._trackRandomizer.supportsOverlapMode(origTv.allowOverlaps)
        if not overlapOk:
            template = ('The original track "{}" has "allowOverlaps={}", '
                        'which is not supported by "{}".')
            raise NotSupportedError(template.format(
                prettyPrintTrackName(self.trackName),
                origTv.allowOverlaps,
                self._trackRandomizer.getDescription()))

        assert origTv.borderHandling == 'crop'

        return origTv
    def __init__(self, dense=None, val=None, interval=None, linked=None, strand=None, id=None, weights=None, extra=None, \
                 allowOverlaps=False, borderHandling='crop', name=None):
        """Set up a track format requirement, either by flags or by name.

        When name is given it is lower-cased and looked up in
        TrackFormatReq.FORMAT_DICT, which supplies the dense, val, interval
        and linked settings; dense, interval and linked must then be left
        as None. If the looked-up val setting is truthy it is replaced by
        the val argument (which must not be False), otherwise val must be
        None. weights must be None unless the looked-up format is linked.
        Without a name, the flag arguments are stored as given.

        Args:
            dense, interval, linked, strand, id: None, False or True.
            val, weights: None, False or a key of
                TrackFormat.VAL_TYPE_NAME_DICT.
            extra: None, False, or a non-empty list/tuple whose first item
                is a string.
            allowOverlaps: None, False or True.
            borderHandling: None, 'crop', 'discard', 'include' or
                'duplicate'.
            name: optional format name.

        Raises:
            NotSupportedError: if name is not a key of FORMAT_DICT.
            AssertionError: if an argument violates the constraints above.
        """
        assert all(x in [None, False, True]
                   for x in [dense, interval, linked, strand, id])
        # list() keeps the concatenation valid even where keys() does not
        # return a plain list.
        assert all(x in [None, False] + list(TrackFormat.VAL_TYPE_NAME_DICT.keys())
                   for x in [val, weights])
        assert extra in [None, False
                         ] or (type(extra) in [list, tuple] and len(extra) > 0
                               and isinstance(extra[0], basestring))
        assert allowOverlaps in [None, False, True]
        assert borderHandling in [
            None, 'crop', 'discard', 'include', 'duplicate'
        ]

        # A format name and explicit structure flags are mutually exclusive.
        assert (name is None) or (dense is None and interval is None
                                  and linked is None)

        if name is not None:
            name = name.lower()
            if name not in TrackFormatReq.FORMAT_DICT:
                raise NotSupportedError('Format name is not recognized: ' +
                                        name)

            self._dense, self._val, self._interval, self._linked = \
                TrackFormatReq.FORMAT_DICT[name]

            if self._val:
                # The named format has values: the caller's val (possibly
                # None) takes the place of the looked-up setting.
                assert val != False
                self._val = val
            else:
                assert val is None

            if not self._linked:
                assert weights is None
            self._weights = weights
        else:
            self._dense = dense
            self._val = val
            self._interval = interval
            self._linked = linked
            self._weights = weights

        self._reprDense = None
        self._hasStrand = strand
        self._hasId = id
        self._extra = extra
        self._allowOverlaps = allowOverlaps
        self._borderHandling = borderHandling
Example #6
0
    def _compute(self):
        if self._region.strand == False and self._distDirection != 'both':
            raise NotSupportedError() #support by switching between left/right-computation..

        tv1 = self._children[0].getResult()
        tv2 = self._children[1].getResult()
        tv1HasName = tv1.hasExtra('name')
        tv2HasName = tv2.hasExtra('name')
        tv1HasId = tv1.idsAsNumpyArray() is not None
        tv2HasId = tv2.idsAsNumpyArray() is not None
        
        dists = []
        segLengths = []
        fromNames = []
        toNames = []
        
        for p,ldist,rdist,lname,rname in self._yieldLeftAndRightDists(tv1, tv2, tv1HasName, tv2HasName, tv1HasId, tv2HasId): #p is here the track1-point that we have a left/right-distance for
            if tv1HasName:
                fromNames.append(p.name())
            elif tv1HasId:
                fromNames.append(p.id())
            
            segLengths.append(len(p))
            #p.strand
            #self._distDirection
            
            if rdist == 0:
                self._append(dists, 0, toNames, rname)
            elif ldist == 0:
                self._append(dists, 0, toNames, lname)
            elif self._distDirection == 'both':
                minDist, minName = min((x,name) for x,name in [[ldist,lname],[rdist,rname]] if x is not None)
                self._append(dists, minDist, toNames, minName)
            elif p.strand() in [True,None,BINARY_MISSING_VAL]:
                if self._distDirection in ['downstream']:
                    self._append(dists, rdist, toNames, rname)
                else:
                    self._append(dists, ldist, toNames, lname)
            else:
                if self._distDirection in ['downstream']:
                    self._append(dists, ldist, toNames, lname)
                else:
                    self._append(dists, rdist, toNames, rname)
                    
        return OrderedDict([('Result', dists)] + \
                           ([('SegLengths', segLengths)] if self._addSegmentLengths else []) + \
                           ([('FromNames', fromNames)] if tv1HasName else ([('FromIds', fromNames)] if tv1HasId else [])) + \
                           ([('ToNames', toNames)] if tv2HasName else ([('ToIds', fromNames)] if tv2HasId else [])))
Example #7
0
    def __iter__(self):
        self = copy(self)

        #does not support function, partitions and points:
        if (False in [
                attrs in self._geSource.getPrefixList()
                for attrs in ['start', 'end']
        ]):
            raise NotSupportedError('Binning file must be segments. Current file format: ' + \
                                    TrackFormat.createInstanceFromPrefixList(self._geSource.getPrefixList(), \
                                                                             self._geSource.getValDataType(), \
                                                                             self._geSource.getValDim(), \
                                                                             self._geSource.getEdgeWeightDataType(), \
                                                                             self._geSource.getEdgeWeightDim()).getFormatName() )

        self._geIter = self._geSource.__iter__()
        return self
def getGenomeElementSourceClass(fn, suffix=None, forPreProcessor=False):
    """Return the genome element source class that handles a file type.

    The suffix (taken from fn via getFileSuffix when not supplied) is
    first checked against the unsupported suffixes. It is then compared
    case-insensitively with the FILE_SUFFIXES of every class returned by
    getAllGenomeElementSourceClasses(forPreProcessor); the first match
    wins. If none matches, the GTrack source class (the Hb variant when
    forPreProcessor is true) is returned, provided it reports a
    suffix-based GTrack subtype for the suffix.

    Raises:
        NotSupportedError: if the suffix is unsupported or nothing matches.
    """
    from gold.application.DataTypes import getUnsupportedFileSuffixes
    if not suffix:
        from gold.util.CommonFunctions import getFileSuffix
        suffix = getFileSuffix(fn)

    if suffix.lower() in getUnsupportedFileSuffixes():
        raise NotSupportedError('File type "%s" not supported.' % suffix)

    for geSourceCls in getAllGenomeElementSourceClasses(forPreProcessor):
        for clsSuffix in geSourceCls.FILE_SUFFIXES:
            # NOTE(review): suffix.lower() above already requires a
            # non-None suffix, so the endswith branch looks unreachable.
            if suffix is None:
                isMatch = fn.endswith('.' + clsSuffix)
            else:
                isMatch = clsSuffix.lower() == suffix.lower()
            if isMatch:
                return geSourceCls

    # No registered class claimed the suffix: fall back to GTrack subtypes.
    if forPreProcessor:
        from gold.origdata.GtrackGenomeElementSource import HbGtrackGenomeElementSource as GtrackGESourceCls
    else:
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource as GtrackGESourceCls

    if GtrackGESourceCls.suffixBasedGTrackSubtypeExists(suffix):
        return GtrackGESourceCls

    raise NotSupportedError('File type "%s" not supported.' % suffix)
 def reprIsDense(self):
     """Always raise NotSupportedError.

     NOTE(review): presumably the enclosing class has no dense/sparse
     representation to report -- confirm against the class definition.
     """
     raise NotSupportedError()
Example #10
0
 def _init(self, method='sum_of_sum', **kwArgs):
     self._method = method
     if method == 'mean_of_mean':
         errorMsg = 'AggregateOfCoveredBpsInSegmentsStat does not support "mean_of_mean".'
         logLackOfSupport(errorMsg)
         raise NotSupportedError(errorMsg)
 def __copy__(self):
     """Reject shallow copying: always raises NotSupportedError.

     The message points callers to getCopy() or copy.deepcopy() instead.
     """
     message = ('Shallow copy.copy() of GenomeElement objects is not supported, '
                'as this produces unwanted effects. Please use instance method '
                'getCopy() or copy.deepcopy() instead. getCopy() is by far the '
                'most efficient of the two.')
     raise NotSupportedError(message)
Example #12
0
 def __copy__(self):
     """Reject shallow copying: always raises NotSupportedError.

     The message points callers to getCopy() or copy.deepcopy() instead.
     """
     # Each fragment ends in a space so the implicitly concatenated
     # sentences do not run together.
     raise NotSupportedError(
         'Shallow copy.copy() of GenomeRegion objects is not supported '
         'in order to allow polymorphism with the subclass GenomeElement. '
         'Please use instance method getCopy() or copy.deepcopy() instead. '
         'getCopy() is by far the most efficient of the two.')