def __init__(self, genome, trackName, boundingRegions, globalCoords=True, allowOverlaps=False, printWarnings=True, *args, **kwArgs):
     """
     Initialise the source for a non-empty collection of bounding regions.

     The base GenomeElementSource is initialised with an empty file name.
     The regions are stored as given, and ``self._isSorted`` records whether
     they already appear in the order produced by ``sorted()``.
     """
     assert len(boundingRegions) > 0

     GenomeElementSource.__init__(self, '', genome=genome, trackName=trackName, printWarnings=printWarnings, *args, **kwArgs)

     # Constructor arguments kept on the instance.
     self._globalCoords = globalCoords
     self._allowOverlaps = allowOverlaps
     self._boundingRegions = boundingRegions

     # The regions count as sorted when every element matches its counterpart
     # in a sorted copy.
     regionsInOrder = sorted(boundingRegions)
     self._isSorted = all(given == expected
                          for given, expected in zip(boundingRegions, regionsInOrder))
     self._boundingRegionTuples = None

     # Value / edge-weight type information; starts at these defaults.
     self._prefixList = None
     self._valDataType = 'float64'
     self._valDim = 1
     self._edgeWeightDataType = 'float64'
     self._edgeWeightDim = 1
     self._foundDataTypesAndDims = False

     # Track metadata placeholders; not filled in yet.
     self._fileType = None
     self._preProcVersion = None
     self._id = None
     self._undirectedEdges = None
     self._foundTrackInfoBasedMetaData = False

     # Representation placeholders; not filled in yet.
     self._fixedLength = None
     self._fixedGapSize = None
     self._reprIsDense = None
예제 #2
0
 def __init__(self, fn, *args, **kwArgs):
     """
     Initialise the source and probe the first line of *fn* for a header.

     If the first line starts with ``track``, ``self._numHeaderLines`` is set
     to 1. ``self._numCols`` is reset to ``None``.
     """
     GenomeElementSource.__init__(self, fn, *args, **kwArgs)

     # Only the first line is inspected; the context manager closes the
     # handle instead of leaving it open.
     with open(fn) as f:
         possibleHeader = f.readline()

     # NOTE(review): _numHeaderLines is only assigned when a track line is
     # present; presumably a default exists elsewhere -- confirm.
     if possibleHeader.startswith('track'):
         self._numHeaderLines = 1
     self._numCols = None
예제 #3
0
    def __init__(self, genome, trackName, boundingRegions, globalCoords=True, allowOverlaps=False, printWarnings=True, *args, **kwArgs):
        """
        Initialise the source for a non-empty collection of bounding regions.

        The base GenomeElementSource is initialised with an empty file name.
        ``self._isSorted`` is True when the regions, as given, are pairwise
        equal to a sorted copy of themselves.
        """
        assert len(boundingRegions) > 0

        GenomeElementSource.__init__(self, '', genome=genome, trackName=trackName, printWarnings=printWarnings, *args, **kwArgs)

        # Region bookkeeping.
        self._boundingRegions = boundingRegions
        orderedRegions = sorted(boundingRegions)
        self._isSorted = all(
            region == orderedRegion
            for region, orderedRegion in zip(boundingRegions, orderedRegions))
        self._boundingRegionTuples = None
        self._allowOverlaps = allowOverlaps
        self._globalCoords = globalCoords

        # Default value / edge-weight types and dimensions.
        self._valDataType = 'float64'
        self._valDim = 1
        self._edgeWeightDataType = 'float64'
        self._edgeWeightDim = 1
        self._prefixList = None
        self._foundDataTypesAndDims = False

        # Track metadata placeholders; not filled in yet.
        self._foundTrackInfoBasedMetaData = False
        self._fileType = None
        self._preProcVersion = None
        self._id = None
        self._undirectedEdges = None

        # Placeholders for values flagged as not yet calculated.
        self._doneCalculatingTrackViewBasedValues = False
        self._fixedLength = None
        self._fixedGapSize = None
        self._reprIsDense = None
    def __init__(self, fn, *args, **kwArgs):
        """
        Initialise the source and validate the track definition line of *fn*.

        The first line of the file must start with ``track type="array"``
        (single quotes are normalised to double quotes first) and its parsed
        header must contain ``expScale``, ``expStep`` and ``expNames``. The
        number of non-empty, comma-separated entries of ``expNames`` is
        stored in ``self._globExpCount``.

        Raises:
            InvalidFormatError: if the track definition line is malformed, if
                ``expNames`` is not wrapped in quote marks, or if fewer than
                3 experiments are listed.
        """
        GenomeElementSource.__init__(self, fn, *args, **kwArgs)

        # Only the first line is needed; close the handle right away instead
        # of leaking it.
        with open(fn) as f:
            trackDef = f.readline().replace('\'', '"')

        if not trackDef.startswith('track type="array"'):
            raise InvalidFormatError(
                'Track definition line must start with: track type="array". Line: '
                + trackDef)

        header = self._parseHeader(trackDef)
        if not all(key in header
                   for key in ['expScale', 'expStep', 'expNames']):
            raise InvalidFormatError(
                'Track definition line must define values for expScale, expStep and expNames: '
                + trackDef)

        expNames = header['expNames']
        # The length guard turns an empty value into a format error rather
        # than an IndexError.
        if len(expNames) < 2 or not all(expNames[i] == '"' for i in [0, -1]):
            raise InvalidFormatError(
                'expNames does not start and end in quote marks: ' + trackDef)

        # Strip exactly the two surrounding quotes. A trailing comma leaves an
        # empty entry that is filtered out, so both "a,b,c," and "a,b,c" count
        # as 3 (slicing with [1:-2] lost a single-character last name).
        self._globExpCount = len(
            [x for x in expNames[1:-1].split(',') if x != ''])
        if self._globExpCount < 3:
            raise InvalidFormatError(
                'Microarray data must have at least 3 experiments. Length of expNames: '
                + str(self._globExpCount))
예제 #5
0
 def _checkBoundingRegionSortedPair(self, lastBoundingRegion, br):
     """
     Run the base-class pair check, then reject adjoining bounding regions.

     Raises:
         InvalidFormatError: if *br* has both start and end set and starts
             exactly where *lastBoundingRegion* ends.
     """
     GenomeElementSource._checkBoundingRegionSortedPair(
         self, lastBoundingRegion, br)

     # The adjacency test only applies when both coordinates are set.
     if br.start is None or br.end is None:
         return
     if lastBoundingRegion.end != br.start:
         return

     message = "Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)."
     raise InvalidFormatError(message % (lastBoundingRegion, br))
예제 #6
0
 def __init__(self, genome, trackName, region, valSlice, valDataType='float64'):
     """
     Initialise the source with no file name and store the given region,
     value slice and value data type on the instance.
     """
     GenomeElementSource.__init__(self, None, genome=genome, trackName=trackName)

     # Constructor arguments kept for later use.
     self._region = region
     self._valSlice = valSlice
     self._valDataType = valDataType

     # Starts out False.
     self._returnedOneElement = False
 def __init__(self, fn, *args, **kwArgs):
     """
     Initialise the source and count the header lines at the top of *fn*.

     The first line counts as a header line when it starts with
     ``track type=bedGraph``. Every following line that starts with ``#``
     is counted as well, up to the first line that does not. The total is
     stored in ``self._numHeaderLines``.
     """
     GenomeElementSource.__init__(self, fn, *args, **kwArgs)

     # The handle is only needed for this scan; the context manager closes
     # it instead of leaving it open.
     with open(fn) as f:
         trackDef = f.readline()
         if trackDef.startswith('track type=bedGraph'):
             numHeaderLines = 1
         else:
             numHeaderLines = 0

         # NOTE(review): a '#' comment on the very first line is consumed
         # above without being counted -- confirm this is intended.
         headerLine = f.readline()
         while headerLine.startswith('#'):
             numHeaderLines += 1
             headerLine = f.readline()

     self._numHeaderLines = numHeaderLines
예제 #8
0
    def __init__(self, fn, *args, **kwArgs):
        """
        Initialise the source and count the header lines at the top of *fn*.

        A first line starting with ``track type=bedGraph`` counts as one
        header line. Each subsequent line starting with ``#`` adds one more,
        until a line without that prefix (or end of file) is reached. The
        result is stored in ``self._numHeaderLines``.
        """
        GenomeElementSource.__init__(self, fn, *args, **kwArgs)

        # Use a context manager so the file is closed once the scan is done;
        # the handle was never closed before.
        with open(fn) as f:
            trackDef = f.readline()
            if trackDef.startswith('track type=bedGraph'):
                numHeaderLines = 1
            else:
                numHeaderLines = 0

            # NOTE(review): the first line is never tested for '#', so a
            # leading comment line is not counted -- confirm this is intended.
            headerLine = f.readline()
            while headerLine.startswith('#'):
                numHeaderLines += 1
                headerLine = f.readline()

        self._numHeaderLines = numHeaderLines
예제 #9
0
    def __new__(cls,
                regSpec,
                binSpec,
                genome=None,
                categoryFilterList=None,
                strictMatch=True,
                includeExtraChrs=False):
        """
        Build the object that supplies bins for the given specification.

        When *regSpec* is 'file', 'track' or one of the suffixes returned by
        getSupportedFileSuffixesForBinning(), a genome element source is built
        from *binSpec* (optionally wrapped in a GECategoryFilter) and the
        result of ``cls._applyEnvelope`` on it is returned. *genome* must not
        be None in that case.

        Otherwise *regSpec* is parsed as a region specification and an
        AutoBinner over it is returned; a *binSpec* of '*' gives a bin size
        of None.
        """
        elementSourceSpecs = ['file', 'track'] + getSupportedFileSuffixesForBinning()

        if regSpec not in elementSourceSpecs:
            # Region-based binning.
            binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

            from gtrackcore.input.userbins.AutoBinner import AutoBinner
            regions = parseRegSpec(regSpec,
                                   genome,
                                   includeExtraChrs=includeExtraChrs)
            return AutoBinner(regions, binSize)

        # Bins come from a file or a track.
        assert genome is not None

        from gtrackcore.input.core.GenomeElementSource import GenomeElementSource
        if regSpec == 'track':
            from gtrackcore.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
            trackName = convertTNstrToTNListFormat(binSpec)
            geSource = FullTrackGenomeElementSource(genome,
                                                    trackName,
                                                    allowOverlaps=False)
        elif regSpec == 'file':
            geSource = GenomeElementSource(binSpec, genome=genome)
        else:
            # regSpec is a supported file suffix.
            geSource = GenomeElementSource(binSpec,
                                           genome=genome,
                                           suffix=regSpec)

        if categoryFilterList is not None:
            from gtrackcore.input.wrappers.GECategoryFilter import GECategoryFilter
            geSource = GECategoryFilter(geSource,
                                        categoryFilterList,
                                        strict=strictMatch)

        return cls._applyEnvelope(geSource)
 def __init__(self, fn, *args, **kwArgs):
     """
     Initialise the source and validate the track definition line of *fn*.

     The first line must start with ``track type="array"`` (single quotes
     are converted to double quotes first) and its parsed header must hold
     ``expScale``, ``expStep`` and ``expNames``. ``self._globExpCount`` is
     set to the number of non-empty, comma-separated names in ``expNames``.

     Raises:
         InvalidFormatError: if the track definition line is malformed, if
             ``expNames`` is not enclosed in quote marks, or if fewer than
             3 experiments are listed.
     """
     GenomeElementSource.__init__(self, fn, *args, **kwArgs)

     # Read just the first line and close the file; it was left open before.
     with open(fn) as f:
         trackDef = f.readline().replace('\'','"')

     if not trackDef.startswith('track type="array"'):
         raise InvalidFormatError('Track definition line must start with: track type="array". Line: ' + trackDef)

     header = self._parseHeader(trackDef)
     if not all(key in header for key in ['expScale', 'expStep', 'expNames']):
         raise InvalidFormatError('Track definition line must define values for expScale, expStep and expNames: ' + trackDef)

     expNames = header['expNames']
     # Guard the length so an empty value is reported as a format error
     # rather than failing with an IndexError.
     if len(expNames) < 2 or not all(expNames[i] == '"' for i in [0,-1]):
         raise InvalidFormatError('expNames does not start and end in quote marks: ' + trackDef)

     # Drop only the two enclosing quotes. Empty entries (e.g. from a trailing
     # comma) are filtered out, so the count is right with or without one;
     # the former [1:-2] slice could lose a single-character last name.
     self._globExpCount = len( [x for x in expNames[1:-1].split(',') if x != ''] )
     if self._globExpCount < 3:
         raise InvalidFormatError('Microarray data must have at least 3 experiments. Length of expNames: ' + str(self._globExpCount))
예제 #11
0
 def getGESource(fullFn, fileSuffix, extTrackName=None, genome=None, printWarnings=False):
     """
     Create a GenomeElementSource for *fullFn*.

     The source is created with ``forPreProcessor=True`` and
     ``external=True``; *fileSuffix* is passed as ``suffix`` and
     *extTrackName* as ``trackName``.
     """
     from gtrackcore.input.core.GenomeElementSource import GenomeElementSource

     sourceOptions = dict(suffix=fileSuffix,
                          forPreProcessor=True,
                          genome=genome,
                          trackName=extTrackName,
                          external=True,
                          printWarnings=printWarnings)
     return GenomeElementSource(fullFn, **sourceOptions)
예제 #12
0
    def _allGESources(self, trackName):
        """
        Yield a GenomeElementSource for each eligible file of *trackName*.

        Entries of the directory given by createOrigPath() are visited in
        sorted order. Sub-directories are skipped, as are names that start
        with '.', '_' or '#' or end with '~' or '#'. ``self._status`` is
        updated before each step to describe what is being attempted.
        """
        baseDir = createOrigPath(self._genome, trackName)

        self._status = 'Trying os.listdir on: ' + baseDir
        for entry in sorted(os.listdir(baseDir)):
            fullPath = os.sep.join((baseDir, entry))

            self._status = 'Checking file: ' + fullPath
            if os.path.isdir(fullPath):
                continue

            # Skip hidden, temporary and backup files.
            baseName = os.path.basename(fullPath)
            hasSkippedPrefix = baseName[0] in ('.', '_', '#')
            if hasSkippedPrefix or baseName[-1] in ('~', '#'):
                continue

            self._status = 'Trying to create geSource from fn: ' + fullPath
            yield GenomeElementSource(fullPath, self._genome, forPreProcessor=True)
 def __init__(self, *args, **kwArgs):
     """
     Initialise the base source, then start with an empty list of bounding
     region tuples and no current chromosome.
     """
     GenomeElementSource.__init__(self, *args, **kwArgs)

     # Per-instance state, reset after the base initialiser has run.
     self._chr = None
     self._boundingRegionTuples = []
 def __init__(self, *args, **kwArgs):
     """
     Initialise the base source and verify that the file begins with '>'.

     Raises:
         InvalidFormatError: if the first character read from the file is
             not '>'.
     """
     GenomeElementSource.__init__(self, *args, **kwArgs)
     self._boundingRegionTuples = []

     # Check the very first character of the file.
     firstChar = self._getFile().read(1)
     if firstChar != '>':
         raise InvalidFormatError('FASTA file does not start with the ">" character.')
예제 #15
0
 def getPrefixList(self):
     """Return the prefix list produced by GenomeElementSource.getPrefixList."""
     prefixList = GenomeElementSource.getPrefixList(self)
     return prefixList
예제 #16
0
 def _getStrandFromString(cls, val):
     """
     Convert a strand string to its internal value.

     '?' maps to BINARY_MISSING_VAL; any other value is handed to
     GenomeElementSource._getStrandFromString.
     """
     if val != '?':
         return GenomeElementSource._getStrandFromString(val)
     return BINARY_MISSING_VAL
 def __init__(self, *args, **kwArgs):
     """Initialise the base source and set ``_returnedOneElement`` to False."""
     GenomeElementSource.__init__(self, *args, **kwArgs)

     # Starts out False.
     self._returnedOneElement = False
예제 #18
0
 def parseFirstDataLine(self):
     """Return the result of GenomeElementSource.parseFirstDataLine for this source."""
     firstDataLine = GenomeElementSource.parseFirstDataLine(self)
     return firstDataLine
예제 #19
0
 def parseFirstDataLine(self):
     """Delegate to GenomeElementSource.parseFirstDataLine and return its result."""
     parsed = GenomeElementSource.parseFirstDataLine(self)
     return parsed
 def __init__(self,
              genome,
              trackName,
              region,
              valSlice,
              valDataType='float64'):
     """
     Initialise the base source without a file name and remember the region,
     the value slice and the value data type passed in.
     """
     GenomeElementSource.__init__(self,
                                  None,
                                  genome=genome,
                                  trackName=trackName)

     # Starts out False.
     self._returnedOneElement = False

     # Inputs stored on the instance.
     self._valDataType = valDataType
     self._region = region
     self._valSlice = valSlice
예제 #21
0
 def __init__(self, windowSource, genome, trackName, chr, func):
     """
     Initialise the base source without a file name and store the window
     source and function.

     The chromosome of ``self._genomeElement`` is set to *chr*, and the
     window iterator starts out as None.
     """
     GenomeElementSource.__init__(self, None, genome=genome, trackName=trackName)

     # Collaborators supplied by the caller.
     self._func = func
     self._windowSource = windowSource

     # No iterator yet.
     self._windowIter = None

     self._genomeElement.chr = chr
예제 #22
0
 def __init__(self, fn, *args, **kwArgs):
     """
     Initialise the base source, reset internal state via ``_initAll`` and
     process the start of the file.

     The first line of the file is passed to
     ``_handleTrackDefinitionLineIfPresent``, after which
     ``_parseFirstDeclarationLine`` is called.
     """
     GenomeElementSource.__init__(self, fn, *args, **kwArgs)
     self._initAll()

     # The first line may be a track definition line.
     firstLine = self._getFile().readline()
     self._handleTrackDefinitionLineIfPresent(firstLine)

     self._parseFirstDeclarationLine()
예제 #23
0
 def _checkBoundingRegionSortedPair(self, lastBoundingRegion, br):
     """
     Apply the base-class check to the pair, then ensure the two regions do
     not touch.

     Raises:
         InvalidFormatError: if *br* has a start and an end and its start
             equals the end of *lastBoundingRegion*.
     """
     GenomeElementSource._checkBoundingRegionSortedPair(self, lastBoundingRegion, br)

     hasBothCoords = br.start is not None and br.end is not None
     if hasBothCoords and lastBoundingRegion.end == br.start:
         regionPair = (lastBoundingRegion, br)
         raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % regionPair)
 def __init__(self, *args, **kwArgs):
     """Forward all arguments to GenomeElementSource and set ``_returnedOneElement`` to False."""
     GenomeElementSource.__init__(self, *args, **kwArgs)

     # Starts out False.
     self._returnedOneElement = False
예제 #25
0
def _commonStandardizeGtrackFile(fn, genome, suffix=None):
    """
    Standardize the elements of *fn* and return a composer with expanded headers.

    The file is read through a GenomeElementSource, wrapped in a
    GtrackElementStandardizer and composed with StdGtrackComposer. The
    composed output is then passed, in place of a file name, to
    expandHeadersOfGtrackFileAndReturnComposer, whose result is returned.
    """
    standardizedSource = GtrackElementStandardizer(
        GenomeElementSource(fn, genome, suffix=suffix))
    composedFile = StdGtrackComposer(standardizedSource).returnComposed()

    return expandHeadersOfGtrackFileAndReturnComposer(
        '', genome, strToUseInsteadOfFn=composedFile)
예제 #26
0
 def _getStrandFromString(cls, val):
     """
     Return BINARY_MISSING_VAL for '?'; otherwise return whatever
     GenomeElementSource._getStrandFromString gives for *val*.
     """
     return (BINARY_MISSING_VAL if val == '?'
             else GenomeElementSource._getStrandFromString(val))
예제 #27
0
 def __init__(self, *args, **kwArgs):
     """
     Forward all arguments to GenomeElementSource, then reset the bounding
     region tuples to an empty list and the current chromosome to None.
     """
     GenomeElementSource.__init__(self, *args, **kwArgs)

     # Instance state set after the base initialiser.
     self._chr = None
     self._boundingRegionTuples = []
예제 #28
0
 def __init__(self, geSource, genome=None):
     """
     Wrap *geSource* in a GEDependentAttributesHolder and initialise both
     base classes: GESourceWrapper with the wrapped source, and
     GenomeElementSource with an empty file name and the given genome.
     """
     from gtrackcore.input.wrappers.GEDependentAttributesHolder import GEDependentAttributesHolder

     wrappedSource = GEDependentAttributesHolder(geSource)
     GESourceWrapper.__init__(self, wrappedSource)
     GenomeElementSource.__init__(self, '', genome=genome)
예제 #29
0
 def __init__(self, geSource, genome=None):
     """
     Initialise the wrapper around *geSource*.

     The source is first wrapped in a GEDependentAttributesHolder, which is
     what GESourceWrapper receives. GenomeElementSource is then initialised
     with an empty file name and *genome*.
     """
     from gtrackcore.input.wrappers.GEDependentAttributesHolder import GEDependentAttributesHolder

     attributesHolder = GEDependentAttributesHolder(geSource)
     GESourceWrapper.__init__(self, attributesHolder)
     GenomeElementSource.__init__(self, '', genome=genome)
예제 #30
0
 def getPrefixList(self):
     """Delegate to GenomeElementSource.getPrefixList and return its result."""
     prefixes = GenomeElementSource.getPrefixList(self)
     return prefixes
예제 #31
0
    def __init__(self, fn, *args, **kwArgs):
        """
        Initialise the base source, call ``_initAll`` and parse the start of
        the file.

        The first line read from the file goes to
        ``_handleTrackDefinitionLineIfPresent``; ``_parseFirstDeclarationLine``
        is called afterwards.
        """
        GenomeElementSource.__init__(self, fn, *args, **kwArgs)
        self._initAll()

        # Hand the first line of the file to the track definition handler.
        openingLine = self._getFile().readline()
        self._handleTrackDefinitionLineIfPresent(openingLine)
        self._parseFirstDeclarationLine()