예제 #1
0
 def _storeOtherDependentAttrs(self):
     """
     Store the attributes handled by the base holder, then cache further
     properties queried from the element iterator (self._geIter).

     Finally the class of the wrapped source is reassigned to
     GtrackGenomeElementSource.
     """
     GEDependentAttributesHolder._storeOtherDependentAttrs(self)

     # (attribute to set on self, zero-argument method to call on self._geIter),
     # evaluated in the listed order.
     cachedProperties = (
         ('_isSorted', 'isSorted'),
         ('_hasCircularElements', 'hasCircularElements'),
         ('_hasNoOverlappingElements', 'hasNoOverlappingElements'),
         ('_hasUndirectedEdges', 'hasUndirectedEdges'),
         ('_valDataType', 'getValDataType'),
         ('_edgeWeightDataType', 'getEdgeWeightDataType'),
     )
     for attrName, queryName in cachedProperties:
         setattr(self, attrName, getattr(self._geIter, queryName)())

     # NOTE(review): rebinding __class__ presumably makes the wrapped source
     # behave as a GTrack source from here on -- confirm the intent.
     self._geSource.__class__ = GtrackGenomeElementSource
예제 #2
0
    def testSorting(self):
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith("gtrack"):
                continue

            if "no_sort" in caseName:
                print "Test case skipped: " + caseName
                continue

            print caseName
            print

            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(
                sourceClass(
                    "sortedFile.gtrack",
                    case.genome,
                    forPreProcessor=forPreProcessor,
                    printWarnings=False,
                    strToUseInsteadOfFn=sortedContents,
                )
            )

            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList), [br for br in sortedGeSource.getBoundingRegionTuples()]
            )
예제 #3
0
def _getSortedBoundingRegionsAndGenomeElements(geSource):
    """
    Collect [bounding region tuple, element list] pairs from geSource and sort them.

    geSource is wrapped in a GEDependentAttributesHolder. The pairs produced by
    iterateOverBRTuplesWithContainedGEs are sorted by the region of their
    bounding region tuple, unless the first tuple is None (no bounding
    regions). Each element list with at least two elements is then sorted in
    place; sorting stops altogether at the first such list whose first element
    reports a dense representation.

    Returns a tuple (list of [brTuple, geList] pairs, wrapped geSource). If the
    iteration yields nothing, the list of pairs is empty.
    """
    geSource = GEDependentAttributesHolder(geSource)

    doubleElList = [[brTuple, geList]
                    for brTuple, geList in iterateOverBRTuplesWithContainedGEs(geSource)]

    # Guard against an empty result before peeking at the first pair; a None
    # tuple in the first pair means there are no bounding regions to sort by.
    hasBoundingRegions = bool(doubleElList) and doubleElList[0][0] is not None
    if hasBoundingRegions:
        doubleElList.sort(key=lambda pair: pair[0].region)

    for _, geList in doubleElList:
        if len(geList) >= 2:
            # NOTE(review): the break (not continue) skips all remaining lists
            # too; presumably density is uniform across the source -- confirm.
            if geList[0].reprIsDense():
                break
            geList.sort()

    return doubleElList, geSource
예제 #4
0
 def __init__(self, geSource, genome=None):
     """
     Initialise the wrapper around geSource.

     geSource is wrapped in a GEDependentAttributesHolder before being handed
     to GESourceWrapper; GenomeElementSource is initialised with an empty
     file name and the given genome.
     """
     # NOTE(review): function-level import, presumably to avoid a circular
     # import at module load time -- confirm.
     from gtrackcore.input.wrappers.GEDependentAttributesHolder import \
         GEDependentAttributesHolder as AttributesHolder

     GESourceWrapper.__init__(self, AttributesHolder(geSource))
     GenomeElementSource.__init__(self, '', genome=genome)
 def _commonTestComposer(self, withTrackGESource, composerCls, suffix):
     geSourceTest = self._commonSetup()
     
     for caseName in geSourceTest.cases:
         if not (caseName == suffix or \
                 (caseName.startswith(suffix) and caseName[len(suffix)] in ['_','.'])):
             continue
             
         if 'no_print' in caseName or \
             withTrackGESource and ('no_track_extract' in caseName or \
                                    caseName.endswith('_no_hb')):
             print 'Test case skipped: ' + caseName
             continue
             
         print caseName
         case = geSourceTest.cases[caseName]
         
         testFn = self._writeTestFile(case)
         sourceClass = case.sourceClass if case.sourceClass is not None else GenomeElementSource
         genome = self.GENOME if withTrackGESource else case.genome
         
         rawCaseGESource = sourceClass(testFn, genome, printWarnings=False)
         caseGESource = GEDependentAttributesHolder(rawCaseGESource)
         #actualSourceClass = caseGESource._geSource.__class__
         
         if withTrackGESource:
             for x in caseGESource:
                 pass
             
             boundingRegionTuples = caseGESource.getBoundingRegionTuples()
             boundingRegions = [br.region for br in boundingRegionTuples]
             if boundingRegions == [] or all(br.chr is None for br in boundingRegions):
                 boundingRegions = GlobalBinSource(self.GENOME)
             
             trackName = self.TRACK_NAME_PREFIX + case.trackName
             self._preProcess(trackName)
             
             allowOverlaps = True if ('start' in case.prefixList) and not caseName.endswith('_compose_no_overlaps') else False
             inputGESource = TrackGenomeElementSource(self.GENOME, trackName, boundingRegions, \
                                                      printWarnings=False, allowOverlaps=allowOverlaps)
         else:
             inputGESource = rawCaseGESource
                 
         composer = composerCls(inputGESource)
         contents = composer.returnComposed()
         print contents
         
         composedFile = NamedTemporaryFile('w', suffix='.' + suffix)
         composedFile.write(contents)
         composedFile.flush()
         
         #print actualSourceClass.__name__
         
         outputGESource = GEDependentAttributesHolder(sourceClass(composedFile.name, genome, printWarnings=False))
         
         if 'no_check_print' in caseName or withTrackGESource and 'no_check_track_extract' in caseName:
             print 'No checks for case: ' + caseName
         else:
             caseGEs = [ge.getCopy() for ge in caseGESource]
             outputGEs = [ge.getCopy() for ge in outputGESource]
             isSortableGE = any(getattr(caseGEs[0], x) is not None for x in ['start','end']) if len(caseGEs) > 0 else False
             if withTrackGESource and isSortableGE and not caseGESource.hasBoundingRegionTuples():
                 caseGEs = sorted(caseGEs)
             self.assertGenomeElementLists(caseGEs, outputGEs)
             self.assertListsOrDicts(caseGESource.getBoundingRegionTuples(), outputGESource.getBoundingRegionTuples())
예제 #6
0
 def _decorateGESource(self, geSource):
     """Wrap geSource in a GEDependentAttributesHolder and return the wrapper."""
     decoratedSource = GEDependentAttributesHolder(geSource)
     return decoratedSource
    def testHeaderExpansion(self):
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_expand' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            onlyGuaranteed = 'no_types_expanded' in caseName
            
            print caseName
            print '==========='
            case = geSourceTest.cases[caseName]
            
            headerLines = [line if not self._isHeaderLine(line) else
                            '##' + ': '.join([str(x).lower() for x in Gtrack.getHeaderKeyValue(line.strip())])
                             for line in case.headerLines]
            
            fullContents = os.linesep.join(headerLines + case.lines)
            print 'Original:\n\n' + fullContents
            
            case.headerLines = [line for line in headerLines if not self._isExpandableHeader(line, onlyGuaranteed)]
            print '-----'
            print 'With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines)
            
            testFn = self._writeTestFile(case)
            
            expandedContents = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=False)

            print '-----'
            print 'With expanded headers:\n\n' + expandedContents
            
            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=True)

            print '-----'
            print 'With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults
            
            origExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in headerLines \
                                          if self._isExpandableHeader(line, onlyGuaranteed=False)])
            notExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in case.headerLines \
                                          if self._isHeaderLine(line) and not self._isValueNotKeptHeader(line)])
            expandedHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in expandedContents.split(os.linesep) \
                                    if self._isHeaderLine(line)])
            
            if 'no_check_expand' in caseName:
                print 'No checks for case: ' + caseName
            else:
                for header in origExpandableHeaders:
                    self.assertEquals(origExpandableHeaders[header], expandedHeaders[header])
                for header in notExpandableHeaders:
                    self.assertEquals(notExpandableHeaders[header], expandedHeaders[header])
                    
                for contents in [expandedContents, expandedContentsOnlyNonDefaults]:
                    
                    sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
                    forPreProcessor = True if case.sourceClass is None else False

                    stdGeSource = GEDependentAttributesHolder(sourceClass('expanded.gtrack', case.genome, \
                                                                          forPreProcessor=forPreProcessor, \
                                                                          printWarnings=False, \
                                                                          strToUseInsteadOfFn=contents))
                    
                    self.assertEquals(case.assertElementList, [ge for ge in stdGeSource])
                    self.assertEquals(case.boundingRegionsAssertList, [br for br in stdGeSource.getBoundingRegionTuples()])