def _storeOtherDependentAttrs(self):
    """
    Store the properties reported by the wrapped element iterator.

    First delegates to GEDependentAttributesHolder._storeOtherDependentAttrs,
    then copies the values returned by the corresponding query methods of
    self._geIter onto private attributes of this object.
    """
    GEDependentAttributesHolder._storeOtherDependentAttrs(self)

    geIter = self._geIter

    # Boolean-style properties of the iterated elements.
    self._isSorted = geIter.isSorted()
    self._hasCircularElements = geIter.hasCircularElements()
    self._hasNoOverlappingElements = geIter.hasNoOverlappingElements()
    self._hasUndirectedEdges = geIter.hasUndirectedEdges()

    # Data types of the value and edge-weight columns.
    self._valDataType = geIter.getValDataType()
    self._edgeWeightDataType = geIter.getEdgeWeightDataType()

    # Re-tag the wrapped source object as a GtrackGenomeElementSource.
    # NOTE(review): presumably so later code treats it as a GTrack source
    # regardless of the class it was created with - confirm with callers.
    self._geSource.__class__ = GtrackGenomeElementSource
def testSorting(self):
    """
    Check that sorting every applicable GTrack test case yields sorted output.

    For each case whose name starts with "gtrack" (cases containing "no_sort"
    are skipped), the case is written to a file, sorted with
    sortGtrackFileAndReturnContents, and the sorted contents are parsed again.
    The parsed bounding regions must equal the sorted expected bounding
    regions; for representations that are not dense, the parsed elements must
    also equal the sorted expected element list.
    """
    geSourceTest = self._commonSetup()

    for caseName in geSourceTest.cases:
        if not caseName.startswith("gtrack"):
            continue

        if "no_sort" in caseName:
            print("Test case skipped: " + caseName)
            continue

        print(caseName)
        print("")

        case = geSourceTest.cases[caseName]
        testFn = self._writeTestFile(case)

        # Read through a context manager so the handle is closed right away
        # instead of being left open until garbage collection.
        with open(testFn) as testFile:
            print(testFile.read())
        print("")

        sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
        print(sortedContents)

        # Cases without an explicit source class use the generic
        # GenomeElementSource and are opened with forPreProcessor=True.
        usesDefaultSource = case.sourceClass is None
        sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass
        forPreProcessor = usesDefaultSource

        sortedGeSource = GEDependentAttributesHolder(
            sourceClass(
                "sortedFile.gtrack",
                case.genome,
                forPreProcessor=forPreProcessor,
                printWarnings=False,
                strToUseInsteadOfFn=sortedContents,
            )
        )

        reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()

        if not reprIsDense:
            self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
        else:
            # Elements are not compared for dense representations, but the
            # source is still iterated to the end before the bounding regions
            # are requested below.
            for ge in sortedGeSource:
                pass

        self.assertEquals(
            sorted(case.boundingRegionsAssertList),
            [br for br in sortedGeSource.getBoundingRegionTuples()],
        )
def _getSortedBoundingRegionsAndGenomeElements(geSource):
    """
    Collect and sort the bounding regions and elements of a source.

    geSource is wrapped in a GEDependentAttributesHolder, and the
    (bounding region tuple, element list) pairs produced by
    iterateOverBRTuplesWithContainedGEs are gathered into a list of
    two-item lists.

    If the first pair carries a bounding region (i.e. it is not None), the
    pairs are sorted by the 'region' attribute of their bounding region
    tuple. Each element list holding two or more elements is then sorted in
    place; processing stops altogether at the first such list whose first
    element reports reprIsDense().

    Returns a tuple (list of [brTuple, geList] pairs, wrapped geSource).
    Raises IndexError if the source produces no pairs at all.
    """
    geSource = GEDependentAttributesHolder(geSource)

    brAndGePairs = []
    for brTuple, geList in iterateOverBRTuplesWithContainedGEs(geSource):
        brAndGePairs.append([brTuple, geList])

    hasBoundingRegions = brAndGePairs[0][0] is not None
    if hasBoundingRegions:
        brAndGePairs.sort(key=lambda pair: pair[0].region)

    for _, elements in brAndGePairs:
        if len(elements) < 2:
            # Nothing to reorder in an empty or single-element list.
            continue
        if elements[0].reprIsDense():
            break
        elements.sort()

    return brAndGePairs, geSource
def __init__(self, geSource, genome=None):
    """
    Initialise the wrapper around geSource.

    geSource is first wrapped in a GEDependentAttributesHolder; that holder
    is what GESourceWrapper.__init__ receives. GenomeElementSource.__init__
    is then called with an empty file name and the given genome.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from gtrackcore.input.wrappers.GEDependentAttributesHolder import GEDependentAttributesHolder

    attrHoldingSource = GEDependentAttributesHolder(geSource)

    GESourceWrapper.__init__(self, attrHoldingSource)
    GenomeElementSource.__init__(self, '', genome=genome)
def _commonTestComposer(self, withTrackGESource, composerCls, suffix):
    """
    Round-trip every test case for one file format through a composer.

    For each case whose name equals 'suffix', or starts with 'suffix'
    followed by '_' or '.', the case is written to a file and read with its
    source class. The source (or, when withTrackGESource is true, a
    TrackGenomeElementSource built from the preprocessed track) is given to
    composerCls, the composed text is written to a temporary file, and that
    file is parsed again. Unless the case name opts out of checking, the
    re-parsed elements and bounding regions must match those of the case.

    Parameters:
        withTrackGESource: if true, compose from a preprocessed track instead
            of directly from the case's source.
        composerCls: composer class; called with the input source, and its
            instance must provide returnComposed().
        suffix: file format suffix; selects the cases and is used as the
            extension of the temporary file.
    """
    geSourceTest = self._commonSetup()

    for caseName in geSourceTest.cases:
        matchesSuffix = caseName == suffix or (
            caseName.startswith(suffix) and caseName[len(suffix)] in ['_', '.']
        )
        if not matchesSuffix:
            continue

        skipForTrack = withTrackGESource and (
            'no_track_extract' in caseName or caseName.endswith('_no_hb')
        )
        if 'no_print' in caseName or skipForTrack:
            print('Test case skipped: ' + caseName)
            continue

        print(caseName)

        case = geSourceTest.cases[caseName]
        testFn = self._writeTestFile(case)

        sourceClass = case.sourceClass if case.sourceClass is not None else GenomeElementSource
        genome = self.GENOME if withTrackGESource else case.genome

        rawCaseGESource = sourceClass(testFn, genome, printWarnings=False)
        caseGESource = GEDependentAttributesHolder(rawCaseGESource)

        if withTrackGESource:
            # Iterate the source to the end before asking for its bounding
            # regions. NOTE(review): presumably the holder only knows them
            # after a full pass - confirm.
            for _ in caseGESource:
                pass

            boundingRegionTuples = caseGESource.getBoundingRegionTuples()
            boundingRegions = [br.region for br in boundingRegionTuples]
            # Fall back to the global bins when there are no bounding regions
            # or none of them names a chromosome.
            if not boundingRegions or all(br.chr is None for br in boundingRegions):
                boundingRegions = GlobalBinSource(self.GENOME)

            trackName = self.TRACK_NAME_PREFIX + case.trackName
            self._preProcess(trackName)

            allowOverlaps = ('start' in case.prefixList) and \
                not caseName.endswith('_compose_no_overlaps')
            inputGESource = TrackGenomeElementSource(self.GENOME, trackName, boundingRegions,
                                                     printWarnings=False,
                                                     allowOverlaps=allowOverlaps)
        else:
            inputGESource = rawCaseGESource

        composer = composerCls(inputGESource)
        contents = composer.returnComposed()
        print(contents)

        # The context manager guarantees that the temporary file is closed
        # (and thereby removed) at the end of each case, also when one of the
        # assertions below fails.
        with NamedTemporaryFile('w', suffix='.' + suffix) as composedFile:
            composedFile.write(contents)
            # Flush so the composed text is on disk before it is re-opened
            # by name below.
            composedFile.flush()

            outputGESource = GEDependentAttributesHolder(
                sourceClass(composedFile.name, genome, printWarnings=False))

            skipChecks = 'no_check_print' in caseName or (
                withTrackGESource and 'no_check_track_extract' in caseName
            )
            if skipChecks:
                print('No checks for case: ' + caseName)
            else:
                # Elements are copied while iterating before being compared.
                caseGEs = [ge.getCopy() for ge in caseGESource]
                outputGEs = [ge.getCopy() for ge in outputGESource]

                isSortableGE = len(caseGEs) > 0 and any(
                    getattr(caseGEs[0], attr) is not None for attr in ['start', 'end'])
                if withTrackGESource and isSortableGE and \
                        not caseGESource.hasBoundingRegionTuples():
                    caseGEs = sorted(caseGEs)

                self.assertGenomeElementLists(caseGEs, outputGEs)
                self.assertListsOrDicts(caseGESource.getBoundingRegionTuples(),
                                        outputGESource.getBoundingRegionTuples())
def _decorateGESource(self, geSource):
    """Return geSource wrapped in a GEDependentAttributesHolder."""
    decoratedSource = GEDependentAttributesHolder(geSource)
    return decoratedSource
def testHeaderExpansion(self):
    """
    Check that removed GTrack headers are restored by header expansion.

    For each case whose name starts with 'gtrack' (cases containing
    'no_expand' are skipped), the header lines are normalised to lower case,
    the expandable ones are removed from the case, and the resulting file is
    passed to expandHeadersOfGtrackFileAndReturnContents, both with
    onlyNonDefault=False and onlyNonDefault=True.

    Unless the case name contains 'no_check_expand', the test then asserts
    that the fully expanded headers agree with the original expandable
    headers and with the headers that were kept, and that parsing either
    expanded variant gives the expected elements and bounding regions.
    """
    geSourceTest = self._commonSetup()

    for caseName in geSourceTest.cases:
        if not caseName.startswith('gtrack'):
            continue

        if 'no_expand' in caseName:
            print('Test case skipped: ' + caseName)
            continue

        onlyGuaranteed = 'no_types_expanded' in caseName

        print(caseName)
        print('===========')

        case = geSourceTest.cases[caseName]

        # Rewrite each header line as '##key: value' in lower case; other
        # lines are kept untouched.
        headerLines = []
        for origLine in case.headerLines:
            if self._isHeaderLine(origLine):
                keyAndValue = Gtrack.getHeaderKeyValue(origLine.strip())
                normalised = '##' + ': '.join(str(part).lower() for part in keyAndValue)
                headerLines.append(normalised)
            else:
                headerLines.append(origLine)

        print('Original:\n\n' + os.linesep.join(headerLines + case.lines))

        # The case written to file only keeps the headers that are not
        # expandable.
        case.headerLines = [hdr for hdr in headerLines
                            if not self._isExpandableHeader(hdr, onlyGuaranteed)]

        print('-----')
        print('With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines))

        testFn = self._writeTestFile(case)

        expandedContents = expandHeadersOfGtrackFileAndReturnContents(
            testFn, case.genome, onlyNonDefault=False)
        print('-----')
        print('With expanded headers:\n\n' + expandedContents)

        expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(
            testFn, case.genome, onlyNonDefault=True)
        print('-----')
        print('With expanded headers (only non-default headers):\n\n' +
              expandedContentsOnlyNonDefaults)

        origExpandableHeaders = dict(
            Gtrack.getHeaderKeyValue(hdr) for hdr in headerLines
            if self._isExpandableHeader(hdr, onlyGuaranteed=False))

        notExpandableHeaders = dict(
            Gtrack.getHeaderKeyValue(hdr) for hdr in case.headerLines
            if self._isHeaderLine(hdr) and not self._isValueNotKeptHeader(hdr))

        expandedHeaders = dict(
            Gtrack.getHeaderKeyValue(hdr) for hdr in expandedContents.split(os.linesep)
            if self._isHeaderLine(hdr))

        if 'no_check_expand' in caseName:
            print('No checks for case: ' + caseName)
            continue

        for header, origValue in origExpandableHeaders.items():
            self.assertEquals(origValue, expandedHeaders[header])

        for header, keptValue in notExpandableHeaders.items():
            self.assertEquals(keptValue, expandedHeaders[header])

        # Cases without an explicit source class use the generic
        # GenomeElementSource and are opened with forPreProcessor=True.
        usesDefaultSource = case.sourceClass is None
        sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass
        forPreProcessor = True if usesDefaultSource else False

        for contents in (expandedContents, expandedContentsOnlyNonDefaults):
            stdGeSource = GEDependentAttributesHolder(
                sourceClass('expanded.gtrack', case.genome,
                            forPreProcessor=forPreProcessor,
                            printWarnings=False,
                            strToUseInsteadOfFn=contents))

            self.assertEquals(case.assertElementList,
                              [element for element in stdGeSource])
            self.assertEquals(case.boundingRegionsAssertList,
                              [brTuple for brTuple in stdGeSource.getBoundingRegionTuples()])