Esempio n. 1
0
 def _determineHeaderLines(self, hbColumns, columns):
     """
     Populate the header dictionary for the file being composed.

     Every header is registered through self._setHeaderDict, mostly with
     values queried from self._geSource. When self._USE_EXTENDED_GTRACK is
     set, the fixed-size headers are registered too, the columns are adjusted
     according to the headers and the file is matched against subtypes.
     Finally, every entry of self._forcedHeaderDict that is still missing from
     self._headerDict is copied over.

     Returns the (possibly adjusted) pair (hbColumns, columns).
     """
     setHeader = self._setHeaderDict

     # Headers registered regardless of the GTrack flavour.
     setHeader('track type', Gtrack.getTrackTypeFromColumnSpec(columns))
     setHeader('value type', self._getGtrackValueType())
     setHeader('value dimension', Gtrack.getGtrackValueDimension(self._geSource.getValDim()))
     setHeader('undirected edges', self._geSource.hasUndirectedEdges())
     setHeader('edge weights', 'weights' in hbColumns)
     setHeader('edge weight type', self._getGtrackEdgeWeightType())
     setHeader('edge weight dimension', Gtrack.getGtrackValueDimension(self._geSource.getEdgeWeightDim()))
     setHeader('uninterrupted data lines', not self._hasMoreThanOneBoundingRegion())
     setHeader('sorted elements', self._geSource.isSorted())
     setHeader('no overlapping elements', self._geSource.hasNoOverlappingElements())
     setHeader('circular elements', self._geSource.hasCircularElements())

     compliesToSubtype = False
     if self._USE_EXTENDED_GTRACK:
         setHeader('fixed length', self._geSource.getFixedLength())
         setHeader('fixed gap size', self._geSource.getFixedGapSize())
         setHeader('fixed-size data lines', self._determineIfFixedSizeDataLines(columns))
         # The data line size is only registered for fixed-size data lines.
         if self._headerDict['fixed-size data lines']:
             setHeader('data line size', self._geSource.getValDim())

         hbColumns, columns = self._adjustColumnsAccordingToHeaderLines(hbColumns, columns)
         hbColumns, columns, compliesToSubtype = \
             self._determineIfFileCompliesToSubtypes(hbColumns, columns)

     if not compliesToSubtype:
         # Without a matching subtype, the indexing scheme is taken from the source.
         setHeader('1-indexed', self._geSource.inputIsOneIndexed())
         setHeader('end inclusive', self._geSource.inputIsEndInclusive())

     # Forced headers only fill in gaps; they never overwrite existing entries here.
     for forcedHeader, forcedVal in self._forcedHeaderDict.iteritems():
         if forcedHeader in self._headerDict:
             continue
         self._headerDict[forcedHeader] = forcedVal

     return hbColumns, columns
Esempio n. 2
0
def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """
    Complement the elements of one GTrack file with columns of another.

    origFn: file name of the GTrack file whose elements are complemented.
    dbFn: file name of the GTrack file supplying the extra information.
    intersectingFactor: 'id' or 'position'; selects the lookup structure
        built from dbFn (IdFullInfoDict or TupleFullInfoDict).
    gtrackColsToAdd: GTrack column names to add. If 'value' or 'edges' is
        among them, the corresponding type and dimension headers of dbFn are
        forced onto the composed file.
    genome: passed on to the element sources and to the header expansion.

    Returns the result of expandHeadersOfGtrackFileAndReturnComposer for the
    composed contents.

    Raises ShouldNotOccurError if intersectingFactor is neither 'id' nor
    'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)

    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # The exception must actually be raised; a bare name is a no-op and
        # would leave fullDbDict unbound further down.
        raise ShouldNotOccurError()

    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']

    # Keep the flavour (extended or standard) of the original file.
    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    composedFile = composerCls(ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd),
                               forcedHeaderDict=forcedHeaderDict).returnComposed()

    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
Esempio n. 3
0
    def _determineHeaderLines(self, hbColumns, columns):
        """
        Register all headers of the composed file in the header dictionary.

        Values come mostly from self._geSource and are stored with
        self._setHeaderDict. With self._USE_EXTENDED_GTRACK enabled, the
        fixed-size headers are stored as well, after which the columns are
        adjusted to the headers and the file is tested against subtypes.
        Entries of self._forcedHeaderDict that are absent from
        self._headerDict are added last.

        Returns the tuple (hbColumns, columns), possibly adjusted.
        """
        trackType = Gtrack.getTrackTypeFromColumnSpec(columns)
        self._setHeaderDict('track type', trackType)
        self._setHeaderDict('value type', self._getGtrackValueType())

        valueDim = Gtrack.getGtrackValueDimension(self._geSource.getValDim())
        self._setHeaderDict('value dimension', valueDim)

        self._setHeaderDict('undirected edges', self._geSource.hasUndirectedEdges())
        self._setHeaderDict('edge weights', 'weights' in hbColumns)
        self._setHeaderDict('edge weight type', self._getGtrackEdgeWeightType())

        edgeWeightDim = Gtrack.getGtrackValueDimension(self._geSource.getEdgeWeightDim())
        self._setHeaderDict('edge weight dimension', edgeWeightDim)

        singleBoundingRegion = not self._hasMoreThanOneBoundingRegion()
        self._setHeaderDict('uninterrupted data lines', singleBoundingRegion)

        self._setHeaderDict('sorted elements', self._geSource.isSorted())
        self._setHeaderDict('no overlapping elements', self._geSource.hasNoOverlappingElements())
        self._setHeaderDict('circular elements', self._geSource.hasCircularElements())

        compliesToSubtype = False
        if self._USE_EXTENDED_GTRACK:
            self._setHeaderDict('fixed length', self._geSource.getFixedLength())
            self._setHeaderDict('fixed gap size', self._geSource.getFixedGapSize())

            fixedSizeLines = self._determineIfFixedSizeDataLines(columns)
            self._setHeaderDict('fixed-size data lines', fixedSizeLines)
            # Re-read from the header dict: that is the value that decides.
            if self._headerDict['fixed-size data lines']:
                self._setHeaderDict('data line size', self._geSource.getValDim())

            adjusted = self._adjustColumnsAccordingToHeaderLines(hbColumns, columns)
            hbColumns, columns = adjusted
            subtypeResult = self._determineIfFileCompliesToSubtypes(hbColumns, columns)
            hbColumns, columns, compliesToSubtype = subtypeResult

        if not compliesToSubtype:
            # No subtype matched, so the indexing scheme follows the source.
            self._setHeaderDict('1-indexed', self._geSource.inputIsOneIndexed())
            self._setHeaderDict('end inclusive', self._geSource.inputIsEndInclusive())

        # Forced headers are only used to fill in headers not set above.
        for forcedKey, forcedValue in self._forcedHeaderDict.iteritems():
            if forcedKey in self._headerDict:
                continue
            self._headerDict[forcedKey] = forcedValue

        return hbColumns, columns
Esempio n. 4
0
 def _checkValidEnd(self, chr, end, start=None):
     """
     Check an end position, switching on 'circular elements' when needed.

     If a start is given, end <= start and the 'circular elements' header is
     not yet set, the header is set to True and start is replaced by None.
     The check itself is delegated to
     GtrackGenomeElementSource._checkValidEnd, whose result is returned.
     """
     endNotAfterStart = start is not None and end <= start
     if endNotAfterStart and not self._headerDict['circular elements']:
         self._headerDict['circular elements'] = True
         start = None

     return GtrackGenomeElementSource._checkValidEnd(self, chr, end, start)
Esempio n. 5
0
def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor,
                                gtrackColsToAdd, genome):
    """
    Add columns from a second GTrack file to the elements of a first one.

    origFn: file name of the GTrack file to complement.
    dbFn: file name of the GTrack file that provides the extra columns.
    intersectingFactor: either 'id' (an IdFullInfoDict is built from dbFn)
        or 'position' (a TupleFullInfoDict is built from dbFn).
    gtrackColsToAdd: GTrack column names to add. For 'value' and 'edges',
        the matching type/dimension headers of dbFn are forced onto the
        composed file.
    genome: handed to the element sources and to the header expansion.

    Returns whatever expandHeadersOfGtrackFileAndReturnComposer returns for
    the composed contents.

    Raises ShouldNotOccurError when intersectingFactor has any other value.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)

    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # Merely naming the exception class does nothing; it has to be
        # raised, or fullDbDict stays unbound and fails later on.
        raise ShouldNotOccurError()

    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict[
            'edge weight dimension']

    # The composer flavour follows the original file.
    if origGESource.isExtendedGtrackFile():
        composerCls = ExtendedGtrackComposer
    else:
        composerCls = StdGtrackComposer
    complementer = ElementComplementer(origGESource, fullDbDict,
                                       gtrackColsToAdd)
    composedFile = composerCls(
        complementer, forcedHeaderDict=forcedHeaderDict).returnComposed()

    return expandHeadersOfGtrackFileAndReturnComposer(
        '', genome, strToUseInsteadOfFn=composedFile)
Esempio n. 6
0
 def __init__(self, geSource, fullDbDict, gtrackColsToAdd):
     """
     Wrap geSource and remember which prefixes are to be added to it.

     geSource: the element source being wrapped.
     fullDbDict: stored as self._fullDbDict.
     gtrackColsToAdd: GTrack column names; each is converted with
         GtrackGenomeElementSource.convertNameFromGtrack. If 'edges' is among
         the converted names, 'weights' is added as well.
     """
     prefixesToAdd = []
     for gtrackCol in gtrackColsToAdd:
         prefixesToAdd.append(GtrackGenomeElementSource.convertNameFromGtrack(gtrackCol))
     if 'edges' in prefixesToAdd:
         prefixesToAdd.append('weights')
     self._prefixesToAdd = prefixesToAdd

     ElementModifierGESourceWrapper.__init__(self, geSource)

     self._fullDbDict = fullDbDict
     # The wrapped source's prefixes come first, followed by the added ones.
     self._prefixList = geSource.getPrefixList() + self._prefixesToAdd
Esempio n. 7
0
 def _parseEdges(self, edgeStr):
     """
     Inspect the edge weights of edgeStr, then parse it with the base class.

     Unless edgeStr is '.', it is split on ';'. For every part containing
     '=', the 'edge weights' header is switched on (if not already) and the
     text after the first '=' is passed to self._getValInCorrectType as an
     'edge weight'.

     Returns the result of GtrackGenomeElementSource._parseEdges.
     """
     if edgeStr != '.':
         for spec in edgeStr.split(';'):
             if '=' not in spec:
                 continue

             if not self._headerDict['edge weights']:
                 self._headerDict['edge weights'] = True

             weightStr = spec.split('=')[1]
             self._getValInCorrectType(weightStr, 'edge weight')

     return GtrackGenomeElementSource._parseEdges(self, edgeStr)
Esempio n. 8
0
 def _iter(self):
     """
     Reset the bookkeeping dicts and some headers, then start the iteration.

     The three bookkeeping dicts are replaced by new empty dicts, 'sorted
     elements' is set to True and, when the track type starts with 'linked',
     'undirected edges' is set to True as well.

     Returns the result of GtrackGenomeElementSource._iter.
     """
     self._valTypeIndexDict, self._valLenDict, self._allMissingDict = {}, {}, {}

     headers = self._headerDict
     #headers['no overlapping elements'] = True
     headers['sorted elements'] = True
     isLinkedTrack = headers['track type'].startswith('linked')
     if isLinkedTrack:
         headers['undirected edges'] = True

     return GtrackGenomeElementSource._iter(self)
Esempio n. 9
0
    def __init__(self, geSource, fullDbDict, gtrackColsToAdd):
        """
        Set up the wrapper around geSource.

        The names in gtrackColsToAdd are converted with
        GtrackGenomeElementSource.convertNameFromGtrack and kept in
        self._prefixesToAdd ('weights' is appended when 'edges' is one of
        them). fullDbDict is kept in self._fullDbDict, and self._prefixList
        becomes the prefix list of geSource followed by the added prefixes.
        """
        toInternalName = GtrackGenomeElementSource.convertNameFromGtrack
        addedPrefixes = [toInternalName(gtrackCol) for gtrackCol in gtrackColsToAdd]
        if 'edges' in addedPrefixes:
            addedPrefixes.append('weights')
        self._prefixesToAdd = addedPrefixes

        ElementModifierGESourceWrapper.__init__(self, geSource)

        self._fullDbDict = fullDbDict
        sourcePrefixes = geSource.getPrefixList()
        self._prefixList = sourcePrefixes + self._prefixesToAdd
Esempio n. 10
0
 def _getGtrackValueDim(self, val, valTypeInfo, valueOrEdgeWeight):
     """
     Record the length of val and return the corresponding value dimension.

     The length is the number of parts of val when split on
     valTypeInfo.delim, or len(val) if that delimiter is the empty string.
     The first length seen for valueOrEdgeWeight is stored in
     self._valLenDict; if a later length differs from the stored one, the
     stored value becomes 0. The stored value is then translated with
     GtrackGenomeElementSource.getGtrackValueDimension.
     """
     delim = valTypeInfo.delim
     if delim != '':
         valLen = len(val.split(delim))
     else:
         valLen = len(val)

     seenLengths = self._valLenDict
     if valueOrEdgeWeight not in seenLengths:
         seenLengths[valueOrEdgeWeight] = valLen
     elif seenLengths[valueOrEdgeWeight] != valLen:
         # Lengths disagree: remember that by storing 0.
         seenLengths[valueOrEdgeWeight] = 0

     return GtrackGenomeElementSource.getGtrackValueDimension(seenLengths[valueOrEdgeWeight])
Esempio n. 11
0
 def _composeBoundingRegionLine(self, boundingRegionTuple):
     """
     Return the '####' bounding region line for boundingRegionTuple.

     Works on a copy of boundingRegionTuple.region. With the '1-indexed'
     header set, start and end are increased by one; with 'end inclusive'
     set, end is decreased by one (None positions stay None). The genome,
     chr, start and end attributes that are not None are written as
     'name=value' pairs joined by '; ' and terminated by os.linesep.
     """
     def _shifted(pos, delta):
         # A missing position stays missing.
         return pos + delta if pos is not None else None

     region = copy(boundingRegionTuple.region)

     if self._headerDict['1-indexed']:
         region.start = _shifted(region.start, 1)
         region.end = _shifted(region.end, 1)
     if self._headerDict['end inclusive']:
         region.end = _shifted(region.end, -1)

     brLineParts = []
     for attr in ['genome', 'chr', 'start', 'end']:
         brLineParts.append((Gtrack.convertNameToGtrack(attr), getattr(region, attr)))

     assignments = []
     for gtrackName, attrVal in brLineParts:
         if attrVal is None:
             continue
         assignments.append(gtrackName + '=' + self._formatPhraseWithCorrectChrUsage(
             str(attrVal), useUrlEncoding=True, notAllowedChars='=;#\t'))

     return '####' + '; '.join(assignments) + os.linesep
Esempio n. 12
0
    def _composeBoundingRegionLine(self, boundingRegionTuple):
        """
        Compose the bounding region line ('####...') for one bounding region.

        A copy of boundingRegionTuple.region (via getCopy) is adjusted to the
        '1-indexed' and 'end inclusive' headers: start and end are shifted up
        by one for the former, end is shifted down by one for the latter.
        Positions that are None are left as None. Attributes genome, chr,
        start and end that are not None end up as 'name=value' pairs,
        separated by '; ', and the line is closed with os.linesep.
        """
        region = boundingRegionTuple.region.getCopy()

        if self._headerDict['1-indexed']:
            region.start = None if region.start is None else region.start + 1
            region.end = None if region.end is None else region.end + 1
        if self._headerDict['end inclusive']:
            region.end = None if region.end is None else region.end - 1

        attrNames = ['genome', 'chr', 'start', 'end']
        brLinePartList = [(Gtrack.convertNameToGtrack(name), getattr(region, name))
                          for name in attrNames]

        pairs = (key + '=' + self._formatPhraseWithCorrectChrUsage(
                     str(attrVal), useUrlEncoding=True, notAllowedChars='=;#\t')
                 for key, attrVal in brLinePartList if attrVal is not None)
        body = '; '.join(pairs)

        return '####' + body + os.linesep
Esempio n. 13
0
 def _handleEndOfFile(self):
     """
     Finish the pass over the file and correct the guessed headers.

     After the base class handling, a new iterator obtained from
     self.__iter__() is run to exhaustion, starting from the value type
     indices of this pass. Its value lengths are then adopted,
     'undirected edges' is set to False when no unique edge ids were
     collected, and the type header of 'value' / 'edge weight' is set to
     'number' when the second pass flagged all of its values as missing.
     """
     GtrackGenomeElementSource._handleEndOfFile(self)

     # To fix an issue where value dimension is "list" if the value type was
     # wrongly guessed for early elements.
     secondPass = self.__iter__()
     secondPass._valTypeIndexDict = self._valTypeIndexDict
     # The second pass uses the basic end-of-file handler instead of this method.
     secondPass._handleEndOfFile = secondPass._basicHandleEndOfFile

     # next() is called explicitly on purpose: a for loop would call
     # __iter__() on secondPass again.
     while True:
         try:
             secondPass.next()
         except StopIteration:
             break

     self._valLenDict = secondPass._valLenDict
     if len(self._uniqueEdgeIds) == 0:
         self._headerDict['undirected edges'] = False

     allMissing = secondPass._allMissingDict
     for kind in ('value', 'edge weight'):
         if kind in allMissing and allMissing[kind] == True:
             self._headerDict['%s type' % kind] = 'number'
Esempio n. 14
0
    def _getValInCorrectType(self, val, valueOrEdgeWeight='value', isEmptyElement=False):
        """
        Guess the type of val, update the headers and convert the value.

        The candidate types 'binary', 'number', 'category' and 'character'
        are tried in that order, skipping those before the index already
        stored for valueOrEdgeWeight in self._valTypeIndexDict. For the first
        type that val matches, the index is stored, the value dimension is
        determined, and the '<kind> type' / '<kind> dimension' headers are
        updated unless they are present in the headers of the file itself.

        Returns the result of GtrackGenomeElementSource._getValInCorrectType.

        Raises ShouldNotOccurError if val matches none of the candidates.
        """
        headerDictInFile = self.getHeaderDictInFile()
        typeHeader = '%s type' % valueOrEdgeWeight
        dimHeader = '%s dimension' % valueOrEdgeWeight

        candidateTypes = ['binary', 'number', 'category', 'character']
        for idx, typeName in enumerate(candidateTypes):
            alreadyPassed = (valueOrEdgeWeight in self._valTypeIndexDict and
                             self._valTypeIndexDict[valueOrEdgeWeight] > idx)
            if alreadyPassed:
                continue

            valTypeInfo = GtrackGenomeElementSource.VAL_TYPE_DICT[typeName]
            if not self._isValOfParticularType(val, valTypeInfo):
                continue

            self._noteIfAllValuesAreMissing(valueOrEdgeWeight, val, valTypeInfo)
            self._valTypeIndexDict[valueOrEdgeWeight] = idx

            valueDim = self._getGtrackValueDim(val, valTypeInfo, valueOrEdgeWeight)

            # Headers given in the file itself take precedence over guesses.
            if typeHeader not in headerDictInFile:
                self._headerDict[typeHeader] = typeName
            if dimHeader not in headerDictInFile:
                self._headerDict[dimHeader] = valueDim

            return GtrackGenomeElementSource._getValInCorrectType(
                self, val, valueOrEdgeWeight, isEmptyElement)

        raise ShouldNotOccurError()
Esempio n. 15
0
    def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
        """
        Find the first GTrack subtype that the composed file complies with.

        The candidate subtype URLs are the forced 'subtype url' header if one
        is given (no candidates at all if it is the empty string), otherwise
        self.GTRACK_PRIORITIZED_SUBTYPE_LIST. For each candidate the columns
        are rearranged according to the 'subtype adherence' of the subtype, a
        test file with a single data line is composed and parsed, and
        compliance is checked. 'redefinable' subtypes are tried twice, with
        the two possible orders of the 'value' and 'edges' columns.

        Returns (rearrangedHbColumns, rearrangedColumns, True) for the first
        complying subtype, after copying the non-default headers of the test
        file into the header dict. Returns (hbColumns, columns, False) when no
        subtype complies.

        Note that '1-indexed' and 'end inclusive' are overwritten with the
        values of every subtype that is tested, also when False is returned.
        """
        if 'subtype url' in self._forcedHeaderDict:
            subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
                if self._forcedHeaderDict['subtype url'] != '' else []
        else:
            subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST

        for subtypeUrl in subtypeUrlList:
            subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
            subtypeColumns = subtypeGESource.getColumns(orig=False)
            subtypeHeaders = subtypeGESource.getHeaderDict()

            # Redefinable subtypes get one attempt per value/edges ordering.
            numRepeats = 2 if subtypeHeaders[
                'subtype adherence'] == 'redefinable' else 1

            for repeat in range(numRepeats):
                self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
                self._setHeaderDict('end inclusive',
                                    subtypeHeaders['end inclusive'])

                if subtypeHeaders['subtype adherence'] in [
                        'reorderable', 'free'
                ]:
                    # No fixed column order: use the columns as they are.
                    rearrangedColumns = columns
                    rearrangedHbColumns = hbColumns
                else:
                    colSet = set(columns)
                    subtypeColSet = set(subtypeColumns)

                    if subtypeHeaders['subtype adherence'] == 'redefinable':
                        colsRemoved = list(subtypeColSet - colSet)
                        colsAdded = list(colSet - subtypeColSet)
                        # Only a one-to-one replacement of at most two
                        # columns is attempted.
                        if len(colsRemoved) != len(colsAdded) or len(
                                colsRemoved) > 2:
                            continue

                        colsRedefinedTo = [
                            'value', 'edges'
                        ] if repeat == 1 else ['edges', 'value']

                        rearrangedColumns = []
                        i, j = (0, 0)
                        for col in subtypeColumns:
                            if col in colsRemoved:
                                rearrangedColumns.append(colsRedefinedTo[i])
                                i += 1
                            elif col in colsRedefinedTo:
                                rearrangedColumns.append(colsAdded[j])
                                j += 1
                            else:
                                rearrangedColumns.append(col)

                        # Added columns not placed above go last.
                        for col in columns:
                            if col in colsAdded[j:]:
                                rearrangedColumns.append(col)
                    else:
                        # Subtype columns first (in subtype order), then the rest.
                        rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                            [x for x in columns if x not in subtypeColSet]
                    rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(
                        rearrangedColumns)

                try:
                    tempFile = StringIO()
                    self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                          deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)

                    gtrackGESource = Gtrack('subtype.test.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                            strToUseInsteadOfFn=tempFile.getvalue())
                    tempFile.close()

                    if gtrackGESource.compliesWithSubtype(subtypeUrl):
                        gtrackGESource._headerDict['subtype url'] = subtypeUrl
                        gtrackGESource._updateHeadersAccordingToSubtype()
                        updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                          if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                        for header in updatedHeaders:
                            self._setHeaderDict(header, updatedHeaders[header])

                        return rearrangedHbColumns, rearrangedColumns, True
                except Exception:
                    # Best effort: a candidate that cannot be composed or
                    # parsed simply does not comply.
                    continue

        # No candidate complied. Callers unpack three values, so the input
        # columns are handed back explicitly instead of an implicit None.
        return hbColumns, columns, False
Esempio n. 16
0
 def _createColumnSpec(self, cols, addAnyExtraFixedCols=True):
     """
     Create the column spec and derive the 'track type' header from it.

     The column spec is created by GtrackGenomeElementSource._createColumnSpec;
     afterwards the track type computed from self._columnSpec is stored in
     the header dict.
     """
     GtrackGenomeElementSource._createColumnSpec(self, cols, addAnyExtraFixedCols)

     trackType = GtrackGenomeElementSource.getTrackTypeFromColumnSpec(self._columnSpec)
     self._headerDict['track type'] = trackType
 def _isExpandableHeader(self, line, onlyGuaranteed):
     """
     Tell whether line is a header line with an expandable header key.

     The key counts as expandable when it is in EXPANDABLE_HEADERS or, if
     onlyGuaranteed is false, in NOT_GUARANTEED_EXPANDABLE_HEADERS. For a
     line that is not a header line, the (falsy) result of
     self._isHeaderLine is returned as is.
     """
     isHeader = self._isHeaderLine(line)
     if not isHeader:
         return isHeader

     headerKey = Gtrack.getHeaderKeyValue(line)[0]
     if headerKey in EXPANDABLE_HEADERS:
         return True
     return not onlyGuaranteed and headerKey in NOT_GUARANTEED_EXPANDABLE_HEADERS
Esempio n. 18
0
    def __init__(self, *args, **kwArgs):
        """
        Initialize through GtrackGenomeElementSource.__init__, passing all
        positional and keyword arguments on unchanged, and set
        self._noOverlappingElements to None.
        """
        GtrackGenomeElementSource.__init__(self, *args, **kwArgs)

        # NOTE(review): None presumably means "not determined yet" -- confirm
        # against the code that reads this attribute.
        self._noOverlappingElements = None
Esempio n. 19
0
 def _basicHandleEndOfFile(self):
     """
     Run only the end-of-file handling of GtrackGenomeElementSource.

     Calls the base class _handleEndOfFile directly, so nothing beyond the
     base class behaviour is executed.
     """
     GtrackGenomeElementSource._handleEndOfFile(self)
 def _isValueNotKeptHeader(self, line):
     """
     Tell whether line is a header line whose key is in
     VALUE_NOT_KEPT_HEADERS. For a line that is not a header line, the
     (falsy) result of self._isHeaderLine is returned as is.
     """
     isHeader = self._isHeaderLine(line)
     if not isHeader:
         return isHeader
     return Gtrack.getHeaderKeyValue(line)[0] in VALUE_NOT_KEPT_HEADERS
 def _isValueNotKeptHeader(self, line):
     """
     Return a true value only for header lines with a key listed in
     VALUE_NOT_KEPT_HEADERS; other header lines give False, and non-header
     lines give the falsy result of self._isHeaderLine.
     """
     headerCheck = self._isHeaderLine(line)
     if headerCheck:
         headerKey = Gtrack.getHeaderKeyValue(line)[0]
         return headerKey in VALUE_NOT_KEPT_HEADERS
     return headerCheck
Esempio n. 22
0
 def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
     """
     Look for the first GTrack subtype that the composed file complies with.

     Candidates are the forced 'subtype url' header when present (none if it
     is the empty string), else self.GTRACK_PRIORITIZED_SUBTYPE_LIST. For each
     candidate, the columns are rearranged as its 'subtype adherence'
     requires, a single-data-line test file is composed and parsed, and
     compliance with the subtype is checked. Subtypes with 'redefinable'
     adherence are tried twice, once per order of the 'value' and 'edges'
     columns.

     Returns (rearrangedHbColumns, rearrangedColumns, True) for the first
     complying subtype, after the non-default headers of the test file have
     been copied into the header dict; returns (hbColumns, columns, False)
     when none complies.

     Note that '1-indexed' and 'end inclusive' are overwritten with the values
     of every tested subtype, also when False is returned.
     """
     if 'subtype url' in self._forcedHeaderDict:
         subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
             if self._forcedHeaderDict['subtype url'] != '' else []
     else:
         subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST

     for subtypeUrl in subtypeUrlList:
         subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
         subtypeColumns = subtypeGESource.getColumns(orig=False)
         subtypeHeaders = subtypeGESource.getHeaderDict()

         # Redefinable subtypes get one attempt per value/edges ordering.
         numRepeats = 2 if subtypeHeaders['subtype adherence'] == 'redefinable' else 1

         for repeat in range(numRepeats):
             self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
             self._setHeaderDict('end inclusive', subtypeHeaders['end inclusive'])

             if subtypeHeaders['subtype adherence'] in ['reorderable', 'free']:
                 # No fixed column order: use the columns as they are.
                 rearrangedColumns = columns
                 rearrangedHbColumns = hbColumns
             else:
                 colSet = set(columns)
                 subtypeColSet = set(subtypeColumns)

                 if subtypeHeaders['subtype adherence'] == 'redefinable':
                     colsRemoved = list(subtypeColSet - colSet)
                     colsAdded = list(colSet - subtypeColSet)
                     # Only a one-to-one replacement of at most two columns
                     # is attempted.
                     if len(colsRemoved) != len(colsAdded) or len(colsRemoved) > 2:
                         continue

                     colsRedefinedTo = ['value', 'edges'] if repeat == 1 else ['edges', 'value']

                     rearrangedColumns = []
                     i,j = (0,0)
                     for col in subtypeColumns:
                         if col in colsRemoved:
                             rearrangedColumns.append(colsRedefinedTo[i])
                             i += 1
                         elif col in colsRedefinedTo:
                             rearrangedColumns.append(colsAdded[j])
                             j += 1
                         else:
                             rearrangedColumns.append(col)

                     # Added columns not placed above go last.
                     for col in columns:
                         if col in colsAdded[j:]:
                             rearrangedColumns.append(col)
                 else:
                     # Subtype columns first (in subtype order), then the rest.
                     rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                         [x for x in columns if x not in subtypeColSet]
                 rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(rearrangedColumns)

             try:
                 tempFile = StringIO()
                 self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                       deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)

                 gtrackGESource = Gtrack('subtype.test.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                         strToUseInsteadOfFn=tempFile.getvalue())
                 tempFile.close()

                 if gtrackGESource.compliesWithSubtype(subtypeUrl):
                     gtrackGESource._headerDict['subtype url'] = subtypeUrl
                     gtrackGESource._updateHeadersAccordingToSubtype()
                     updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                       if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                     for header in updatedHeaders:
                         self._setHeaderDict(header, updatedHeaders[header])

                     return rearrangedHbColumns, rearrangedColumns, True
             except Exception:
                 # Best effort: a candidate that cannot be composed or parsed
                 # simply does not comply.
                 continue

     # No candidate complied. Callers unpack three values, so the input
     # columns are handed back explicitly instead of an implicit None.
     return hbColumns, columns, False
    def testHeaderExpansion(self):
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue

            if 'no_expand' in caseName:
                print 'Test case skipped: ' + caseName
                continue

            onlyGuaranteed = 'no_types_expanded' in caseName

            print caseName
            print '==========='
            case = geSourceTest.cases[caseName]

            headerLines = [
                line if not self._isHeaderLine(line) else '##' + ': '.join([
                    str(x).lower()
                    for x in Gtrack.getHeaderKeyValue(line.strip())
                ]) for line in case.headerLines
            ]

            fullContents = os.linesep.join(headerLines + case.lines)
            print 'Original:\n\n' + fullContents

            case.headerLines = [
                line for line in headerLines
                if not self._isExpandableHeader(line, onlyGuaranteed)
            ]
            print '-----'
            print 'With headers removed:\n\n' + os.linesep.join(
                case.headerLines + case.lines)

            testFn = self._writeTestFile(case)

            expandedContents = expandHeadersOfGtrackFileAndReturnContents(
                testFn, case.genome, onlyNonDefault=False)

            print '-----'
            print 'With expanded headers:\n\n' + expandedContents

            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(
                testFn, case.genome, onlyNonDefault=True)

            print '-----'
            print 'With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults

            origExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in headerLines \
                                          if self._isExpandableHeader(line, onlyGuaranteed=False)])
            notExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in case.headerLines \
                                          if self._isHeaderLine(line) and not self._isValueNotKeptHeader(line)])
            expandedHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in expandedContents.split(os.linesep) \
                                    if self._isHeaderLine(line)])

            if 'no_check_expand' in caseName:
                print 'No checks for case: ' + caseName
            else:
                for header in origExpandableHeaders:
                    self.assertEquals(origExpandableHeaders[header],
                                      expandedHeaders[header])
                for header in notExpandableHeaders:
                    self.assertEquals(notExpandableHeaders[header],
                                      expandedHeaders[header])

                for contents in [
                        expandedContents, expandedContentsOnlyNonDefaults
                ]:

                    sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
                    forPreProcessor = True if case.sourceClass is None else False

                    stdGeSource = GEDependentAttributesHolder(sourceClass('expanded.gtrack', case.genome, \
                                                                          forPreProcessor=forPreProcessor, \
                                                                          printWarnings=False, \
                                                                          strToUseInsteadOfFn=contents))
Esempio n. 24
0
 def _getHbColumnsFromGtrackColumns(self, columns):
     """
     Return a list with every name in columns converted by
     Gtrack.convertNameFromGtrack, in the same order.
     """
     hbColumns = []
     for gtrackCol in columns:
         hbColumns.append(Gtrack.convertNameFromGtrack(gtrackCol))
     return hbColumns
Esempio n. 25
0
 def _getGtrackColumnsFromHbColumns(self, hbColumns):
     """
     Return the names in hbColumns converted by Gtrack.convertNameToGtrack,
     in the same order, leaving out every 'weights' entry.
     """
     gtrackColumns = []
     for hbCol in hbColumns:
         if hbCol == 'weights':
             continue
         gtrackColumns.append(Gtrack.convertNameToGtrack(hbCol))
     return gtrackColumns
Esempio n. 26
0
 def _getGtrackColumnsFromHbColumns(self, hbColumns):
     """
     Convert hbColumns with Gtrack.convertNameToGtrack, preserving the order.
     Entries equal to 'weights' are skipped and not converted.
     """
     toGtrackName = Gtrack.convertNameToGtrack
     keptColumns = (hbCol for hbCol in hbColumns if hbCol != 'weights')
     return [toGtrackName(hbCol) for hbCol in keptColumns]
 def _isExpandableHeader(self, line, onlyGuaranteed):
     """
     Return a true value when line is a header line whose key is expandable.

     Keys in EXPANDABLE_HEADERS always qualify; keys in
     NOT_GUARANTEED_EXPANDABLE_HEADERS qualify only when onlyGuaranteed is
     false. Non-header lines give the falsy result of self._isHeaderLine.
     """
     headerCheck = self._isHeaderLine(line)
     if headerCheck:
         key = Gtrack.getHeaderKeyValue(line)[0]
         expandable = key in EXPANDABLE_HEADERS
         if not expandable:
             expandable = not onlyGuaranteed and key in NOT_GUARANTEED_EXPANDABLE_HEADERS
         return expandable
     return headerCheck
Esempio n. 28
0
 def _getHbColumnsFromGtrackColumns(self, columns):
     """
     Convert every name in columns with Gtrack.convertNameFromGtrack and
     return the results as a list in the original order.
     """
     fromGtrackName = Gtrack.convertNameFromGtrack
     return [fromGtrackName(name) for name in columns]
Esempio n. 29
0
 def _checkUndirectedEdges(self):
     """
     Run the base class check of undirected edges for linked track types.

     Nothing is done unless the track type starts with 'linked'. If the base
     class check raises InvalidFormatError, the error is absorbed and the
     'undirected edges' header is set to False instead.
     """
     if not self._headerDict['track type'].startswith('linked'):
         return

     try:
         GtrackGenomeElementSource._checkUndirectedEdges(self)
     except InvalidFormatError:
         self._headerDict['undirected edges'] = False
    def testHeaderExpansion(self):
        """
        Check that removed headers are restored by header expansion.

        For every 'gtrack' test case (cases with 'no_expand' in the name are
        skipped), the expandable headers are removed from the header lines,
        the file is written and its headers are expanded, once with all
        headers and once with only the non-default ones. Unless the case name
        contains 'no_check_expand', the expanded headers must equal the
        original ones, and both expanded files must parse to the expected
        elements and bounding regions of the case.
        """
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_expand' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            # With 'no_types_expanded', only the guaranteed expandable headers are removed.
            onlyGuaranteed = 'no_types_expanded' in caseName
            
            print caseName
            print '==========='
            case = geSourceTest.cases[caseName]
            
            # Normalize header lines to lower case '##key: value' form.
            headerLines = [line if not self._isHeaderLine(line) else
                            '##' + ': '.join([str(x).lower() for x in Gtrack.getHeaderKeyValue(line.strip())])
                             for line in case.headerLines]
            
            fullContents = os.linesep.join(headerLines + case.lines)
            print 'Original:\n\n' + fullContents
            
            # Remove the headers that the expansion is expected to restore.
            case.headerLines = [line for line in headerLines if not self._isExpandableHeader(line, onlyGuaranteed)]
            print '-----'
            print 'With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines)
            
            testFn = self._writeTestFile(case)
            
            expandedContents = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=False)

            print '-----'
            print 'With expanded headers:\n\n' + expandedContents
            
            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=True)

            print '-----'
            print 'With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults
            
            # Header dictionaries to compare: the original expandable headers,
            # the headers kept in the stripped file, and the expanded headers.
            origExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in headerLines \
                                          if self._isExpandableHeader(line, onlyGuaranteed=False)])
            notExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in case.headerLines \
                                          if self._isHeaderLine(line) and not self._isValueNotKeptHeader(line)])
            expandedHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in expandedContents.split(os.linesep) \
                                    if self._isHeaderLine(line)])
            
            if 'no_check_expand' in caseName:
                print 'No checks for case: ' + caseName
            else:
                for header in origExpandableHeaders:
                    self.assertEquals(origExpandableHeaders[header], expandedHeaders[header])
                for header in notExpandableHeaders:
                    self.assertEquals(notExpandableHeaders[header], expandedHeaders[header])
                    
                # Both expanded variants must parse to the expected contents.
                for contents in [expandedContents, expandedContentsOnlyNonDefaults]:
                    
                    sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
                    forPreProcessor = True if case.sourceClass is None else False

                    stdGeSource = GEDependentAttributesHolder(sourceClass('expanded.gtrack', case.genome, \
                                                                          forPreProcessor=forPreProcessor, \
                                                                          printWarnings=False, \
                                                                          strToUseInsteadOfFn=contents))
                    
                    self.assertEquals(case.assertElementList, [ge for ge in stdGeSource])
                    self.assertEquals(case.boundingRegionsAssertList, [br for br in stdGeSource.getBoundingRegionTuples()])