def _determineHeaderLines(self, hbColumns, columns):
     """Fill in the GTrack header dict and return the resulting column lists.

     Every header value is passed through ``self._setHeaderDict``.  The values
     come from the column specification, from ``self._geSource`` and from
     helper methods on this object.  When ``self._USE_EXTENDED_GTRACK`` is
     set, the fixed-size headers are added and the columns are run through
     ``_adjustColumnsAccordingToHeaderLines`` and
     ``_determineIfFileCompliesToSubtypes``.  '1-indexed' and 'end inclusive'
     are taken from the source only when ``compliesToSubtype`` ends up false.
     Finally, entries of ``self._forcedHeaderDict`` are copied into
     ``self._headerDict`` for keys that are not already present.

     Returns the tuple ``(hbColumns, columns)``.
     """
     # Each value is wrapped in a lambda so that it is computed immediately
     # before its own header is set, one header at a time.
     basicHeaders = (
         ('track type',
          lambda: Gtrack.getTrackTypeFromColumnSpec(columns)),
         ('value type',
          lambda: self._getGtrackValueType()),
         ('value dimension',
          lambda: Gtrack.getGtrackValueDimension(self._geSource.getValDim())),
         ('undirected edges',
          lambda: self._geSource.hasUndirectedEdges()),
         ('edge weights',
          lambda: ('weights' in hbColumns)),
         ('edge weight type',
          lambda: self._getGtrackEdgeWeightType()),
         ('edge weight dimension',
          lambda: Gtrack.getGtrackValueDimension(self._geSource.getEdgeWeightDim())),
         ('uninterrupted data lines',
          lambda: not self._hasMoreThanOneBoundingRegion()),
         ('sorted elements',
          lambda: self._geSource.isSorted()),
         ('no overlapping elements',
          lambda: self._geSource.hasNoOverlappingElements()),
         ('circular elements',
          lambda: self._geSource.hasCircularElements()),
     )
     for headerKey, computeValue in basicHeaders:
         self._setHeaderDict(headerKey, computeValue())

     compliesToSubtype = False
     if self._USE_EXTENDED_GTRACK:
         self._setHeaderDict('fixed length', self._geSource.getFixedLength())
         self._setHeaderDict('fixed gap size', self._geSource.getFixedGapSize())
         self._setHeaderDict('fixed-size data lines',
                             self._determineIfFixedSizeDataLines(columns))
         # Read the value back from the header dict rather than reusing the
         # computed one: it is whatever _setHeaderDict actually stored.
         if self._headerDict['fixed-size data lines']:
             self._setHeaderDict('data line size', self._geSource.getValDim())

         hbColumns, columns = \
             self._adjustColumnsAccordingToHeaderLines(hbColumns, columns)
         hbColumns, columns, compliesToSubtype = \
             self._determineIfFileCompliesToSubtypes(hbColumns, columns)

     if not compliesToSubtype:
         self._setHeaderDict('1-indexed', self._geSource.inputIsOneIndexed())
         self._setHeaderDict('end inclusive', self._geSource.inputIsEndInclusive())

     # Forced headers never overwrite a header that has already been set.
     for forcedKey, forcedVal in self._forcedHeaderDict.iteritems():
         if forcedKey in self._headerDict:
             continue
         self._headerDict[forcedKey] = forcedVal

     return hbColumns, columns
Example #2
0
    def getOptionsBox5(prevChoices):
        '''Return the ways the two selected GTrack files can be matched.

        prevChoices[2] and prevChoices[3] are the Galaxy track name strings
        (colon separated) of the source file and the DB file. If either is
        empty, None is returned. Otherwise the result is a list that contains
        'Element id' when both column specs have an 'id' column, and
        'Positional information' when 'start' or 'end' is common to both.
        '''
        if not (prevChoices[2] and prevChoices[3]):
            return None

        fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
            prevChoices[2].split(':'))
        fnDB = ExternalTrackManager.extractFnFromGalaxyTN(
            prevChoices[3].split(':'))

        dbCols = GtrackGenomeElementSource(fnDB).getColumnSpec().keys()
        sourceCols = GtrackGenomeElementSource(fnSource).getColumnSpec().keys()
        sharedCols = set(dbCols) & set(sourceCols)

        options = []
        if 'id' in dbCols and 'id' in sourceCols:
            options.append('Element id')
        if 'start' in sharedCols or 'end' in sharedCols:
            options.append('Positional information')

        return options
 def _getHeaders(prevChoices):
     """Return the list of column headers implied by the user's choices.

     With 'Select individual columns', one entry is produced per input
     column: the chosen ``column<i>`` value, the stripped ``customColumn<i>``
     text for '-- custom --', or '' when the attribute is missing, None or
     '-- ignore --'.  For any other selection the columns are read from the
     GTrack file referenced by ``colSpecFile`` and cut to the number of input
     columns; an Exception raised while reading that file yields [].
     """
     numCols = TabularToGtrackTool._getFileContentsInfo(prevChoices).numCols

     if prevChoices.columnSelection != 'Select individual columns':
         if prevChoices.selectGenome == 'Yes':
             genome = prevChoices.genome
         else:
             genome = None
         inFn = ExternalTrackManager.extractFnFromGalaxyTN(prevChoices.colSpecFile.split(':'))
         try:
             geSource = GtrackGenomeElementSource(inFn, genome=genome)
             geSource.parseFirstDataLine()
             return geSource.getColumns()[:numCols]
         except Exception:
             # Best effort: an unreadable column spec file means no headers.
             return []

     header = []
     for i in xrange(numCols):
         attrName = 'column%s' % i
         if not hasattr(prevChoices, attrName):
             header.append('')
             continue

         colHeader = getattr(prevChoices, attrName)
         if colHeader is None or colHeader == '-- ignore --':
             header.append('')
         elif colHeader == '-- custom --':
             customName = getattr(prevChoices, 'customColumn%s' % i)
             header.append(customName.strip())
         else:
             header.append(colHeader)
     return header
Example #4
0
    def _createColumnSpec(self, cols, addAnyExtraFixedCols=True):
        """Create the column spec via the base class and refresh 'track type'.

        After ``GtrackGenomeElementSource._createColumnSpec`` has run, the
        'track type' header is set from ``self._columnSpec``.
        """
        GtrackGenomeElementSource._createColumnSpec(
            self, cols, addAnyExtraFixedCols)

        trackType = GtrackGenomeElementSource.getTrackTypeFromColumnSpec(
            self._columnSpec)
        self._headerDict['track type'] = trackType
Example #5
0
 def _getHeaders(prevChoices):
     """Return the list of column headers implied by the user's choices.

     With 'Base columns on existing GTrack file', the columns are read from
     the GTrack file referenced by ``colSpecFile`` and cut to the number of
     input columns; an Exception raised while locating or reading that file
     yields [].  For any other selection one entry is produced per input
     column: the chosen ``column<i>`` value, the stripped ``customColumn<i>``
     text for '-- custom --', or '' when the attribute is missing, None or
     '-- ignore --'.
     """
     numCols = TabularToGtrackTool._getFileContentsInfo(prevChoices).numCols

     if prevChoices.columnSelection == 'Base columns on existing GTrack file':
         genome = None
         if prevChoices.selectGenome == 'Yes':
             genome = prevChoices.genome
         try:
             galaxyTN = prevChoices.colSpecFile.split(':')
             inFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
             geSource = GtrackGenomeElementSource(inFn, genome=genome)
             geSource.parseFirstDataLine()
             return geSource.getColumns()[:numCols]
         except Exception:
             # Best effort: no usable column spec file means no headers.
             return []

     header = []
     for colIdx in xrange(numCols):
         if hasattr(prevChoices, 'column%s' % colIdx):
             colHeader = getattr(prevChoices, 'column%s' % colIdx)
             if colHeader == '-- custom --':
                 colHeader = getattr(prevChoices, 'customColumn%s' % colIdx).strip()
             elif colHeader is None or colHeader == '-- ignore --':
                 colHeader = ''
         else:
             colHeader = ''
         header.append(colHeader)
     return header
def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """Compose a GTrack file from ``origFn`` complemented via ``dbFn``.

    Both files are opened as GtrackGenomeElementSource objects. A lookup is
    built from the DB source (IdFullInfoDict for 'id', TupleFullInfoDict for
    'position') and handed, together with ``gtrackColsToAdd``, to an
    ElementComplementer wrapping the original source. When 'value' or 'edges'
    is among the added columns, the matching type/dimension headers of the DB
    file are forced onto the composed file.

    Returns the result of expandHeadersOfGtrackFileAndReturnComposer applied
    to the composed file contents.

    Raises ShouldNotOccurError if ``intersectingFactor`` is neither 'id' nor
    'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)
    
    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # This used to be a bare name expression (a no-op), which let the
        # function continue and fail later on the unbound 'fullDbDict'.
        raise ShouldNotOccurError()
        
    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()
    
    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']
    
    # Keep the composed file in the same GTrack flavour as the original.
    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    complementer = ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd)
    composedFile = composerCls(complementer, forcedHeaderDict=forcedHeaderDict).returnComposed()
        
    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
 def getOptionsBox6(prevChoices):
     """Return the DB-only columns as an OrderedDict of unchecked options.

     prevChoices[2] and prevChoices[3] are the colon-separated Galaxy track
     names of the source and DB GTrack files.  Returns None when
     prevChoices[3] is empty; otherwise every column of the DB file that is
     not a column of the source file is mapped to False, in DB column order.
     """
     if not prevChoices[3]:
         return None

     fnSource = ExternalTrackManager.extractFnFromGalaxyTN(prevChoices[2].split(':'))
     fnDB = ExternalTrackManager.extractFnFromGalaxyTN(prevChoices[3].split(':'))

     gtrackDB = GtrackGenomeElementSource(fnDB)
     gtrackSource = GtrackGenomeElementSource(fnSource)

     extraDbColumnsDict = OrderedDict()
     for dbColumn in gtrackDB.getColumns():
         if dbColumn in gtrackSource.getColumns():
             continue
         extraDbColumnsDict[dbColumn] = False
     return extraDbColumnsDict
Example #8
0
    def _determineHeaderLines(self, hbColumns, columns):
        """Fill in the GTrack header dict and return the resulting columns.

        Header values are derived from the column specification, from
        ``self._geSource`` and from helper methods, and are stored through
        ``self._setHeaderDict``. With ``self._USE_EXTENDED_GTRACK`` set, the
        fixed-size headers are added and the columns are adjusted and checked
        against subtypes. '1-indexed' and 'end inclusive' come from the source
        only when ``compliesToSubtype`` is false. Entries of
        ``self._forcedHeaderDict`` are applied last, and only for keys that
        are not yet in ``self._headerDict``.

        Returns the tuple ``(hbColumns, columns)``.
        """
        setHeader = self._setHeaderDict

        setHeader('track type', Gtrack.getTrackTypeFromColumnSpec(columns))
        setHeader('value type', self._getGtrackValueType())
        setHeader('value dimension',
                  Gtrack.getGtrackValueDimension(self._geSource.getValDim()))
        setHeader('undirected edges', self._geSource.hasUndirectedEdges())
        setHeader('edge weights', ('weights' in hbColumns))
        setHeader('edge weight type', self._getGtrackEdgeWeightType())
        setHeader('edge weight dimension',
                  Gtrack.getGtrackValueDimension(
                      self._geSource.getEdgeWeightDim()))
        setHeader('uninterrupted data lines',
                  not self._hasMoreThanOneBoundingRegion())
        setHeader('sorted elements', self._geSource.isSorted())
        setHeader('no overlapping elements',
                  self._geSource.hasNoOverlappingElements())
        setHeader('circular elements', self._geSource.hasCircularElements())

        compliesToSubtype = False
        if self._USE_EXTENDED_GTRACK:
            setHeader('fixed length', self._geSource.getFixedLength())
            setHeader('fixed gap size', self._geSource.getFixedGapSize())
            setHeader('fixed-size data lines',
                      self._determineIfFixedSizeDataLines(columns))
            # Consult the stored header, i.e. what _setHeaderDict kept.
            if self._headerDict['fixed-size data lines']:
                setHeader('data line size', self._geSource.getValDim())

            adjusted = self._adjustColumnsAccordingToHeaderLines(
                hbColumns, columns)
            hbColumns, columns = adjusted
            subtypeResult = self._determineIfFileCompliesToSubtypes(
                hbColumns, columns)
            hbColumns, columns, compliesToSubtype = subtypeResult

        if not compliesToSubtype:
            setHeader('1-indexed', self._geSource.inputIsOneIndexed())
            setHeader('end inclusive', self._geSource.inputIsEndInclusive())

        # Forced headers only fill gaps; existing headers are left alone.
        for forcedKey, forcedVal in self._forcedHeaderDict.iteritems():
            if forcedKey in self._headerDict:
                continue
            self._headerDict[forcedKey] = forcedVal

        return hbColumns, columns
 def _checkValidEnd(self, chr, end, start=None):
     """Validate ``end`` via the base class, noting circular elements.

     The first time an element with ``end <= start`` is seen while the
     'circular elements' header is still false, the header is switched to
     True and the base check is called with ``start=None``.
     """
     if start is not None and end <= start \
             and not self._headerDict['circular elements']:
         self._headerDict['circular elements'] = True
         start = None

     return GtrackGenomeElementSource._checkValidEnd(self, chr, end, start)
Example #10
0
 def getGeSourceList(cls, genome, tracks):
     """Return genome element sources and display names for ``tracks``.

     Each track is first treated as a Galaxy history track name: its file
     suffix selects BedCategoryGenomeElementSource ('category.bed'),
     GtrackGenomeElementSource ('gtrack') or BedGenomeElementSource (anything
     else). If that fails with an Exception, the track is treated as a track
     in the HB track repository and read with FullTrackGenomeElementSource
     (``allowOverlaps=True``), using the colon-joined track name.

     Returns ``(geSourceList, trackNamesWithoutPath)``, two lists with exactly
     one entry per input track, in input order.
     """
     from quick.application.ExternalTrackManager import ExternalTrackManager
     from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
     from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
     from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource
     geSourceList = []
     trackNamesWithoutPath = []
     for track in tracks:
         try:
             fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                 track)
             fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
             if fileType == 'category.bed':
                 geSource = BedCategoryGenomeElementSource(fn)
             elif fileType == 'gtrack':
                 geSource = GtrackGenomeElementSource(fn)
             else:
                 geSource = BedGenomeElementSource(fn)
             trackName = ExternalTrackManager.extractNameFromHistoryTN(track)
         except Exception:
             # It is not a history item, so it must be in the HB track
             # repository. 'Exception' (not a bare except) lets
             # KeyboardInterrupt/SystemExit propagate.
             geSource = FullTrackGenomeElementSource(genome,
                                                     track,
                                                     allowOverlaps=True)
             trackName = ':'.join(track)

         # Append once per track, after the try, so that a failure halfway
         # through the history branch cannot leave the two lists out of step.
         geSourceList.append(geSource)
         trackNamesWithoutPath.append(trackName)
     return geSourceList, trackNamesWithoutPath
Example #11
0
    def _getValInCorrectType(self,
                             val,
                             valueOrEdgeWeight='value',
                             isEmptyElement=False):
        """Record the detected type and dimension of ``val``, then convert it.

        The candidate types are tried in the order 'binary', 'number',
        'category', 'character', skipping those ranked below the index already
        stored for ``valueOrEdgeWeight`` in ``self._valTypeIndexDict``. For
        the first type that ``val`` matches, the index is stored and the
        '<valueOrEdgeWeight> type' / '<valueOrEdgeWeight> dimension' headers
        are updated unless they are present in the file's own header dict.
        The converted value comes from the base class implementation.

        Raises ShouldNotOccurError if no candidate type matches.
        """
        headerDictInFile = self.getHeaderDictInFile()
        typeKey = '%s type' % valueOrEdgeWeight
        dimKey = '%s dimension' % valueOrEdgeWeight

        valTypeList = ['binary', 'number', 'category', 'character']
        for typeIdx, valueType in enumerate(valTypeList):
            if valueOrEdgeWeight in self._valTypeIndexDict \
                    and typeIdx < self._valTypeIndexDict[valueOrEdgeWeight]:
                continue

            valTypeInfo = GtrackGenomeElementSource.VAL_TYPE_DICT[valueType]
            if not self._isValOfParticularType(val, valTypeInfo):
                continue

            self._noteIfAllValuesAreMissing(valueOrEdgeWeight, val,
                                            valTypeInfo)
            self._valTypeIndexDict[valueOrEdgeWeight] = typeIdx

            valueDim = self._getGtrackValueDim(val, valTypeInfo,
                                               valueOrEdgeWeight)

            # Headers stated explicitly in the file take precedence.
            if typeKey not in headerDictInFile:
                self._headerDict[typeKey] = valueType
            if dimKey not in headerDictInFile:
                self._headerDict[dimKey] = valueDim

            return GtrackGenomeElementSource._getValInCorrectType(
                self, val, valueOrEdgeWeight, isEmptyElement)

        raise ShouldNotOccurError()
Example #12
0
    def _checkValidEnd(self, chr, end, start=None):
        """Validate ``end`` via the base class, noting circular elements.

        When ``end <= start`` and the 'circular elements' header is still
        false, the header is set to True and ``start`` is dropped (None)
        before delegating to the base class check.
        """
        endNotAfterStart = start is not None and end <= start
        if endNotAfterStart and not self._headerDict['circular elements']:
            self._headerDict['circular elements'] = True
            start = None

        return GtrackGenomeElementSource._checkValidEnd(self, chr, end, start)
Example #13
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """Sort the binning track elements and print the result.

        ``choices[0]`` is the genome, ``choices[1]``/``choices[2]`` select the
        first track (preprocessed from history when ``choices[1]`` is
        'history'), and ``choices[3]`` is the Galaxy track name of the binning
        file, whose second colon-separated field is its format.

        Raises InvalidFormatError when the binning file is not one of
        'valued.bed', 'category.bed', 'bed' or 'gtrack'.
        """
        from quick.application.ExternalTrackManager import ExternalTrackManager

        genome = choices[0]
        if choices[1] == 'history':
            preProcTN1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                genome, choices[2].split(':'))
        else:
            preProcTN1 = choices[2].split(':')

        # NOTE(review): chrSizeDict is not used below in this method; the
        # computation is kept because the GenomeInfo calls are opaque here.
        chrSizeDict = dict((chrom, GenomeInfo.getChrLen(genome, chrom))
                           for chrom in GenomeInfo.getChrList(genome))

        binningTN = choices[3].split(':')
        trackType = binningTN[1]
        fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
            choices[3].split(':'))

        if trackType in ['valued.bed', 'category.bed', 'bed']:
            sourceCls = BedGenomeElementSource
        elif trackType == 'gtrack':
            sourceCls = GtrackGenomeElementSource
        else:
            raise InvalidFormatError(
                'The Binning must be of the following formats: gtrack, valued.bed, category.bed ,bed ...'
            )

        sortedSource = GenomeElementSorter(sourceCls(fnSource, genome=genome))
        geSource = sortedSource.__iter__()

        cls.PrintResultToHistItem(galaxyFn, geSource, preProcTN1, genome,
                                  username)
Example #14
0
def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """Compose a GTrack file from ``origFn`` complemented via ``dbFn``.

    Both files are opened as GtrackGenomeElementSource objects. A lookup is
    built from the DB source (IdFullInfoDict for 'id', TupleFullInfoDict for
    'position') and handed, together with ``gtrackColsToAdd``, to an
    ElementComplementer wrapping the original source. When 'value' or 'edges'
    is among the added columns, the matching type/dimension headers of the DB
    file are forced onto the composed file.

    Returns the result of expandHeadersOfGtrackFileAndReturnComposer applied
    to the composed file contents.

    Raises ShouldNotOccurError if ``intersectingFactor`` is neither 'id' nor
    'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)
    
    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # This used to be a bare name expression (a no-op), which let the
        # function continue and fail later on the unbound 'fullDbDict'.
        raise ShouldNotOccurError()
        
    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()
    
    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']
    
    # Keep the composed file in the same GTrack flavour as the original.
    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    complementer = ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd)
    composedFile = composerCls(complementer, forcedHeaderDict=forcedHeaderDict).returnComposed()
        
    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
Example #15
0
 def __init__(self, geSource, fullDbDict, gtrackColsToAdd):
     """Wrap ``geSource`` and remember which prefixes are to be added.

     ``gtrackColsToAdd`` is converted with
     ``GtrackGenomeElementSource.convertNameFromGtrack``; when 'edges' is
     among the converted names, 'weights' is added as well.  The resulting
     prefix list is the wrapped source's prefixes followed by the added ones.
     """
     prefixes = []
     for col in gtrackColsToAdd:
         prefixes.append(GtrackGenomeElementSource.convertNameFromGtrack(col))
     if 'edges' in prefixes:
         prefixes.append('weights')
     # Set before the base initializer runs, as in the original ordering.
     self._prefixesToAdd = prefixes

     ElementModifierGESourceWrapper.__init__(self, geSource)

     self._fullDbDict = fullDbDict
     self._prefixList = geSource.getPrefixList() + self._prefixesToAdd
 def _parseEdges(self, edgeStr):
     """Inspect the edge weights in ``edgeStr``, then parse it via the base class.

     ``edgeStr`` is either '.' or a ';'-separated list of edge specs.  For
     every spec that contains '=', the 'edge weights' header is switched to
     True (if not already) and the text after the first '=' is passed to
     ``_getValInCorrectType`` as an 'edge weight'.

     Returns whatever ``GtrackGenomeElementSource._parseEdges`` returns.
     """
     if edgeStr != '.':
         for edgeSpec in edgeStr.split(';'):
             # Test the individual spec, not the whole string: with mixed
             # input such as 'a=1;b' the old check on edgeStr let the
             # weight-less spec 'b' through and split('=')[1] raised
             # IndexError.
             if '=' in edgeSpec:
                 if not self._headerDict['edge weights']:
                     self._headerDict['edge weights'] = True
                 self._getValInCorrectType(edgeSpec.split('=')[1], 'edge weight')
     
     return GtrackGenomeElementSource._parseEdges(self, edgeStr)
 def __init__(self, geSource, fullDbDict, gtrackColsToAdd):
     """Set up the wrapper around ``geSource`` and the prefixes to add.

     The GTrack column names in ``gtrackColsToAdd`` are converted with
     ``GtrackGenomeElementSource.convertNameFromGtrack``.  If 'edges' is one
     of the converted names, 'weights' is appended too.  ``_prefixList``
     becomes the source's own prefix list plus the added prefixes.
     """
     convert = GtrackGenomeElementSource.convertNameFromGtrack
     self._prefixesToAdd = [convert(gtrackCol) for gtrackCol in gtrackColsToAdd]
     addsEdges = 'edges' in self._prefixesToAdd
     if addsEdges:
         self._prefixesToAdd.append('weights')

     ElementModifierGESourceWrapper.__init__(self, geSource)

     self._fullDbDict = fullDbDict
     ownPrefixes = geSource.getPrefixList()
     self._prefixList = ownPrefixes + self._prefixesToAdd
Example #18
0
    def _parseEdges(self, edgeStr):
        """Inspect the edge weights in ``edgeStr``, then parse it via the base.

        ``edgeStr`` is either '.' or a ';'-separated list of edge specs. Each
        spec containing '=' switches the 'edge weights' header to True (if it
        is not already) and has the text after its first '=' passed to
        ``_getValInCorrectType`` as an 'edge weight'.
        """
        edgeSpecs = edgeStr.split(';') if edgeStr != '.' else []
        for edgeSpec in edgeSpecs:
            if '=' not in edgeSpec:
                continue
            if not self._headerDict['edge weights']:
                self._headerDict['edge weights'] = True
            weightStr = edgeSpec.split('=')[1]
            self._getValInCorrectType(weightStr, 'edge weight')

        return GtrackGenomeElementSource._parseEdges(self, edgeStr)
 def _iter(self):
     """Reset the per-iteration bookkeeping and start the base iteration.

     The value-type, value-length and all-missing dicts are emptied.  The
     'sorted elements' header is preset to True, and 'undirected edges' is
     preset to True when the track type starts with 'linked'.
     """
     self._valTypeIndexDict, self._valLenDict, self._allMissingDict = {}, {}, {}

     # NOTE: 'no overlapping elements' is not preset here; an earlier
     # assignment of True for it was commented out.
     headers = self._headerDict
     headers['sorted elements'] = True
     isLinked = headers['track type'].startswith('linked')
     if isLinked:
         headers['undirected edges'] = True

     return GtrackGenomeElementSource._iter(self)
Example #20
0
    def _iter(self):
        """Reset the per-iteration bookkeeping and start the base iteration.

        Empties the value-type, value-length and all-missing dicts, presets
        the 'sorted elements' header to True and, for track types starting
        with 'linked', presets 'undirected edges' to True.
        """
        for attrName in ('_valTypeIndexDict', '_valLenDict', '_allMissingDict'):
            setattr(self, attrName, {})

        # NOTE: 'no overlapping elements' is not preset here; an earlier
        # assignment of True for it was commented out.
        self._headerDict['sorted elements'] = True
        trackType = self._headerDict['track type']
        if trackType.startswith('linked'):
            self._headerDict['undirected edges'] = True

        return GtrackGenomeElementSource._iter(self)
 def getOptionsBoxTrackType(prevChoices):
     """Return a read-only display of the track type implied by the headers.

     Returns None when columns are to be based on an existing GTrack file but
     none is chosen, when neither ``history`` nor ``input`` is set, or when no
     track type can be derived.  Otherwise returns ``(text, 1, True)`` where
     ``text`` is the capitalized track type followed by its initials in
     parentheses.
     """
     basedOnGtrack = prevChoices.columnSelection == 'Base columns on existing GTrack file'
     if basedOnGtrack and not prevChoices.colSpecFile:
         return None

     if not (prevChoices.history or prevChoices.input):
         return None

     headers = TabularToGtrackTool._getHeaders(prevChoices)
     trackType = GtrackGenomeElementSource.getTrackTypeFromColumnSpec(headers)
     if trackType is None:
         return None

     words = [word.capitalize() for word in trackType.split()]
     abbrv = ''.join(word[0] for word in words)
     fullTrackType = '%s (%s)' % (' '.join(words), abbrv)
     return (fullTrackType, 1, True)
 def _getGtrackValueDim(self, val, valTypeInfo, valueOrEdgeWeight):
     """Track the length of ``val`` and return the matching value dimension.

     The length is the number of parts after splitting on
     ``valTypeInfo.delim``, or ``len(val)`` when the delimiter is ''.  The
     first length seen for ``valueOrEdgeWeight`` is stored; once a different
     length shows up the stored length becomes 0.  The stored length is
     translated by ``GtrackGenomeElementSource.getGtrackValueDimension``.
     """
     if valTypeInfo.delim != '':
         valLen = len(val.split(valTypeInfo.delim))
     else:
         valLen = len(val)

     lenDict = self._valLenDict
     if valueOrEdgeWeight not in lenDict:
         lenDict[valueOrEdgeWeight] = valLen
     elif lenDict[valueOrEdgeWeight] != valLen:
         lenDict[valueOrEdgeWeight] = 0

     return GtrackGenomeElementSource.getGtrackValueDimension(lenDict[valueOrEdgeWeight])
 def _composeBoundingRegionLine(self, boundingRegionTuple):
     """Return the '####' bounding region line for ``boundingRegionTuple``.

     Works on a copy of the region.  With the '1-indexed' header set, start
     and end are shifted up by one; with 'end inclusive' set, end is shifted
     down by one (None coordinates are left as None).  The line lists
     genome, chr, start and end as 'name=value' pairs joined by '; ',
     skipping None values, and ends with ``os.linesep``.
     """
     region = copy(boundingRegionTuple.region)

     if self._headerDict['1-indexed']:
         if region.start is not None:
             region.start = region.start + 1
         if region.end is not None:
             region.end = region.end + 1
     if self._headerDict['end inclusive']:
         if region.end is not None:
             region.end = region.end - 1

     brLinePartList = []
     for attr in ['genome', 'chr', 'start', 'end']:
         brLinePartList.append((Gtrack.convertNameToGtrack(attr), getattr(region, attr)))

     assignments = []
     for gtrackName, attrVal in brLinePartList:
         if attrVal is None:
             continue
         phrase = self._formatPhraseWithCorrectChrUsage(str(attrVal), useUrlEncoding=True, notAllowedChars='=;#\t')
         assignments.append(gtrackName + '=' + phrase)

     return '####' + '; '.join(assignments) + os.linesep
 def _handleEndOfFile(self):
     """Run the base end-of-file handling, then re-scan to settle the headers.

     A second iterator over the same source is run to exhaustion, seeded with
     the value type indexes found in the first pass.  Its value lengths are
     copied back, 'undirected edges' is cleared when no unique edge ids were
     collected, and the value/edge weight type is set to 'number' when the
     second pass flagged all of those values as missing.
     """
     GtrackGenomeElementSource._handleEndOfFile(self)
     
     #To fix an issue where value dimension is "list" if the value type was wrongly
     #guessed for early elements.
     
     newIter = self.__iter__()
     # Start the second pass from the type indexes settled by the first pass.
     newIter._valTypeIndexDict = self._valTypeIndexDict
     # Swap in the basic end-of-file hook on the second iterator
     # (presumably so that it does not trigger this re-scan again -- confirm).
     newIter._handleEndOfFile = newIter._basicHandleEndOfFile
     
     # Drain the second iterator; only its bookkeeping is of interest.
     try:
         while True:
             newIter.next()
     except StopIteration:
         pass
     
     self._valLenDict = newIter._valLenDict
     if len(self._uniqueEdgeIds) == 0:
         self._headerDict['undirected edges'] = False
     
     for valueOrEdgeWeight in ['value', 'edge weight']:
         if valueOrEdgeWeight in newIter._allMissingDict and newIter._allMissingDict[valueOrEdgeWeight] == True:
             self._headerDict['%s type' % valueOrEdgeWeight] = 'number'
Example #25
0
    def _getGtrackValueDim(self, val, valTypeInfo, valueOrEdgeWeight):
        """Track the length of ``val`` and return the matching dimension.

        The length is the number of parts after splitting ``val`` on
        ``valTypeInfo.delim`` (or ``len(val)`` for an empty delimiter). The
        first length seen per ``valueOrEdgeWeight`` is remembered; as soon as
        a different length is seen, the remembered length is set to 0. The
        remembered length is then mapped through
        ``GtrackGenomeElementSource.getGtrackValueDimension``.
        """
        delim = valTypeInfo.delim
        parts = val.split(delim) if delim != '' else val
        valLen = len(parts)

        if valueOrEdgeWeight not in self._valLenDict:
            self._valLenDict[valueOrEdgeWeight] = valLen
        elif self._valLenDict[valueOrEdgeWeight] != valLen:
            self._valLenDict[valueOrEdgeWeight] = 0

        storedLen = self._valLenDict[valueOrEdgeWeight]
        return GtrackGenomeElementSource.getGtrackValueDimension(storedLen)
Example #26
0
    def _composeBoundingRegionLine(self, boundingRegionTuple):
        """Return the '####' bounding region line for ``boundingRegionTuple``.

        Works on ``region.getCopy()``. With the '1-indexed' header set, start
        and end are shifted up by one; with 'end inclusive' set, end is
        shifted down by one (None coordinates stay None). The line lists
        genome, chr, start and end as 'name=value' pairs joined by '; ',
        skipping None values, and ends with ``os.linesep``.
        """
        region = boundingRegionTuple.region.getCopy()

        if self._headerDict['1-indexed']:
            if region.start is not None:
                region.start = region.start + 1
            if region.end is not None:
                region.end = region.end + 1
        if self._headerDict['end inclusive']:
            if region.end is not None:
                region.end = region.end - 1

        brLinePartList = [(Gtrack.convertNameToGtrack(attr),
                           getattr(region, attr))
                          for attr in ['genome', 'chr', 'start', 'end']]

        assignments = []
        for gtrackName, attrVal in brLinePartList:
            if attrVal is None:
                continue
            phrase = self._formatPhraseWithCorrectChrUsage(
                str(attrVal), useUrlEncoding=True, notAllowedChars='=;#\t')
            assignments.append(gtrackName + '=' + phrase)

        return '####' + '; '.join(assignments) + os.linesep
Example #27
0
    def _handleEndOfFile(self):
        """Run the base end-of-file handling, then re-scan to settle headers.

        A second iterator over the same source is run to exhaustion, seeded
        with the value type indexes of the first pass; this addresses value
        dimension coming out as "list" when the value type was wrongly
        guessed for early elements. Afterwards the second pass' value lengths
        are copied back, 'undirected edges' is cleared when no unique edge
        ids were collected, and the value/edge weight type is set to 'number'
        when the second pass flagged all such values as missing.
        """
        GtrackGenomeElementSource._handleEndOfFile(self)

        rescanIter = self.__iter__()
        rescanIter._valTypeIndexDict = self._valTypeIndexDict
        rescanIter._handleEndOfFile = rescanIter._basicHandleEndOfFile

        # Drain with explicit next() calls; only the bookkeeping matters.
        exhausted = False
        while not exhausted:
            try:
                rescanIter.next()
            except StopIteration:
                exhausted = True

        self._valLenDict = rescanIter._valLenDict
        if len(self._uniqueEdgeIds) == 0:
            self._headerDict['undirected edges'] = False

        for valueOrEdgeWeight in ['value', 'edge weight']:
            if valueOrEdgeWeight not in rescanIter._allMissingDict:
                continue
            if rescanIter._allMissingDict[valueOrEdgeWeight] == True:
                self._headerDict['%s type' % valueOrEdgeWeight] = 'number'
Example #28
0
    def getOptionsBoxTrackType(prevChoices):
        """Return a read-only display of the track type implied by the headers.

        Returns None when columns are to be based on an existing GTrack file
        but none is chosen, when neither ``history`` nor ``input`` is set, or
        when no track type can be derived. The header set is adjusted first:
        'start' is dropped when ``createDense`` is 'Yes', and 'edges' is added
        when ``TabularToGtrackTool._create3dData`` says so. Otherwise returns
        ``(text, 1, True)`` where ``text`` is the capitalized track type
        followed by its initials in parentheses.
        """
        basedOnGtrack = prevChoices.columnSelection == 'Base columns on existing GTrack file'
        if basedOnGtrack and not prevChoices.colSpecFile:
            return None

        if not (prevChoices.history or prevChoices.input):
            return None

        headers = set(TabularToGtrackTool._getHeaders(prevChoices))
        if prevChoices.createDense == 'Yes':
            headers.discard('start')

        if 'edges' not in headers and TabularToGtrackTool._create3dData(prevChoices):
            headers.add('edges')

        trackType = GtrackGenomeElementSource.getTrackTypeFromColumnSpec(headers)
        if trackType is None:
            return None

        words = [word.capitalize() for word in trackType.split()]
        abbrv = ''.join(word[0] for word in words)
        fullTrackType = '%s (%s)' % (' '.join(words), abbrv)
        return (fullTrackType, 1, True)
Example #29
0
def getGeSource(track, genome=None):
    """Return a genome element source for ``track``.

    ``track`` is a track name, either as a list or as a colon-separated
    string. It is first treated as a Galaxy history track name: its file
    suffix selects BedCategoryGenomeElementSource ('category.bed'),
    GtrackGenomeElementSource ('gtrack') or BedGenomeElementSource (anything
    else). If that fails with an Exception, a FullTrackGenomeElementSource
    for ``genome`` and ``track`` is returned (``allowOverlaps=False``).
    """
    from quick.application.ExternalTrackManager import ExternalTrackManager
    from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
    from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
    from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource

    if isinstance(track, basestring):
        track = track.split(':')

    try:
        fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(track)
        fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
        if fileType == 'category.bed':
            return BedCategoryGenomeElementSource(fn)
        elif fileType == 'gtrack':
            return GtrackGenomeElementSource(fn)
        else:
            return BedGenomeElementSource(fn)
    except Exception:
        # Not a usable history track: fall back to the track repository.
        # 'Exception' (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being turned into a fallback.
        return FullTrackGenomeElementSource(genome, track, allowOverlaps=False)
    def _getValInCorrectType(self, val, valueOrEdgeWeight='value', isEmptyElement=False):
        """Record the detected type and dimension of ``val``, then convert it.

        The candidate types are tried in the order 'binary', 'number',
        'category', 'character', skipping those ranked below the index already
        stored for ``valueOrEdgeWeight`` in ``self._valTypeIndexDict``. For
        the first type that ``val`` matches, the index is stored and the
        '<valueOrEdgeWeight> type' / '<valueOrEdgeWeight> dimension' headers
        are updated unless they are present in ``getHeaderDictInFile()``.
        The converted value comes from the base class implementation.

        Raises ShouldNotOccurError if no candidate type matches.
        """
        typeKey = '%s type' % valueOrEdgeWeight
        dimKey = '%s dimension' % valueOrEdgeWeight

        valTypeList = ['binary', 'number', 'category', 'character']
        for typeIdx, valueType in enumerate(valTypeList):
            if valueOrEdgeWeight in self._valTypeIndexDict \
                    and typeIdx < self._valTypeIndexDict[valueOrEdgeWeight]:
                continue

            valTypeInfo = GtrackGenomeElementSource.VAL_TYPE_DICT[valueType]
            if not self._isValOfParticularType(val, valTypeInfo):
                continue

            self._noteIfAllValuesAreMissing(valueOrEdgeWeight, val, valTypeInfo)
            self._valTypeIndexDict[valueOrEdgeWeight] = typeIdx

            valueDim = self._getGtrackValueDim(val, valTypeInfo, valueOrEdgeWeight)

            # Headers stated explicitly in the file take precedence.
            if typeKey not in self.getHeaderDictInFile():
                self._headerDict[typeKey] = valueType
            if dimKey not in self.getHeaderDictInFile():
                self._headerDict[dimKey] = valueDim

            return GtrackGenomeElementSource._getValInCorrectType(self, val, valueOrEdgeWeight, isEmptyElement)

        raise ShouldNotOccurError()
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Is called when the execute button is pushed by the web user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything other than HTML, the output should be written to the file with path galaxyFn.
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by the web user in each options box.
        '''
        # NOTE(review): this definition is truncated in the visible source (it
        # ends on a dangling 'else:'), so only the visible part is annotated.

        fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
            choices[2].split(':'))

        core = HtmlCore()
        core.begin()

        valid = False
        try:
            core.header('Validating GTrack headers')
            core.styleInfoBegin(styleClass='debug')

            # Print what has been built so far, then start a fresh HtmlCore
            # for the next piece of output.
            print str(core)
            core = HtmlCore()

            # Constructing the source is the header validation step; the
            # genome (choices[1]) is only passed when choices[0] is 'Yes'.
            gtrackSource = GtrackGenomeElementSource(
                fnSource,
                choices[1] if choices[0] == 'Yes' else None,
                printWarnings=True)

            core.append('Done')
            core.styleInfoEnd()
            core.header('Validating complete GTrack file')
            core.styleInfoBegin(styleClass='debug')

            print str(core)
            core = HtmlCore()

            # Iterate over every element; the elements themselves are unused.
            try:
                for ge in gtrackSource:
                    pass
            except Exception, e:
                raise
            else:
    def execute(cls, choices, galaxyFn=None, username=''):
        """Write the expanded elements of the selected track to ``galaxyFn``.

        ``choices[0]`` is the genome. When ``choices[1]`` is 'history',
        ``choices[2]`` is a colon-separated Galaxy track name whose second
        field is the file format; the history file is copied to a run-specific
        temporary file, read as BED ('valued.bed', 'category.bed', 'bed') or
        GTrack ('gtrack'), sorted and written out, and the temporary file is
        removed afterwards. Otherwise ``choices[3]`` names a track that is
        read chromosome by chromosome through PlainTrack, and the header is
        written only for the first chromosome.

        The output file and the files used for the temporary copy are closed
        even when an error occurs.
        """
        with open(galaxyFn, 'w') as outputFile:
            genome = choices[0]
            trackItem = choices[3]
            # NOTE(review): the result is unused in this method; the call is
            # kept because its side effects (if any) are not visible here.
            chromRegsPath = GenomeInfo.getChrRegsFn(genome)

            chrSizeDict = dict((chrom, GenomeInfo.getChrLen(genome, chrom))
                               for chrom in GenomeInfo.getChrList(genome))
            geSource = headLinesStr = None
            if choices[1] == 'history':

                trackType = choices[2].split(':')[1]

                from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
                tempFn = GalaxyRunSpecificFile(['fromHistory.' + trackType], galaxyFn).getDiskPath(True)

                fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))
                # Copy through context managers so that both handles are
                # closed (and the copy flushed) before the file is re-read.
                with open(fnSource, 'r') as sourceFile:
                    with open(tempFn, 'w') as tempFile:
                        tempFile.write(sourceFile.read())

                try:
                    if trackType in ['valued.bed', 'category.bed', 'bed']:
                        geSource = GenomeElementSorter(BedGenomeElementSource(tempFn, genome=genome)).__iter__()

                    elif trackType == 'gtrack':
                        geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                        headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag=True)
                finally:
                    # Remove the temporary copy also when processing fails.
                    os.remove(tempFn)

            else:
                writeHeaderFlag = True
                for chrom in GenomeInfo.getChrList(genome):
                    gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                    plTrack = PlainTrack(trackItem.split(':'))
                    geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile, headLinesStr, writeHeaderFlag)
                    writeHeaderFlag = False
Example #33
0
    def getOptionsBox6(prevChoices):
        """Return the DB-only columns as an OrderedDict of unchecked options.

        prevChoices[2] and prevChoices[3] are the colon-separated Galaxy track
        names of the source and DB GTrack files. Returns None when
        prevChoices[3] is empty; otherwise every column of the DB file that is
        not a column of the source file is mapped to False, in DB column
        order.
        """
        if not prevChoices[3]:
            return None

        sourceTN = prevChoices[2].split(':')
        fnSource = ExternalTrackManager.extractFnFromGalaxyTN(sourceTN)
        dbTN = prevChoices[3].split(':')
        fnDB = ExternalTrackManager.extractFnFromGalaxyTN(dbTN)

        gtrackDB = GtrackGenomeElementSource(fnDB)
        gtrackSource = GtrackGenomeElementSource(fnSource)

        extraDbColumnsDict = OrderedDict()
        for dbColumn in gtrackDB.getColumns():
            if dbColumn not in gtrackSource.getColumns():
                extraDbColumnsDict[dbColumn] = False
        return extraDbColumnsDict
Example #34
0
    def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
        if 'subtype url' in self._forcedHeaderDict:
            subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
                if self._forcedHeaderDict['subtype url'] != '' else []
        else:
            subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST

        for subtypeUrl in subtypeUrlList:
            subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
            subtypeColumns = subtypeGESource.getColumns(orig=False)
            subtypeHeaders = subtypeGESource.getHeaderDict()

            numRepeats = 2 if subtypeHeaders[
                'subtype adherence'] == 'redefinable' else 1

            for repeat in range(numRepeats):
                self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
                self._setHeaderDict('end inclusive',
                                    subtypeHeaders['end inclusive'])

                if subtypeHeaders['subtype adherence'] in [
                        'reorderable', 'free'
                ]:
                    rearrangedColumns = columns
                    rearrangedHbColumns = hbColumns
                else:
                    colSet = set(columns)
                    subtypeColSet = set(subtypeColumns)

                    if subtypeHeaders['subtype adherence'] == 'redefinable':
                        colsRemoved = list(subtypeColSet - colSet)
                        colsAdded = list(colSet - subtypeColSet)
                        if len(colsRemoved) != len(colsAdded) or len(
                                colsRemoved) > 2:
                            continue

                        colsRedefinedTo = [
                            'value', 'edges'
                        ] if repeat == 1 else ['edges', 'value']

                        rearrangedColumns = []
                        i, j = (0, 0)
                        for col in subtypeColumns:
                            if col in colsRemoved:
                                rearrangedColumns.append(colsRedefinedTo[i])
                                i += 1
                            elif col in colsRedefinedTo:
                                rearrangedColumns.append(colsAdded[j])
                                j += 1
                            else:
                                rearrangedColumns.append(col)

                        for col in columns:
                            if col in colsAdded[j:]:
                                rearrangedColumns.append(col)
                    else:
                        rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                            [x for x in columns if x not in subtypeColSet]
                    rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(
                        rearrangedColumns)

                try:
                    tempFile = StringIO()
                    self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                          deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)

                    gtrackGESource = Gtrack('subtypeTest.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                            strToUseInsteadOfFn=tempFile.getvalue())
                    tempFile.close()

                    if gtrackGESource.compliesWithSubtype(subtypeUrl):
                        gtrackGESource._headerDict['subtype url'] = subtypeUrl
                        gtrackGESource._updateHeadersAccordingToSubtype()
                        updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                          if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                        for header in updatedHeaders:
                            self._setHeaderDict(header, updatedHeaders[header])

                        return rearrangedHbColumns, rearrangedColumns, True
                except Exception, e:
                    continue
Example #35
0
 def _getHbColumnsFromGtrackColumns(self, columns):
     return [Gtrack.convertNameFromGtrack(col) for col in columns]
 def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
     if 'subtype url' in self._forcedHeaderDict:
         subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
             if self._forcedHeaderDict['subtype url'] != '' else []
     else:
         subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST
 
     for subtypeUrl in subtypeUrlList:
         subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
         subtypeColumns = subtypeGESource.getColumns(orig=False)
         subtypeHeaders = subtypeGESource.getHeaderDict()
         
         numRepeats = 2 if subtypeHeaders['subtype adherence'] == 'redefinable' else 1
         
         for repeat in range(numRepeats):
             self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
             self._setHeaderDict('end inclusive', subtypeHeaders['end inclusive'])
             
             if subtypeHeaders['subtype adherence'] in ['reorderable', 'free']:
                 rearrangedColumns = columns
                 rearrangedHbColumns = hbColumns
             else:
                 colSet = set(columns)
                 subtypeColSet = set(subtypeColumns)
                 
                 if subtypeHeaders['subtype adherence'] == 'redefinable':
                     colsRemoved = list(subtypeColSet - colSet)
                     colsAdded = list(colSet - subtypeColSet)
                     if len(colsRemoved) != len(colsAdded) or len(colsRemoved) > 2:
                         continue
                     
                     colsRedefinedTo = ['value', 'edges'] if repeat == 1 else ['edges', 'value']
                     
                     rearrangedColumns = []
                     i,j = (0,0)
                     for col in subtypeColumns:
                         if col in colsRemoved:
                             rearrangedColumns.append(colsRedefinedTo[i])
                             i += 1
                         elif col in colsRedefinedTo:
                             rearrangedColumns.append(colsAdded[j])
                             j += 1
                         else:
                             rearrangedColumns.append(col)
                             
                     for col in columns:
                         if col in colsAdded[j:]:
                             rearrangedColumns.append(col)
                 else:
                     rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                         [x for x in columns if x not in subtypeColSet]
                 rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(rearrangedColumns)
             
             try:
                 tempFile = StringIO()
                 self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                       deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)
                     
                 gtrackGESource = Gtrack('subtypeTest.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                         strToUseInsteadOfFn=tempFile.getvalue())
                 tempFile.close()
                 
                 if gtrackGESource.compliesWithSubtype(subtypeUrl):
                     gtrackGESource._headerDict['subtype url'] = subtypeUrl
                     gtrackGESource._updateHeadersAccordingToSubtype()
                     updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                       if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                     for header in updatedHeaders:
                         self._setHeaderDict(header, updatedHeaders[header])
                     
                     return rearrangedHbColumns, rearrangedColumns, True
             except Exception, e:
                 continue
Example #37
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Convert a tabular input file into a GTrack file written to galaxyFn.

        The input is read from the history element named in choices.history
        if that is set, otherwise from the text in choices.input. Column names
        come from cls._getHeaders(choices); columns whose header has been
        blanked ('') are left out of the output.

        When dense output (choices.createDense == 'Yes') or 3D data
        (cls._create3dData(choices)) is requested, the input is read twice: a
        'pre' pass copies the lines to a temporary file which is then sorted
        with the external `sort` command, and a 'final' pass reads the sorted
        file and writes the data lines. Otherwise a single 'final' pass is
        made over the input.

        Some input errors are reported on standard error and end the method
        early (plain return). Any exception is printed to standard error and
        re-raised.

        choices  -- the selections made by the web user in each options box.
        galaxyFn -- path of the output file.
        username -- not used by this method.
        '''

        try:
            if choices.history:
                inputFile = open(ExternalTrackManager.extractFnFromGalaxyTN(choices.history.split(':')), 'r')
            else:
                inputFile = StringIO(choices.input)

            headers = cls._getHeaders(choices)
            # Map each header name to its column index.
            headerIdxs = {}
            for i, header in enumerate(headers):
                headerIdxs[header] = i

            createDense = choices.createDense == 'Yes'
            if createDense:
                firstRegInBlock = None
                curReg = None

                # NOTE(review): a missing 'seqid' or 'start' header raises
                # KeyError on the lookup itself, before the assert is evaluated.
                assert headerIdxs['seqid'] is not None
                assert headerIdxs['start'] is not None

                # Blanking the header removes the 'start' column from the
                # output (see colIndexes below).
                headers[headerIdxs['start']] = ''

            create3dData = cls._create3dData(choices)
            if create3dData:
                if any(x in headers for x in ['id', 'edges']):
                    print >> sys.stderr, "Error: when using the special 3D input columns 'linked_seqid' and " + \
                                         "'linked_start', the columns 'id' and 'edges' must not " + \
                                         "be specified in addition."
                    return

                # The linked_* input columns are not written as columns; they
                # are folded into the generated 'edges' column instead.
                for header in ['linked_seqid', 'linked_start', 'linked_end', 'link_weight']:
                    if header in headerIdxs:
                        headers[headerIdxs[header]] = ''

                # Append the generated 'id' and 'edges' columns after the
                # input columns.
                for header in ['id', 'edges']:
                    headerIdxs[header] = len(headers)
                    headers += [header]

                regs = []
                regIdx = 0
                prevRegIdx = 0
                idDict = {}
                idCount = 0
                curCols = None
                prevLine = ''

                firstRegInBlock = None
                curReg = None
                prev3dReg = None
                nextReg = None

            if createDense or create3dData:
                newInputFile = NamedTemporaryFile()
                sortedInputFile = NamedTemporaryFile()

            # Only columns with a non-blank header are written.
            colIndexes = [i for i, header in enumerate(headers) if header != '']
            numSkipLines = cls._getNumSkipLines(choices)

            # tempContents collects the header lines (and, at the very end,
            # the data lines); tempDataLines collects the data lines.
            tempContents = NamedTemporaryFile()
            tempDataLines = NamedTemporaryFile()

            if choices.indexing == '1-indexed, end inclusive':
                tempContents.write('##1-indexed: true' + os.linesep)
                tempContents.write('##end inclusive: true' + os.linesep)

            # Column specification line.
            tempContents.write('###' + '\t'.join([headers[i] for i in colIndexes]) + os.linesep)

            for passType in ['pre','final'] if createDense or create3dData else ['final']:
                for i in xrange(numSkipLines):
                    inputFile.readline()

                splitChar = cls._getSplitChar(choices)
                numCols = cls._getFileContentsInfo(choices).numCols
                regionsDecreased = False

                autoCorrectSeqId = choices.handleSeqId == 'Yes, auto-correct to the best match in the genome build'
                cropCrossingSegments = choices.cropCrossingSegments == 'Yes'
                genome = choices.genome

                for i, line in enumerate(inputFile):
                    # NOTE(review): this branch is a no-op ('pass'), so empty
                    # and '#'-prefixed lines are still processed as data lines
                    # below -- confirm whether 'continue' was intended.
                    if line == '' or len(line) > 0 and line[0] == '#':
                        pass

                    cols = [x.strip() for x in line.strip().split(splitChar)]
                    if create3dData:
                        # Placeholders for the appended 'id' and 'edges' columns.
                        cols += ['', '']

                    # Every output column must be present in the line.
                    for j in colIndexes:
                        if len(cols) <= j:
                            print >> sys.stderr, "Error in line #%s: %s" % (i+1, line)
                            print >> sys.stderr, "The line does not include the column #%s, which is defined with " \
                                                 "the name '%s' (the number of columns is %s). Please fix the input " \
                                                 "file or redefine the column names of this column." \
                                                 % (j+1, headers[j], len(cols))
                            return

                    if autoCorrectSeqId:
                        from quick.util.GenomeInfo import GenomeInfo
                        cols[headerIdxs['seqid']] = GenomeInfo.findBestMatchingChr(genome, cols[headerIdxs['seqid']])

                    # Empty cells become '.'; all other cells are passed
                    # through formatPhraseWithCorrectChrUsage.
                    for j, col in enumerate(cols):
                        if col == '':
                            cols[j] = '.'
                        else:
                            cols[j] = formatPhraseWithCorrectChrUsage(col, notAllowedChars='#\t')

                    if cropCrossingSegments:
                        # Clip an end coordinate that extends past the
                        # chromosome length (also for the linked region when
                        # handling 3D data).
                        from quick.util.GenomeInfo import GenomeInfo
                        for seqidHdr, startHdr, endHdr in [('seqid','start','end')] \
                                + ([('linked_seqid','linked_start','linked_end')] if create3dData else []):
                            if endHdr in headerIdxs:
                                seqid = cols[headerIdxs[seqidHdr]]
                                start = cols[headerIdxs[startHdr]]
                                end = cols[headerIdxs[endHdr]]
                                if not any(x == '.' for x in [seqid, start, end]):
                                    start, end = int(start), int(end)
                                    if choices.indexing == '1-indexed, end inclusive':
                                        start -= 1
                                    chrLen = GenomeInfo().getChrLen(genome, seqid)
                                    if start < chrLen and end > chrLen:
                                        cols[headerIdxs[endHdr]] = str(chrLen)

                    if createDense or create3dData:
                        prevReg = curReg
                        # NOTE(review): headerIdxs.get('end') is also falsy when
                        # 'end' is column 0, unlike the "'end' in headerIdxs"
                        # checks used further down -- confirm this is intended.
                        curReg = cls._getGenomeRegion(cols[headerIdxs['seqid']], cols[headerIdxs['start']], \
                                                                       cols[headerIdxs['end']] if headerIdxs.get('end') else None)

                        if passType == 'pre':
                            # Copy the line to the file that will be sorted.
                            newInputFile.write(line.strip() + os.linesep)

                            if create3dData:
                                # Register every distinct region; the ids are
                                # filled in after sorting.
                                id = curReg.strShort()
                                if id not in idDict:
                                    regs.append(curReg)
                                    idDict[id] = ''

                                # NOTE(review): 'linked_end' is read when 'end'
                                # (not 'linked_end') is among the headers --
                                # verify against the expected input format.
                                linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                                                     cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)
                                if choices.undirected == 'Yes' and linkedReg and linkedReg != curReg:
                                    id = linkedReg.strShort()
                                    if id not in idDict:
                                        regs.append(linkedReg)
                                        idDict[id] = ''

                                    # Swap the two ends of the link and write
                                    # the reversed line as well.
                                    cols[headerIdxs['seqid']], cols[headerIdxs['linked_seqid']] = cols[headerIdxs['linked_seqid']], cols[headerIdxs['seqid']]
                                    cols[headerIdxs['start']], cols[headerIdxs['linked_start']] = cols[headerIdxs['linked_start']], cols[headerIdxs['start']]
                                    if 'end' in headerIdxs:
                                        cols[headerIdxs['end']], cols[headerIdxs['linked_end']] = cols[headerIdxs['linked_end']], cols[headerIdxs['end']]

                                    # Drop the two placeholder columns again.
                                    newInputFile.write(splitChar.join(cols[:-2]) + os.linesep)

                        else: #passType == 'final':
                            if firstRegInBlock is None:
                                firstRegInBlock = curReg

                            if create3dData:
                                # A new region starts a new output element; the
                                # previous element is written further down.
                                if curReg != prevReg:
                                    prevCols = curCols
                                    prevRegIdx = regIdx
                                    regIdx = 0
                                    id = curReg.strShort()
                                    curCols = copy(cols)
                                    curCols[headerIdxs['id']] = idDict[id] if choices.idGeneration == 'Counting' else id
                                    curCols[headerIdxs['edges']] = ''

                                linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                                                 cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)

                                if linkedReg:
                                    # Append this link to the current element's
                                    # ';'-separated edges string.
                                    edges = curCols[headerIdxs['edges']]

                                    if edges != '':
                                        edges += ';'

                                    id = linkedReg.strShort()
                                    if id not in idDict:
                                        raise InvalidFormatError("Error: linked region '%s' is not present in tabular file. Line: %s" % (linkedReg, line))

                                    if choices.complete == 'Yes':
                                        # Emit '<id>=.' for every region skipped
                                        # before the linked one.
                                        while regIdx < len(regs) and regs[regIdx] != linkedReg:
                                            missingId = regs[regIdx].strShort()
                                            edges += '%s=.;' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)
                                            regIdx += 1

                                    edges += idDict[id] if choices.idGeneration == 'Counting' else id
                                    if 'link_weight' in headerIdxs:
                                        edges += '=' + formatPhraseWithCorrectChrUsage(
                                                           cols[headerIdxs['link_weight']],
                                                           notAllowedChars='#\t')
                                    regIdx += 1

                                    curCols[headerIdxs['edges']] = edges

                                # The region changed: finish and write the
                                # previous element.
                                if curReg != prevReg and prevCols:
                                    if choices.complete == 'Yes':
                                        for i in xrange(prevRegIdx, len(regs)):
                                            missingId = regs[i].strShort()
                                            if i != 0:
                                                prevCols[headerIdxs['edges']] += ';'
                                            prevCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                                    if prevCols[headerIdxs['edges']] == '':
                                        prevCols[headerIdxs['edges']] = '.'

                                    cls._checkOverlap(prev3dReg, prevReg, prevLine)

                                    if createDense:
                                        firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                            (firstRegInBlock, prev3dReg, prevReg, tempContents, tempDataLines)

                                    cls._writeDataLines(prevCols, colIndexes, tempDataLines)

                                    prev3dReg = prevReg
                                prevLine = line
                            else: #createDense
                                cls._checkOverlap(prevReg, curReg, line)

                                firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                    (firstRegInBlock, prevReg, curReg, tempContents, tempDataLines)

                                cls._writeDataLines(cols, colIndexes, tempDataLines)
                    else:
                        cls._writeDataLines(cols, colIndexes, tempDataLines)

                if passType == 'pre':
                    newInputFile.flush()

                    inputFile.close()

                    # Sort the copied lines with the external `sort` command,
                    # keyed on the region columns that are present.
                    # NOTE(review): the command is joined into one string and
                    # run with shell=True; splitChar and the key list are
                    # interpolated unescaped.
                    sortCmd = ["sort", newInputFile.name, "-t$'%s'" % splitChar, "-s"] +\
                               ["-k%s,%s%s" % (headerIdxs[x]+1, headerIdxs[x]+1, s) if x in headerIdxs else "" \
                                for x,s in [('seqid',''), ('start','n'), ('end','n'), \
                                            ('linked_seqid',''), ('linked_start','n'), ('linked_end','n')]] +\
                                ["-o", sortedInputFile.name]
                    subprocess.call(' '.join(sortCmd), stderr=sys.stderr, stdout = sys.stdout, shell=True)

                    #print >> sys.stderr, ' '.join(sortCmd)
                    #os._exit(0)
                    newInputFile.close()

                    if create3dData:
                        # The 'Counting' ids are the positions of the regions
                        # in sorted order.
                        regs = sorted(regs)
                        for i,reg in enumerate(regs):
                            idDict[reg.strShort()] = str(i)

                    # The final pass reads the sorted file from its start,
                    # without skipping any lines.
                    inputFile = sortedInputFile
                    inputFile.seek(0)
                    numSkipLines = 0
                    curReg = None
                else: #passType == 'final':
                    if create3dData:
                        # Finish and write the last element, which the loop
                        # above left pending.
                        if choices.complete == 'Yes':
                            for i in xrange(regIdx, len(regs)):
                                missingId = regs[i].strShort()
                                if i != 0:
                                    curCols[headerIdxs['edges']] += ';'
                                curCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                        if curCols[headerIdxs['edges']] == '':
                            curCols[headerIdxs['edges']] = '.'

                        cls._checkOverlap(prev3dReg, curReg, prevLine)

                        if createDense:
                            firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                (firstRegInBlock, prev3dReg, curReg, tempContents, tempDataLines)

                        cls._writeDataLines(curCols, colIndexes, tempDataLines)

                    if createDense:
                        firstRegInBlock, tempDataLines = cls._writeBlockLines \
                            (firstRegInBlock, curReg, None, tempContents, tempDataLines)

                    # Append the collected data lines after the header lines.
                    tempDataLines.flush()
                    tempDataLines.seek(0)
                    tempContents.write(tempDataLines.read())
                    tempContents.flush()
                    tempContents.seek(0)

            #print tempContents.read()
            #tempContents.seek(0)

            # Expand the headers of the temporary GTrack file and write the
            # result to galaxyFn.
            expandHeadersOfGtrackFileAndReturnComposer(tempContents.name).composeToFile(galaxyFn)
            # Iterate once over the written file; presumably this validates
            # it by parsing every element -- TODO confirm.
            geSource = GtrackGenomeElementSource(galaxyFn, genome=genome, printWarnings=False)
            for ge in geSource:
                pass

        except Exception, e:
            print >> sys.stderr, e
            raise
 def _checkUndirectedEdges(self):     
     if self._headerDict['track type'].startswith('linked'):
         try:       
             GtrackGenomeElementSource._checkUndirectedEdges(self)
         except InvalidFormatError:
             self._headerDict['undirected edges'] = False
 def _basicHandleEndOfFile(self):
     """Call GtrackGenomeElementSource._handleEndOfFile on this instance
     directly; its return value is discarded."""
     baseHandler = GtrackGenomeElementSource._handleEndOfFile
     baseHandler(self)
    def __init__(self, *args, **kwArgs):
        """Initialise via GtrackGenomeElementSource.__init__ with the given
        arguments, then start with `_noOverlappingElements` set to None."""
        baseInit = GtrackGenomeElementSource.__init__
        baseInit(self, *args, **kwArgs)

        # Assigned after the base initialiser has run.
        self._noOverlappingElements = None
Example #41
0
    def printGSuite(cls, choices, cols, rows, colListString, outFile):
        """Write one GTrack file per disease/trait and a GSuite listing them.

        The rows are grouped by the value in the disease column. For each
        disease (in sorted order) a GSuite track is registered and its rows
        are converted to genome elements, which are composed to the track's
        GTrack file. Finally the GSuite itself is composed to outFile.

        cols    -- list of column names; must contain the disease, chromosome,
                   start and value column names defined on the class. All
                   other columns are carried along as extra columns.
        rows    -- iterable of rows, each indexable by column position.
        outFile -- passed on to GSuiteComposer.composeToFile.
        choices, colListString -- not used by this method.

        When DATABASE_GENOME differs from OUTPUT_GENOME each position is
        lifted over with pyliftover. Positions that cannot be lifted over to
        exactly one target position are reported on standard output.
        """
        #print cols
        from quick.extra.ProgressViewer import ProgressViewer

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer

        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.GtrackComposer import ExtendedGtrackComposer
        from gold.origdata.GESourceWrapper import ListGESourceWrapper
        from gold.origdata.GenomeElement import GenomeElement

        from collections import defaultdict
        from copy import copy
        from urllib import quote

        from unidecode import unidecode
        from pyliftover import LiftOver

        gSuite = GSuite()

        diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
        chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
        startColIndex = cols.index(cls.START_COLUMN_NAME)
        valColIndex = cols.index(cls.VAL_COLUMN_NAME)

        # Everything apart from the four core columns becomes an extra column.
        orderedExtraKeys = copy(cols)
        extraIndexes = list(range(len(cols)))
        for colName in [cls.DISEASE_COLUMN_NAME, cls.CHR_COLUMN_NAME,
                        cls.START_COLUMN_NAME, cls.VAL_COLUMN_NAME]:
            extraIndexes.remove(cols.index(colName))
            orderedExtraKeys.remove(colName)
        orderedExtraKeys = [cls._fixColNameForGTrack(key) for key in orderedExtraKeys]

        # Group the rows by (ASCII-transliterated) disease name.
        diseaseToRowsDict = defaultdict(list)
        for row in rows:
            disease = row[diseaseColIndex]
            if isinstance(disease, unicode):
                disease = unidecode(disease).replace('\x00', '')

            diseaseToRowsDict[disease].append(row)

        progressViewer = ProgressViewer([('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
                                        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE] )

        shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
        # Built on first use and then shared by all diseases; its arguments
        # do not change between iterations.
        liftOver = None

        for disease in sorted(diseaseToRowsDict.keys()):
            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
                                                extraFileName=disease.replace('/', '_') + '.gtrack')
            gSuiteTrack = GSuiteTrack(uri, title=disease, genome=cls.OUTPUT_GENOME)
            gSuite.addTrack(gSuiteTrack)

            if shouldLiftOver and liftOver is None:
                liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME)

            geList = []
            for row in diseaseToRowsDict[disease]:
                extra = {}
                for col, index in zip(orderedExtraKeys, extraIndexes):
                    cell = row[index].strip()
                    if isinstance(cell, unicode):
                        cell = unidecode(cell)

                    # Empty cells are written as '.'.
                    extra[col] = cell if cell != '' else '.'

                # Translate the numeric/MT chromosome names of the input.
                chrom = 'chr' + row[chrColIndex]
                if chrom == 'chr23':
                    chrom = 'chrX'
                if chrom == 'chr24':
                    chrom = 'chrY'
                if chrom == 'chrMT':
                    chrom = 'chrM'

                start = int(row[startColIndex])
                if shouldLiftOver:
                    newPosList = liftOver.convert_coordinate(chrom, start)
                    if newPosList is None or len(newPosList) != 1:
                        # NOTE(review): the element is still added below with
                        # its original, un-lifted coordinates -- confirm that
                        # this is intended.
                        message = 'SNP with position %s on chromosome %s ' % (start, chrom) + \
                                  'could not be lifted over from reference genome ' + \
                                  '%s to %s (for disease/trait "%s")' % \
                                  (cls.DATABASE_GENOME, cls.OUTPUT_GENOME, disease)
                        print(message)
                    else:
                        chrom, start = newPosList[0][0:2]
                #print extra
                geList.append(GenomeElement(chr=chrom, start=start,
                                            val=row[valColIndex], orderedExtraKeys=orderedExtraKeys,
                                            extra=extra))

            # The blueprint GTrack file supplies the source that wraps the
            # element list.
            geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
            wrappedGeSource = ListGESourceWrapper(geSource, geList)
            composer = ExtendedGtrackComposer(wrappedGeSource)
            composer.composeToFile(gSuiteTrack.path)

            progressViewer.update()

        GSuiteComposer.composeToFile(gSuite, outFile)
 def _isExpandableHeader(self, line, onlyGuaranteed):
     return self._isHeaderLine(line) and \
             ( (Gtrack.getHeaderKeyValue(line)[0] in EXPANDABLE_HEADERS) or \
                (not onlyGuaranteed and Gtrack.getHeaderKeyValue(line)[0] in NOT_GUARANTEED_EXPANDABLE_HEADERS) )
 def _getHbColumnsFromGtrackColumns(self, columns):
     return [Gtrack.convertNameFromGtrack(col) for col in columns]
Example #44
0
 def _getGtrackColumnsFromHbColumns(self, hbColumns):
     return [
         Gtrack.convertNameToGtrack(col) for col in hbColumns
         if col != 'weights'
     ]
 def _getGtrackColumnsFromHbColumns(self, hbColumns):
     return [Gtrack.convertNameToGtrack(col) for col in hbColumns if col != 'weights']
    def testHeaderExpansion(self):
        """Check that removed GTrack headers are restored by header expansion.

        For every 'gtrack' test case (except those with 'no_expand' in the
        name) the expandable header lines are removed, the file is written,
        and its headers are expanded again, both with all headers and with
        only the non-default ones. Unless the case name contains
        'no_check_expand', the expanded header values are compared with the
        original ones, and both expanded files are parsed and compared with
        the case's expected elements and bounding regions.

        With 'no_types_expanded' in the case name only the headers in
        EXPANDABLE_HEADERS are removed.
        """
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue

            if 'no_expand' in caseName:
                print('Test case skipped: ' + caseName)
                continue

            onlyGuaranteed = 'no_types_expanded' in caseName

            print(caseName)
            print('===========')
            case = geSourceTest.cases[caseName]

            # Normalise header lines to lower-cased '##key: value' form.
            headerLines = [line if not self._isHeaderLine(line) else
                            '##' + ': '.join([str(x).lower() for x in Gtrack.getHeaderKeyValue(line.strip())])
                             for line in case.headerLines]

            fullContents = os.linesep.join(headerLines + case.lines)
            print('Original:\n\n' + fullContents)

            # Note: this replaces the header lines on the shared case object.
            case.headerLines = [line for line in headerLines if not self._isExpandableHeader(line, onlyGuaranteed)]
            print('-----')
            print('With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines))

            testFn = self._writeTestFile(case)

            expandedContents = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=False)

            print('-----')
            print('With expanded headers:\n\n' + expandedContents)

            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=True)

            print('-----')
            print('With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults)

            origExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in headerLines
                                          if self._isExpandableHeader(line, onlyGuaranteed=False)])
            notExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in case.headerLines
                                         if self._isHeaderLine(line) and not self._isValueNotKeptHeader(line)])
            expandedHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in expandedContents.split(os.linesep)
                                    if self._isHeaderLine(line)])

            if 'no_check_expand' in caseName:
                print('No checks for case: ' + caseName)
                continue

            for header in origExpandableHeaders:
                self.assertEqual(origExpandableHeaders[header], expandedHeaders[header])
            for header in notExpandableHeaders:
                self.assertEqual(notExpandableHeaders[header], expandedHeaders[header])

            # The same source class is used for both expanded variants.
            usesDefaultSource = case.sourceClass is None
            sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass
            forPreProcessor = usesDefaultSource

            for contents in [expandedContents, expandedContentsOnlyNonDefaults]:
                stdGeSource = GEDependentAttributesHolder(sourceClass('expanded.gtrack', case.genome,
                                                                      forPreProcessor=forPreProcessor,
                                                                      printWarnings=False,
                                                                      strToUseInsteadOfFn=contents))

                self.assertEqual(case.assertElementList, [ge for ge in stdGeSource])
                self.assertEqual(case.boundingRegionsAssertList, [br for br in stdGeSource.getBoundingRegionTuples()])
 def _isValueNotKeptHeader(self, line):
     """
     Tells whether 'line' is a header line whose key (the first item
     returned by Gtrack.getHeaderKeyValue) is in VALUE_NOT_KEPT_HEADERS.
     """
     isHeader = self._isHeaderLine(line)
     if not isHeader:
         # Hand back the falsy result as-is, just like a short-circuited 'and'.
         return isHeader

     headerKey = Gtrack.getHeaderKeyValue(line)[0]
     return headerKey in VALUE_NOT_KEPT_HEADERS
Example #48
0
    def execute(cls, choices, galaxyFn=None, username=''):

        from quick.application.ExternalTrackManager import ExternalTrackManager
        from collections import defaultdict
        from gold.origdata.BedGenomeElementSource import BedGenomeElementSource, BedCategoryGenomeElementSource
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.TrackGenomeElementSource import FullTrackGenomeElementSource
        from urllib import unquote
        print choices

        genome = choices[0]
        geSourceList, labelNames = [], []
        selectedHists = [
            unquote(val).split(':') for id, val in choices[1].iteritems()
            if val
        ]
        inorout = [int(x) for x in choices[2].split(',')]
        selectedHists += [
            v.split(':') for v in choices[3:]
            if v not in ['-----  Select  -----', 'no', 'yes', None, '']
        ]
        for track in selectedHists:
            try:
                fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                    track)
                fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
                if fileType == 'category.bed':
                    geSourceList.append(BedCategoryGenomeElementSource(fn))
                elif fileType == 'gtrack':
                    geSourceList.append(GtrackGenomeElementSource(fn))
                else:
                    geSourceList.append(BedGenomeElementSource(fn))

                labelNames.append(
                    ExternalTrackManager.extractNameFromHistoryTN(track))
            except:
                geSourceList.append(
                    FullTrackGenomeElementSource(genome,
                                                 track,
                                                 allowOverlaps=False))
                #labelNames.append(track[-1])
                labelNames.append(':'.join(track))

        primeList = [
            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59
        ]
        resultCounter = defaultdict(int)
        posDict = defaultdict(list)
        catDict = defaultdict(list)

        debugstring = 'debug out:'

        for index, geSource in enumerate(geSourceList):
            primeNum = primeList[index]
            prevEnd = -1
            prevChr = ''
            for ge in geSource:

                posDict[ge.chr] += [ge.start, ge.end]
                catDict[ge.chr] += [primeNum, -primeNum]
                prevEnd = ge.end
                prevChr = ge.chr

        debugstring += 'posDict elements/2: ' + str(
            sum(len(v) for v in posDict.itervalues()) / 2) + '\n'
        debugstring += 'catDict elements/2: ' + str(
            sum(len(v) for v in catDict.itervalues()) / 2) + '\n'

        #maxState = reduce( lambda x, y: x*y, primeList[:len(geSourceList)] ) #assuming all tracks are in.
        selectedState = 1
        for n in range(len(geSourceList)):
            if inorout[n]:
                selectedState = selectedState * primeList[n]

        utfil = open(galaxyFn, 'w')
        for chrom in posDict.keys():
            indxSortedList = sorted(range(len(posDict[chrom])),
                                    key=posDict[chrom].__getitem__)

            posList = posDict[chrom]
            catList = catDict[chrom]
            catCoverageDepth = defaultdict(int)

            currentState = 1
            currentPos = 0

            for indx in indxSortedList:
                pos = posList[indx]
                primeVal = catList[indx]
                #print 'pos, primeVal: ', pos, primeVal
                #print 'resultCounter: ', resultCounter
                if currentPos != pos:
                    if abs(currentState) == selectedState:
                        print >> utfil, '%s\t%i\t%i' % (chrom, currentPos, pos)
                    resultCounter[abs(currentState)] += pos - currentPos
                    #debugstring +='resultCounter='+str(resultCounter)+ ' currentPos='+ str(currentPos) + '    pos='+str(pos)+ '   chrom='+str(chrom)+  '   primeVal='+str(primeVal)+ '    catCoverageDepth='+str(catCoverageDepth) +'<br/>'
                    #print 'resultCounter,currentState,  pos and currentPos',abs(currentState),':',  pos, currentPos
                    currentPos = pos

                if primeVal < 0:
                    catCoverageDepth[abs(primeVal)] -= 1
                    if catCoverageDepth[abs(primeVal)] == 0:
                        currentState /= primeVal
                else:
                    catCoverageDepth[primeVal] += 1
                    if catCoverageDepth[primeVal] == 1:
                        currentState *= primeVal

        utfil.close()
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.

        This implementation reads choices[0] as the genome, choices[1] as the
        track origin, choices[2] as a colon-separated track name and
        choices[3] ('Yes' or anything else) as whether overlapping result
        regions are allowed. For every chromosome and every value (ge.val) in
        the track it finds the smallest start and the largest end (the start
        is used for elements without a truthy end) and writes one
        tab-separated line "chrom, lower, upper + 1, value" to galaxyFn.

        If overlaps are not allowed and conflicting ranges are found, galaxyFn
        instead contains only an error message listing the conflicts.
        '''
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.origdata.BedGenomeElementSource import BedCategoryGenomeElementSource
        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource
        from gold.track.GenomeRegion import GenomeRegion
        from quick.util.GenomeInfo import GenomeInfo
        from collections import defaultdict

        genome = choices[0]
        track = choices[2].split(':')
        allowOverlaps = choices[3] == 'Yes'

        # One region per chromosome, spanning the whole chromosome.
        regionList = []
        for chrom in GenomeInfo.getChrList(genome):
            start = 0
            chromSize = GenomeInfo.getChrLen(genome, chrom)
            regionList.append(GenomeRegion(genome, chrom, start, chromSize))

        if choices[1] == 'From Hyperbrowser repository':
            geSource = TrackGenomeElementSource(genome, track, regionList)
        else:
            fileType = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                track)
            fn = ExternalTrackManager.extractFnFromGalaxyTN(track)
            if fileType == 'category.bed':
                geSource = BedCategoryGenomeElementSource(fn)
            else:
                geSource = GtrackGenomeElementSource(fn)

        # chrom -> value -> smallest start / largest end seen so far.
        resultMinDict = defaultdict(dict)
        resultMaxDict = defaultdict(dict)
        for ge in geSource:
            chromMax = resultMaxDict[ge.chr]
            # Elements without a truthy end contribute their start as upper bound.
            upperCandidate = ge.end if ge.end else ge.start
            if ge.val in chromMax:
                chromMin = resultMinDict[ge.chr]
                if chromMax[ge.val] < upperCandidate:
                    chromMax[ge.val] = upperCandidate
                if chromMin[ge.val] > ge.start:
                    chromMin[ge.val] = ge.start
            else:
                chromMax[ge.val] = upperCandidate
                resultMinDict[ge.chr][ge.val] = ge.start

        outputLines = []
        catsConflicting = []
        for chrom in sorted(resultMinDict.keys()):
            chromMin = resultMinDict[chrom]
            chromMax = resultMaxDict[chrom]

            for category in chromMin.keys():
                lower, upper = chromMin[category], chromMax[category]
                if not allowOverlaps:
                    for cat in chromMin:
                        if cat == category:
                            continue
                        otherLower, otherUpper = chromMin[cat], chromMax[cat]
                        # Disjoint ranges never conflict.
                        if otherLower >= upper or otherUpper <= lower:
                            continue
                        # Intersecting ranges are reported unless the other
                        # range fully encloses this one.
                        if otherLower > lower or otherUpper < upper:
                            catsConflicting.append(
                                '(Category: %s,  Region: %i - %i) vs. (Category: %s, Region: %i - %i)'
                                % (category, lower, upper, cat, otherLower, otherUpper))

                outputLines.append('\t'.join(
                    [chrom, str(lower),
                     str(upper + 1), category]))

        if catsConflicting:
            contents = (
                'Error: overlapping resulting regions are not allowed with selected preferences:\n'
                + '\n'.join(catsConflicting))
        else:
            contents = ''.join(line + '\n' for line in outputLines)

        # Write the file once and always close it, whichever content is written.
        utfil = open(galaxyFn, 'w')
        try:
            utfil.write(contents)
        finally:
            utfil.close()
Example #50
0
 def _isExpandableHeader(self, line, onlyGuaranteed):
     """
     Tells whether 'line' is a header line whose key is in
     EXPANDABLE_HEADERS or, when 'onlyGuaranteed' is false, in
     NOT_GUARANTEED_EXPANDABLE_HEADERS.
     """
     isHeader = self._isHeaderLine(line)
     if not isHeader:
         # Hand back the falsy result as-is, just like a short-circuited 'and'.
         return isHeader

     if Gtrack.getHeaderKeyValue(line)[0] in EXPANDABLE_HEADERS:
         return True

     if onlyGuaranteed:
         return False

     return Gtrack.getHeaderKeyValue(line)[0] in NOT_GUARANTEED_EXPANDABLE_HEADERS
Example #51
0
 def _isValueNotKeptHeader(self, line):
     """
     Tells whether 'line' is a header line whose key (the first item
     returned by Gtrack.getHeaderKeyValue) is in VALUE_NOT_KEPT_HEADERS.
     """
     isHeader = self._isHeaderLine(line)
     if not isHeader:
         # Hand back the falsy result as-is, just like a short-circuited 'and'.
         return isHeader

     headerKey = Gtrack.getHeaderKeyValue(line)[0]
     return headerKey in VALUE_NOT_KEPT_HEADERS
Example #52
0
    def testHeaderExpansion(self):
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_expand' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            onlyGuaranteed = 'no_types_expanded' in caseName
            
            print caseName
            print '==========='
            case = geSourceTest.cases[caseName]
            
            headerLines = [line if not self._isHeaderLine(line) else
                            '##' + ': '.join([str(x).lower() for x in Gtrack.getHeaderKeyValue(line.strip())])
                             for line in case.headerLines]
            
            fullContents = os.linesep.join(headerLines + case.lines)
            print 'Original:\n\n' + fullContents
            
            case.headerLines = [line for line in headerLines if not self._isExpandableHeader(line, onlyGuaranteed)]
            print '-----'
            print 'With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines)
            
            testFn = self._writeTestFile(case)
            
            expandedContents = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=False)

            print '-----'
            print 'With expanded headers:\n\n' + expandedContents
            
            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(testFn, case.genome, onlyNonDefault=True)

            print '-----'
            print 'With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults
            
            origExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in headerLines \
                                          if self._isExpandableHeader(line, onlyGuaranteed=False)])
            notExpandableHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in case.headerLines \
                                          if self._isHeaderLine(line) and not self._isValueNotKeptHeader(line)])
            expandedHeaders = dict([Gtrack.getHeaderKeyValue(line) for line in expandedContents.split(os.linesep) \
                                    if self._isHeaderLine(line)])
            
            if 'no_check_expand' in caseName:
                print 'No checks for case: ' + caseName
            else:
                for header in origExpandableHeaders:
                    self.assertEquals(origExpandableHeaders[header], expandedHeaders[header])
                for header in notExpandableHeaders:
                    self.assertEquals(notExpandableHeaders[header], expandedHeaders[header])
                    
                for contents in [expandedContents, expandedContentsOnlyNonDefaults]:
                    
                    sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
                    forPreProcessor = True if case.sourceClass is None else False

                    stdGeSource = GEDependentAttributesHolder(sourceClass('expanded.gtrack', case.genome, \
                                                                          forPreProcessor=forPreProcessor, \
                                                                          printWarnings=False, \
                                                                          strToUseInsteadOfFn=contents))
 def _createColumnSpec(self, cols, addAnyExtraFixedCols=True):
     GtrackGenomeElementSource._createColumnSpec(self, cols, addAnyExtraFixedCols)
     
     self._headerDict['track type'] = GtrackGenomeElementSource.getTrackTypeFromColumnSpec(self._columnSpec)