Beispiel #1
0
 def _formatEdges(self, edges, weights):
     if len(edges) == 0:
         return '.'
     else:
         return ';'.join(formatPhraseWithCorrectChrUsage(edge, useUrlEncoding=True, notAllowedChars='#,;=\t') + \
                          ('=' + self._formatEdgeWeight(weights[i]) if weights is not None else '') \
                           for i,edge in enumerate(edges) )
Beispiel #2
0
    def _composeDataLine(self, ge, hbColumns, dataLineCount, lastGE):
        cols = []
        for hbColName in hbColumns:
            if hbColName == 'start':
                cols.append(self._formatStart(ge.start))
            elif hbColName == 'end':
                cols.append(self._formatEnd(ge.end))
            elif hbColName == 'strand':
                cols.append(getStringFromStrand(ge.strand))
            elif hbColName == 'val':
                cols.append(self._formatValue(ge.val))
            elif hbColName == 'edges':
                cols.append(self._formatEdges(ge.edges, ge.weights))
            elif hbColName == 'weights':
                pass
            else:
                cols.append(formatPhraseWithCorrectChrUsage(unicode(getattr(ge, hbColName)), \
                                                            useUrlEncoding=True,
                                                            notAllowedChars='#\t'))

        if self._headerDict['fixed-size data lines']:
            assert len(cols) == 1
            return cols[0] + (os.linesep if (dataLineCount * len(cols[0])) % 60 < len(cols[0]) \
                              or lastGE else '')
        else:
            return '\t'.join(cols) + os.linesep
Beispiel #3
0
def _composeTrackLines(gSuite, colSpecs, attributes, out):
    """Write one tab-separated line per track of ``gSuite`` to ``out``.

    For each track returned by ``gSuite.allTracks()``:
      * if the track has a truthy ``comment``, a '#'-prefixed comment line
        is written first;
      * one cell is produced per entry of ``colSpecs`` (the track member
        named by ``colSpec.memberName``), followed by one cell per name in
        ``attributes``. An attribute the track does not have is written
        as '.'.

    All cell values pass through formatPhraseWithCorrectChrUsage.
    """
    for trk in gSuite.allTracks():
        if trk.comment:
            print >> out, '#' + trk.comment

        row = []
        for spec in colSpecs:
            memberValue = getattr(trk, spec.memberName)
            row.append(formatPhraseWithCorrectChrUsage(memberValue))

        for attrName in attributes:
            if attrName not in trk.attributes:
                # Placeholder for an attribute this track lacks.
                row.append('.')
                continue
            row.append(formatPhraseWithCorrectChrUsage(trk.attributes[attrName]))

        print >> out, '\t'.join(row)
Beispiel #4
0
    def _composeBoundingRegionLine(self, boundingRegionTuple):
        """Build the '####' bounding-region line for one bounding region.

        Works on a copy of ``boundingRegionTuple.region`` so the caller's
        region is not modified. Start and end are shifted according to the
        '1-indexed' and 'end inclusive' headers (None coordinates stay
        None). The line lists ``name=value`` pairs for genome, chr, start
        and end, separated by '; ', skipping attributes whose value is None,
        and ends with os.linesep.
        """
        reg = boundingRegionTuple.region.getCopy()

        if self._headerDict['1-indexed']:
            reg.start = None if reg.start is None else reg.start + 1
            reg.end = None if reg.end is None else reg.end + 1
        if self._headerDict['end inclusive']:
            reg.end = None if reg.end is None else reg.end - 1

        # Collect (output name, value) pairs first, then format the non-None ones.
        namedValues = []
        for attr in ['genome', 'chr', 'start', 'end']:
            namedValues.append((Gtrack.convertNameToGtrack(attr),
                                getattr(reg, attr)))

        parts = []
        for name, value in namedValues:
            if value is None:
                continue
            escaped = formatPhraseWithCorrectChrUsage(str(value),
                                                      useUrlEncoding=True,
                                                      notAllowedChars='=;#\t')
            parts.append(name + '=' + escaped)

        return '####' + '; '.join(parts) + os.linesep
Beispiel #5
0
    def _commonFormatVal(self, val, valueType, valueDim):
        """Format a value (scalar or sequence) as text for a data line.

        ``valueType`` selects an entry of Gtrack.VAL_TYPE_DICT, which
        supplies the missing-value marker (``missingVal``) and the delimiter
        (``delim``) used between the parts of non-scalar values.

        Scalar values:
          * a non-string with length 1 is unwrapped to its only item;
          * the missing value (or NaN when the missing value is NaN) -> '.';
          * strings are escaped with formatPhraseWithCorrectChrUsage;
          * bools become '1' / '0'; anything else goes through str().

        Non-scalar values: each part is formatted as a scalar and the parts
        are joined with the type's delimiter; an empty sequence gives '.'.
        """
        typeInfo = Gtrack.VAL_TYPE_DICT[valueType]

        if valueDim != 'scalar':
            if len(val) == 0:
                return '.'
            formattedParts = [self._commonFormatVal(part, valueType, 'scalar')
                              for part in val]
            return typeInfo.delim.join(formattedParts)

        # Unwrap one-element containers (but never strings).
        if not isinstance(val, basestring) and hasattr(val, '__len__') \
                and len(val) == 1:
            val = val[0]

        if (val == typeInfo.missingVal) or \
                (isNan(val) and isNan(typeInfo.missingVal)):
            return '.'

        if isinstance(val, basestring):
            return formatPhraseWithCorrectChrUsage(val, useUrlEncoding=True,
                                                   notAllowedChars='#.,;=\t')

        if isinstance(val, bool):
            if val:
                return '1'
            return '0'

        return str(val)
Beispiel #6
0
def _composeHeaders(gSuite, out):
    """Write the headers of ``gSuite`` to ``out`` as '##key: value' lines.

    The (key, value) pairs come from _getAllHeadersToPrint; each value is
    passed through formatPhraseWithCorrectChrUsage before being written.
    """
    for key, rawValue in _getAllHeadersToPrint(gSuite):
        shownValue = formatPhraseWithCorrectChrUsage(rawValue)
        print >> out, '##%s: %s' % (key, shownValue)
Beispiel #7
0
 def _composeColSpecLine(self, columns):
     return '###' + '\t'.join(formatPhraseWithCorrectChrUsage(str(col), useUrlEncoding=False, \
                              notAllowedChars='#\t') for col in columns) + os.linesep
Beispiel #8
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Convert a tabular input into a GTrack file written to galaxyFn.

        Called when the execute button is pushed by the web user.

        The input is read either from the Galaxy history element named by
        choices.history or, if that is empty, from the text in choices.input.
        A column specification line (and, for 1-indexed/end-inclusive input,
        the two matching header lines) is written to a temporary file,
        followed by the converted data lines. The temporary file is then
        passed through expandHeadersOfGtrackFileAndReturnComposer and
        composed to galaxyFn, and the result is iterated once with
        GtrackGenomeElementSource.

        When choices.createDense is 'Yes' and/or the 3D link columns are in
        use (cls._create3dData), the input is processed in two passes: a
        'pre' pass that copies the lines to a temporary file, which is then
        sorted with the external ``sort`` command, and a 'final' pass over
        the sorted lines that writes the output.

        choices holds the selections made by the web user in each option
        box; this method reads them as attributes (history, input,
        createDense, indexing, handleSeqId, cropCrossingSegments, genome,
        undirected, complete, idGeneration).
        galaxyFn is the path of the output file.
        username is not used by this method.

        Problems detected in the input are reported on standard error and
        the method returns early; any exception is printed to standard
        error and re-raised.
        '''

        try:
            # Input comes from a history element if one is selected,
            # otherwise from the text entered directly.
            if choices.history:
                inputFile = open(ExternalTrackManager.extractFnFromGalaxyTN(choices.history.split(':')), 'r')
            else:
                inputFile = StringIO(choices.input)

            # Map each header name to its column index.
            headers = cls._getHeaders(choices)
            headerIdxs = {}
            for i, header in enumerate(headers):
                headerIdxs[header] = i

            createDense = choices.createDense == 'Yes'
            if createDense:
                firstRegInBlock = None
                curReg = None

                # NOTE(review): a missing 'seqid'/'start' key raises KeyError
                # on the lookup itself, before the assert can fail.
                assert headerIdxs['seqid'] is not None
                assert headerIdxs['start'] is not None

                # A blank header name removes the column from the output
                # (see colIndexes below).
                headers[headerIdxs['start']] = ''

            create3dData = cls._create3dData(choices)
            if create3dData:
                if any(x in headers for x in ['id', 'edges']):
                    print >> sys.stderr, "Error: when using the special 3D input columns 'linked_seqid' and " + \
                                         "'linked_start', the columns 'id' and 'edges' must not " + \
                                         "be specified in addition."
                    return

                # The link columns are dropped from the output ...
                for header in ['linked_seqid', 'linked_start', 'linked_end', 'link_weight']:
                    if header in headerIdxs:
                        headers[headerIdxs[header]] = ''

                # ... and replaced by 'id' and 'edges' columns appended last.
                for header in ['id', 'edges']:
                    headerIdxs[header] = len(headers)
                    headers += [header]

                # State for the 3D processing: the distinct regions seen,
                # their ids, and the output row currently being built.
                regs = []
                regIdx = 0
                prevRegIdx = 0
                idDict = {}
                idCount = 0
                curCols = None
                prevLine = ''

                firstRegInBlock = None
                curReg = None
                prev3dReg = None
                nextReg = None

            # Temporary files for the pre-pass: a copy of the input lines
            # and the sorted version of that copy.
            if createDense or create3dData:
                newInputFile = NamedTemporaryFile()
                sortedInputFile = NamedTemporaryFile()

            # Indexes of the columns that are kept in the output.
            colIndexes = [i for i, header in enumerate(headers) if header != '']
            numSkipLines = cls._getNumSkipLines(choices)

            # tempContents receives the header part; data lines are collected
            # in tempDataLines and appended to tempContents at the end.
            tempContents = NamedTemporaryFile()
            tempDataLines = NamedTemporaryFile()

            if choices.indexing == '1-indexed, end inclusive':
                tempContents.write('##1-indexed: true' + os.linesep)
                tempContents.write('##end inclusive: true' + os.linesep)

            tempContents.write('###' + '\t'.join([headers[i] for i in colIndexes]) + os.linesep)

            # Dense and 3D output need a 'pre' pass before the 'final' one.
            for passType in ['pre','final'] if createDense or create3dData else ['final']:
                for i in xrange(numSkipLines):
                    inputFile.readline()

                splitChar = cls._getSplitChar(choices)
                numCols = cls._getFileContentsInfo(choices).numCols
                regionsDecreased = False

                autoCorrectSeqId = choices.handleSeqId == 'Yes, auto-correct to the best match in the genome build'
                cropCrossingSegments = choices.cropCrossingSegments == 'Yes'
                genome = choices.genome

                for i, line in enumerate(inputFile):
                    # NOTE(review): this branch only executes `pass`, so
                    # empty and '#'-prefixed lines are still processed by the
                    # code below. `continue` may have been intended - confirm.
                    if line == '' or len(line) > 0 and line[0] == '#':
                        pass

                    cols = [x.strip() for x in line.strip().split(splitChar)]
                    if create3dData:
                        # Placeholder cells for the appended 'id' and 'edges' columns.
                        cols += ['', '']

                    # Every kept column must be present in the line.
                    for j in colIndexes:
                        if len(cols) <= j:
                            print >> sys.stderr, "Error in line #%s: %s" % (i+1, line)
                            print >> sys.stderr, "The line does not include the column #%s, which is defined with " \
                                                 "the name '%s' (the number of columns is %s). Please fix the input " \
                                                 "file or redefine the column names of this column." \
                                                 % (j+1, headers[j], len(cols))
                            return

                    if autoCorrectSeqId:
                        from quick.util.GenomeInfo import GenomeInfo
                        cols[headerIdxs['seqid']] = GenomeInfo.findBestMatchingChr(genome, cols[headerIdxs['seqid']])

                    # Empty cells become '.'; all other cells are escaped.
                    for j, col in enumerate(cols):
                        if col == '':
                            cols[j] = '.'
                        else:
                            cols[j] = formatPhraseWithCorrectChrUsage(col, notAllowedChars='#\t')

                    # Segments that start inside the sequence but end beyond
                    # its length get their end replaced by the sequence length.
                    if cropCrossingSegments:
                        from quick.util.GenomeInfo import GenomeInfo
                        for seqidHdr, startHdr, endHdr in [('seqid','start','end')] \
                                + ([('linked_seqid','linked_start','linked_end')] if create3dData else []):
                            if endHdr in headerIdxs:
                                seqid = cols[headerIdxs[seqidHdr]]
                                start = cols[headerIdxs[startHdr]]
                                end = cols[headerIdxs[endHdr]]
                                if not any(x == '.' for x in [seqid, start, end]):
                                    start, end = int(start), int(end)
                                    if choices.indexing == '1-indexed, end inclusive':
                                        start -= 1
                                    chrLen = GenomeInfo().getChrLen(genome, seqid)
                                    if start < chrLen and end > chrLen:
                                        cols[headerIdxs[endHdr]] = str(chrLen)

                    if createDense or create3dData:
                        prevReg = curReg
                        # NOTE(review): headerIdxs.get('end') is falsy when
                        # 'end' is column 0; other places test
                        # "'end' in headerIdxs" instead - confirm.
                        curReg = cls._getGenomeRegion(cols[headerIdxs['seqid']], cols[headerIdxs['start']], \
                                                                       cols[headerIdxs['end']] if headerIdxs.get('end') else None)

                        if passType == 'pre':
                            # Copy the line to the file that will be sorted.
                            newInputFile.write(line.strip() + os.linesep)

                            if create3dData:
                                # Register each distinct region once; the
                                # real id is filled in after sorting.
                                id = curReg.strShort()
                                if id not in idDict:
                                    regs.append(curReg)
                                    idDict[id] = ''

                                # NOTE(review): the guard tests 'end' but the
                                # value looked up is 'linked_end' - confirm.
                                linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                                                     cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)
                                if choices.undirected == 'Yes' and linkedReg and linkedReg != curReg:
                                    id = linkedReg.strShort()
                                    if id not in idDict:
                                        regs.append(linkedReg)
                                        idDict[id] = ''

                                    # For undirected links, also write the
                                    # reverse link: swap the region columns
                                    # with the linked-region columns.
                                    cols[headerIdxs['seqid']], cols[headerIdxs['linked_seqid']] = cols[headerIdxs['linked_seqid']], cols[headerIdxs['seqid']]
                                    cols[headerIdxs['start']], cols[headerIdxs['linked_start']] = cols[headerIdxs['linked_start']], cols[headerIdxs['start']]
                                    if 'end' in headerIdxs:
                                        cols[headerIdxs['end']], cols[headerIdxs['linked_end']] = cols[headerIdxs['linked_end']], cols[headerIdxs['end']]

                                    # The two placeholder cells are left out.
                                    newInputFile.write(splitChar.join(cols[:-2]) + os.linesep)

                        else: #passType == 'final':
                            if firstRegInBlock is None:
                                firstRegInBlock = curReg

                            if create3dData:
                                # A new region starts: keep the row built for
                                # the previous region and begin a new one.
                                if curReg != prevReg:
                                    prevCols = curCols
                                    prevRegIdx = regIdx
                                    regIdx = 0
                                    id = curReg.strShort()
                                    curCols = copy(cols)
                                    curCols[headerIdxs['id']] = idDict[id] if choices.idGeneration == 'Counting' else id
                                    curCols[headerIdxs['edges']] = ''

                                # NOTE(review): same 'end' vs 'linked_end'
                                # guard as in the pre-pass - confirm.
                                linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                                                 cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)

                                # Append this line's link to the edges cell
                                # of the current row.
                                if linkedReg:
                                    edges = curCols[headerIdxs['edges']]

                                    if edges != '':
                                        edges += ';'

                                    id = linkedReg.strShort()
                                    if id not in idDict:
                                        raise InvalidFormatError("Error: linked region '%s' is not present in tabular file. Line: %s" % (linkedReg, line))

                                    # With 'complete' selected, regions
                                    # positioned before the linked one get an
                                    # explicit '<id>=.' entry.
                                    if choices.complete == 'Yes':
                                        while regIdx < len(regs) and regs[regIdx] != linkedReg:
                                            missingId = regs[regIdx].strShort()
                                            edges += '%s=.;' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)
                                            regIdx += 1

                                    edges += idDict[id] if choices.idGeneration == 'Counting' else id
                                    if 'link_weight' in headerIdxs:
                                        edges += '=' + formatPhraseWithCorrectChrUsage(
                                                           cols[headerIdxs['link_weight']],
                                                           notAllowedChars='#\t')
                                    regIdx += 1

                                    curCols[headerIdxs['edges']] = edges

                                # The row of the previous region is complete
                                # once a new region has started: finish its
                                # edges cell and write it out.
                                if curReg != prevReg and prevCols:
                                    if choices.complete == 'Yes':
                                        # NOTE(review): this loop reuses `i`,
                                        # the line counter of the enclosing
                                        # enumerate loop.
                                        for i in xrange(prevRegIdx, len(regs)):
                                            missingId = regs[i].strShort()
                                            if i != 0:
                                                prevCols[headerIdxs['edges']] += ';'
                                            prevCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                                    if prevCols[headerIdxs['edges']] == '':
                                        prevCols[headerIdxs['edges']] = '.'

                                    cls._checkOverlap(prev3dReg, prevReg, prevLine)

                                    if createDense:
                                        firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                            (firstRegInBlock, prev3dReg, prevReg, tempContents, tempDataLines)

                                    cls._writeDataLines(prevCols, colIndexes, tempDataLines)

                                    prev3dReg = prevReg
                                prevLine = line
                            else: #createDense
                                cls._checkOverlap(prevReg, curReg, line)

                                firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                    (firstRegInBlock, prevReg, curReg, tempContents, tempDataLines)

                                cls._writeDataLines(cols, colIndexes, tempDataLines)
                    else:
                        # Neither dense nor 3D: write the line directly.
                        cls._writeDataLines(cols, colIndexes, tempDataLines)

                if passType == 'pre':
                    newInputFile.flush()

                    inputFile.close()

                    # Stable sort of the copied lines on the region columns
                    # that are present, using the external `sort` command.
                    # NOTE(review): the command is joined into one string and
                    # run through a shell (shell=True); splitChar and the
                    # temporary file names are inserted as-is.
                    sortCmd = ["sort", newInputFile.name, "-t$'%s'" % splitChar, "-s"] +\
                               ["-k%s,%s%s" % (headerIdxs[x]+1, headerIdxs[x]+1, s) if x in headerIdxs else "" \
                                for x,s in [('seqid',''), ('start','n'), ('end','n'), \
                                            ('linked_seqid',''), ('linked_start','n'), ('linked_end','n')]] +\
                                ["-o", sortedInputFile.name]
                    subprocess.call(' '.join(sortCmd), stderr=sys.stderr, stdout = sys.stdout, shell=True)

                    #print >> sys.stderr, ' '.join(sortCmd)
                    #os._exit(0)
                    newInputFile.close()

                    # Ids used with 'Counting' id generation are the
                    # positions of the regions in sorted order.
                    if create3dData:
                        regs = sorted(regs)
                        for i,reg in enumerate(regs):
                            idDict[reg.strShort()] = str(i)

                    # The final pass reads the sorted file from its start;
                    # the skipped header lines were not copied into it.
                    inputFile = sortedInputFile
                    inputFile.seek(0)
                    numSkipLines = 0
                    curReg = None
                else: #passType == 'final':
                    # Write out the row of the last region, which no
                    # following line has triggered.
                    if create3dData:
                        if choices.complete == 'Yes':
                            for i in xrange(regIdx, len(regs)):
                                missingId = regs[i].strShort()
                                if i != 0:
                                    curCols[headerIdxs['edges']] += ';'
                                curCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                        if curCols[headerIdxs['edges']] == '':
                            curCols[headerIdxs['edges']] = '.'

                        cls._checkOverlap(prev3dReg, curReg, prevLine)

                        if createDense:
                            firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                (firstRegInBlock, prev3dReg, curReg, tempContents, tempDataLines)

                        cls._writeDataLines(curCols, colIndexes, tempDataLines)

                    if createDense:
                        firstRegInBlock, tempDataLines = cls._writeBlockLines \
                            (firstRegInBlock, curReg, None, tempContents, tempDataLines)

                    # Append the collected data lines after the header part.
                    tempDataLines.flush()
                    tempDataLines.seek(0)
                    tempContents.write(tempDataLines.read())
                    tempContents.flush()
                    tempContents.seek(0)

            #print tempContents.read()
            #tempContents.seek(0)

            # Write the final file to galaxyFn, then read it through once.
            # NOTE(review): the empty loop presumably serves to validate the
            # generated file - confirm.
            expandHeadersOfGtrackFileAndReturnComposer(tempContents.name).composeToFile(galaxyFn)
            geSource = GtrackGenomeElementSource(galaxyFn, genome=genome, printWarnings=False)
            for ge in geSource:
                pass

        except Exception, e:
            # Report on standard error, then let the exception propagate.
            print >> sys.stderr, e
            raise