Example #1
    def _writeContent(self, fn, header, resDictKey, coreCls):
        keys = self._getKeys(resDictKey)

        core = coreCls()

        core.begin()
        core.bigHeader(header)
        core.header('Local result table for ' + resDictKey)

        if len(self._results.getAllRegionKeys()) > MAX_LOCAL_RESULTS_IN_TABLE:
            core.line('Local results were not printed because of the large number of bins: ' \
                  + str(len(self._results.getAllRegionKeys())) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
        else:
            core.tableHeader([
                str(coreCls().textWithHelp(baseText, helpText))
                for baseText, helpText in (
                    [('Region', '')] +
                    [self._results.getLabelHelpPair(key) for key in keys])
            ])

            for regionKey in self._results.getAllRegionKeys():
                if self._results[regionKey].get(resDictKey) is None:
                    core.tableLine([str(regionKey)] + [None] * len(keys))
                else:
                    core.tableLine([str(regionKey)] +\
                                   [ strWithStdFormatting( self._results[regionKey][resDictKey].get(key) ) for key in keys])
            core.tableFooter()

        core.end()

        ensurePathExists(fn)
        open(fn, 'w').write(str(core))
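Every example on this page calls ensurePathExists(fn) from quick.util.CommonFunctions before writing to fn. The helper itself is not shown here; the following is only a minimal sketch, assuming all it has to do is create any missing parent directories of the given file path (the real implementation may also handle permissions or races differently):

import os

def ensurePathExists(fn):
    # Sketch only: create the directory part of the target file path if it is missing.
    # ensurePathExists('/some/dir/file.txt') would create /some/dir; a trailing slash
    # (as in several examples on this page) makes the whole argument the directory to create.
    dirName = os.path.dirname(fn)
    if dirName and not os.path.exists(dirName):
        os.makedirs(dirName)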
Example #2
def createMapType(mapId, genome, rowTrackName, colTrackName, col2GeneListFn,
                  galaxyId, countType):

    googleMapsCommonDir = '/'.join([STATIC_PATH, 'maps', 'common'])
    googleMapsMapIdDir = '/'.join([googleMapsCommonDir, mapId])

    ensurePathExists(googleMapsMapIdDir + '/test')

    makeLowercaseName2NameShelfFromTnSubTypes(
        genome, rowTrackName,
        '/'.join([googleMapsMapIdDir, 'rowLowerCaseName2Name.shelf']))
    makeLowercaseName2NameShelfFromTnSubTypes(
        genome, colTrackName,
        '/'.join([googleMapsMapIdDir, 'colLowerCaseName2Name.shelf']))

    rowBaseTrackNameFile = open(
        '/'.join([googleMapsMapIdDir, 'rowBaseTrackName.txt']), 'w')
    colBaseTrackNameFile = open(
        '/'.join([googleMapsMapIdDir, 'colBaseTrackName.txt']), 'w')

    rowBaseTrackNameFile.write(rowTrackName + '\n')
    colBaseTrackNameFile.write(colTrackName + '\n')

    rowBaseTrackNameFile.close()
    colBaseTrackNameFile.close()

    if col2GeneListFn != 'None':
        shutil.copy(col2GeneListFn,
                    '/'.join([googleMapsMapIdDir, 'col2GeneList.shelf']))
        createShelvesBehindRankedGeneLists(galaxyId, mapId, countType)
Example #3
    def execute(cls, choices, galaxyFn=None, username=''):

        basisTrackNameAsList = choices.basisTrack.split(':')
        extractionOptions = dict(
            GalaxyInterface.getTrackExtractionOptions(choices.genome,
                                                      basisTrackNameAsList))
        extractionFormat = extractionOptions[
            choices.extFormatLbl] if extractionOptions else None

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        outGSuite = GSuite()
        for gsTrack in gsuite.allTracks():
            # outputTrackFn = cls.extraGalaxyFn[gsTrack.title]
            # print '<br>\n<br>\n output track filename: ', outputTrackFn
            # print 'path: ', gsTrack.path
            # print 'parsed uri: ', gsTrack._parsedUri
            newTrackFileName = gsTrack.title + '.' + extractionFormat
            outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(
                galaxyFn, newTrackFileName)
            ensurePathExists(outGalaxyFn)
            uri = GalaxyGSuiteTrack.generateURI(galaxyFn,
                                                extraFileName=newTrackFileName)
            GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
                choices.genome, basisTrackNameAsList, gsTrack.suffix,
                gsTrack.path, True, choices.extFormatLbl, outGalaxyFn)

            outGSuite.addTrack(
                GSuiteTrack(uri,
                            title=gsTrack.title,
                            fileFormat=gsTrack.fileFormat,
                            trackType=gsTrack.trackType,
                            genome=choices.genome,
                            attributes=gsTrack.attributes))

        GSuiteComposer.composeToFile(outGSuite, galaxyFn)
    def execute(cls, choices, galaxyFn=None, username=''):

        start = time.time()
        genome = choices[0]
        trackName = choices[1].split(':')
        outFn = galaxyFn
        if choices[5] == 'Write to Standardised file':
            outFn = createOrigPath(genome, choices[-1].split(':'),
                                   'collapsed_result.bedgraph')
            ensurePathExists(outFn[:outFn.rfind('/') + 1])

        combineMethod = choices[2]
        category = choices[3] if choices[3] else ''
        numSamples = choices[4] if choices[4] else '1'

        analysisDef = 'dummy [combineMethod=%s] %s [numSamples=%s] -> ConvertToNonOverlappingCategorySegmentsPythonStat' % \
                        (combineMethod, '[category=%s]' % category if category != '' else '', numSamples) #'Python'

        for regSpec in GenomeInfo.getChrList(genome):
            res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, '*', genome, username=username, \
                                            printResults=False, printHtmlWarningMsgs=False)

            from gold.origdata.TrackGenomeElementSource import TrackViewGenomeElementSource
            from gold.origdata.BedComposer import CategoryBedComposer
            for resDict in res.values():
                trackView = resDict['Result']
                tvGeSource = TrackViewGenomeElementSource(
                    genome, trackView, trackName)
                CategoryBedComposer(tvGeSource).composeToFile(outFn)
Example #5
    def _getUriForDownloadedAndUncompressedTrackPossiblyCached(
            self, gSuiteTrack, galaxyFn, uncomprSuffix, extraFileName=None):
        from gold.gsuite.GSuiteTrackCache import GSUITE_TRACK_CACHE
        cache = GSUITE_TRACK_CACHE

        if cache.isCached(gSuiteTrack):
            cachedUri = cache.getCachedGalaxyUri(gSuiteTrack)
            if os.path.exists(GSuiteTrack(cachedUri).path):
                return cache.getCachedGalaxyUri(gSuiteTrack)

        if extraFileName:
            from quick.application.ExternalTrackManager import ExternalTrackManager
            outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(
                galaxyFn, extraFileName)
            ensurePathExists(outGalaxyFn)
            if extraFileName.endswith('.' + uncomprSuffix):
                uri = GalaxyGSuiteTrack.generateURI(
                    galaxyFn=galaxyFn, extraFileName=extraFileName)
            else:
                uri = GalaxyGSuiteTrack.generateURI(
                    galaxyFn=galaxyFn,
                    extraFileName=extraFileName,
                    suffix=uncomprSuffix)
        else:
            outGalaxyFn = galaxyFn
            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=outGalaxyFn,
                                                suffix=uncomprSuffix)

        uncompressorAndDownloader = GSuiteTrackUncompressorAndDownloader()
        uncompressorAndDownloader.visit(gSuiteTrack, outGalaxyFn)

        if cache.shouldBeCached(gSuiteTrack):
            cache.cache(gSuiteTrack, uri)

        return uri
Example #6
def composeToFile(gSuite, outFileName):
    print("Outfilename", outFileName)
    ensurePathExists(outFileName)
    with open(outFileName, 'w') as out:
        ok = _composeCommon(gSuite, out)

    return ok
Example #7
    def execute(cls, choices, galaxyFn=None, username=''):
        from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
        from gold.origdata.GtrackComposer import StdGtrackComposer

        genome = choices[0]
        if choices[1] == 'track':
            trackName = choices[2].split(':')
        else:
            trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                genome, choices[2].split(':'))

        outFn = galaxyFn
        if choices[4] == 'Write to Standardised file':
            outFn = createOrigPath(genome, choices[-1].split(':'),
                                   'collapsed_result.bedgraph')
            ensurePathExists(outFn[:outFn.rfind('/') + 1])

        threshold = choices[3]
        analysisDef = 'dummy [threshold=%s] -> ForEachSegmentDistToNearestInSameTrackStat' % threshold  #'Python'
        res = GalaxyInterface.runManual([trackName], analysisDef, '*', '*', genome, username=username, \
                                        printResults=False, printHtmlWarningMsgs=False)

        tvGeSource = TrackViewListGenomeElementSource(
            genome, [x['Result'] for x in res.values()], trackName)
        StdGtrackComposer(tvGeSource).composeToFile(outFn)
 def writeRegionListToBedFile(regList, fn):
     from quick.util.CommonFunctions import ensurePathExists
     ensurePathExists(fn)
     f = open(fn, 'w')
     for reg in regList:
         f.write( '\t'.join([reg.chr, str(reg.start), str(reg.end)]) + os.linesep )
     f.close()
Example #9
    def generateSynGSuite(cls, dataOut, galaxyFn, genome):
        outGSuite = GSuite()
        g = SimulationPointIter()
        newData = ''
        chrNum = 0
        for chr in dataOut:

            # fileName = 'syn-chr' + 'iInterR-' + str(chr[0]) + 'st-' + str(chr[1]) + 'end-' + str(
            #     chr[2]) + 'iInterR-' + str(chr[3]) + 'iIntraR-' + str(chr[4]) + 'prob-' + str(chr[5]) + '--' + str(
            #     chrNum)

            fileName = 'syn-' + str(chr[0]) + ',' + str(chr[1]) + ',' + str(chr[2]) + ',' + str(chr[3]) +',' + str(chr[4]) +',' + str(chr[5])

            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                extraFileName=fileName,
                                                suffix='bed')

            gSuiteTrack = GSuiteTrack(uri)
            outFn = gSuiteTrack.path
            ensurePathExists(outFn)

            g.createChrTrack(genome, chr[0], PointIter, outFn, chr[3], chr[4], chr[5], chr[1], chr[2])

            with open(outFn, 'r') as outputFile:
                newData += ''.join(outputFile.readlines())

            chrNum += 1

            if chrNum == len(dataOut):
                with open(outFn, 'w') as outputFile:
                    outputFile.write(newData)
                outGSuite.addTrack(GSuiteTrack(uri, title=''.join(fileName), genome=genome))
        GSuiteComposer.composeToFile(outGSuite, cls.extraGalaxyFn['synthetic GSuite'])
Example #10
    def _commonWriteContent(self, fn, header, coreCls, headerPrefix,
                            numElements):
        core = coreCls()

        core.begin()
        core.styleInfoBegin(styleClass="infomessagesmall",
                            style='padding: 5px; margin-bottom: 10px; ' +\
                                  'background-image: none; background-color: #FFFC8C; ')
        core.header(headerPrefix)
        core.smallHeader(header)
        core.styleInfoEnd()

        #core.bigHeader(header)
        #core.header('Local result table')

        if isinstance(core,
                      HtmlCore) and numElements > MAX_LOCAL_RESULTS_IN_TABLE:
            core.line('Local results were not printed because of the large number of bins: ' \
                  + str(numElements) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
        else:
            self._writeTable(core, coreCls)

        core.end()

        ensurePathExists(fn)
        open(fn, 'w').write(str(core))
    def _writeContent(self, resDictKey, fn):
        ensurePathExists(fn)
        arbitraryTV = self._results.getArbitraryLocalResult()['Result']

        assert isinstance(arbitraryTV, TrackView)
        #assert arbitraryTV.trackFormat.getFormatName() in [ 'Valued segments', 'Segments'], arbitraryTV.trackFormat.getFormatName()
        genome = arbitraryTV.genomeAnchor.genome
        #print 'GENOME: ',genome
        from gold.util.CommonFunctions import getClassName
        print type([
            self._results[key]['Result']
            for key in sorted(self._results.keys())
        ][0]), getClassName([
            self._results[key]['Result']
            for key in sorted(self._results.keys())
        ][0])
        tvGeSource = TrackViewListGenomeElementSource(
            genome, [
                self._results[key]['Result']
                for key in sorted(self._results.keys())
            ], 'Private:GK:test1:wgEncodeUchicagoTfbsK562EfosControlPk'.split(
                ':'))
        if arbitraryTV.trackFormat.getFormatName() in ['Segments']:
            BedComposer(tvGeSource).composeToFile(fn)
        else:
            StdGtrackComposer(tvGeSource).composeToFile(fn)
 def execute(cls, choices, galaxyFn=None, username=''):
     start = time.time()
     genome = choices[0]
     trackName = choices[1].split(':')
     outFn = galaxyFn
     if choices[5] == 'Write to Standardised file':
         outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
         ensurePathExists(outFn[:outFn.rfind('/')+1])
             
     combineMethod = choices[2]
     category = choices[3] if choices[3] else ''
     numSamples = choices[4] if choices[4] else '1'
     
     analysisDef = 'dummy [combineMethod=%s] %s [numSamples=%s] -> ConvertToNonOverlappingCategorySegmentsPythonStat' % \
                     (combineMethod, '[category=%s]' % category if category != '' else '', numSamples) #'Python'
                                               
     for regSpec in  GenomeInfo.getChrList(genome):
         res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, '*', genome, username=username, \
                                         printResults=False, printHtmlWarningMsgs=False)
         
         from gold.origdata.TrackGenomeElementSource import TrackViewGenomeElementSource
         from gold.origdata.BedComposer import CategoryBedComposer
         for resDict in res.values():
             tvGeSource = TrackViewGenomeElementSource(genome, resDict['Result'], trackName)
             CategoryBedComposer(tvGeSource).composeToFile(outFn)
Example #13
    def generate(cls, valIter, valIterLen, maxValue, path, fnPrefix):
        "Assumes valIter gives values between 0 and maxValue"
        assert valIterLen > 0
        chainsFn, startsFn = cls._getFileNames(path, fnPrefix)
        ensurePathExists(chainsFn)
        chains = memmap(chainsFn, 'int32', 'w+', shape=valIterLen)
        starts = memmap(startsFn, 'int32', 'w+', shape=maxValue)
        curPositions = zeros(maxValue, 'int32') - 1
        starts[:] = curPositions
        #os.chmod(chainsFn, S_IRWXU|S_IRWXG|S_IRXO)
        #os.chmod(startsFn, S_IRWXU|S_IRWXG|S_IRXO)

        valIterIndex = 0
        for val in valIter:
            if val == None:
                pass
            elif curPositions[val] < 0:
                starts[val] = curPositions[val] = valIterIndex
            else:
                chains[curPositions[val]] = valIterIndex
                curPositions[val] = valIterIndex
            valIterIndex += 1
            if valIterIndex % 10e6 == 0:
                print '.',
        #for index in curPositions:
        #    if index >= 0:
        #        chains[ index ] = -1
        chains[curPositions[curPositions >= 0]] = -1
        chains.flush()
        starts.flush()
        return SameValueIndexChains(chains, starts)
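The generate classmethod above builds a memory-mapped inverted index as linked lists: starts[v] holds the first position in valIter where value v occurs, and chains[p] holds the next position carrying the same value as position p (the last position of each value gets -1). A hypothetical call, assuming SameValueIndexChains is the enclosing class and importable, and that numpy backs the memmap/zeros helpers:

values = [2, 0, 2, 1, 0, 2]   # must lie in [0, maxValue), here maxValue = 3
index = SameValueIndexChains.generate(iter(values), len(values), 3,
                                      '/tmp/chainsDemo', 'demo')
# For value 2: starts[2] == 0 and chains links 0 -> 2 -> 5, then -1.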
    def _writeContent(self, fn, header, resDictKey, coreCls):
        keys = self._getKeys(resDictKey)

        core = coreCls()
        
        core.begin()
        core.bigHeader(header)
        core.header('Local result table for ' + resDictKey)
        
        if len( self._results.getAllRegionKeys() ) > MAX_LOCAL_RESULTS_IN_TABLE:
            core.line('Local results were not printed because of the large number of bins: ' \
                  + str(len(self._results.getAllRegionKeys())) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
        else:
            core.tableHeader([ str( coreCls().textWithHelp(baseText, helpText) ) for baseText, helpText in 
                              ([('Region','')] + [self._results.getLabelHelpPair(key) for key in keys]) ])
            
            for regionKey in self._results.getAllRegionKeys():
                if  self._results[regionKey].get(resDictKey) is None:
                    core.tableLine([str(regionKey)] + [None]*len(keys))
                else:
                    core.tableLine([str(regionKey)] +\
                                   [ strWithStdFormatting( self._results[regionKey][resDictKey].get(key) ) for key in keys])
            core.tableFooter()

        core.end()
        
        ensurePathExists(fn)        
        open(fn,'w').write( str(core) )
Example #15
    def _uncompressTemporaryFile(self, gSuiteTrack, tmpFileName, outFileName):
        import subprocess
        import os, sys, shutil

        for compSuffix in COMPRESSION_SUFFIXES:
            reduceLen = len(compSuffix) + 1

            suffix = gSuiteTrack.suffix
            if suffix and (suffix == compSuffix
                           or suffix.lower().endswith('.' + compSuffix)):
                if compSuffix == 'gz':
                    subprocess.check_call(['gunzip', tmpFileName],
                                          stderr=sys.stdout)

                    unzippedFileName = tmpFileName[:-reduceLen]

                    ensurePathExists(outFileName)
                    #os.rename(unzippedFileName, outFileName)
                    shutil.move(unzippedFileName, outFileName)
                else:
                    raise ShouldNotOccurError
                break
        else:
            #os.rename(tmpFileName, outFileName)
            shutil.move(tmpFileName, outFileName)

        currentUmask = os.umask(0)
        os.umask(currentUmask)
        os.chmod(outFileName, 0o666 - currentUmask)
    def _writeContent(self, resDictKey, fn):
        from gold.application.RSetup import r
        ensurePathExists(fn)
        silenceRWarnings()
        bmpFn = fn #+ '.png'
#        r.png(filename=bmpFn, units='px', pointsize=self.POINT_SIZE, res=72)

        width, height = self.getPlotDimensions(resDictKey)
        # pdf test:
#        self.LINE_HEIGHT = self.POINT_SIZE
#        width, height = self.getPlotDimensions(resDictKey)
#        r.pdf(bmpFn, height=height*1.0/72, width=width*1.0/72, pointsize=self.POINT_SIZE)
        if any(x > 800 for x in [width, height]):
            self.LINE_HEIGHT = self.POINT_SIZE
            width, height = self.getPlotDimensions(resDictKey)
            if self.HIGH_DEF_COLORS:
                picType = 'png16m'
            else:
                picType = 'png256'

            r.bitmap(bmpFn, height=height, width=width, units='px', type=picType, pointsize=self.POINT_SIZE)
        else:
            r.png(filename=bmpFn, height=height, width=width, units='px', pointsize=self.POINT_SIZE, res=72)
        if resDictKey is not None:
            xlab = self._results.getLabelHelpPair(resDictKey)[0]
        else:
            xlab = None
        main = self._header
        self._customRExecution(resDictKey, xlab, main)
        #r.hist( ,  )
        from gold.application.RSetup import r
        r('dev.off()')
Example #17
    def __iter__(self):
        for archivedFileInfo in self._archive:
            #    galaxyFn = self._titleToGalaxyFnDict.get(archivedFileInfo.title)
            #    if not galaxyFn:
            #        raise ShouldNotOccurError('Galaxy filename not found for file with title: ' + archivedFile.title)

            extraFileName = os.sep.join((archivedFileInfo.directories if self._storeHierarchy else []) +\
                                         [archivedFileInfo.baseFileName])

            if self._storeHierarchy:
                attributeList = OrderedDict([('dir_level_%s' % (i+1), directory) \
                                             for i,directory in enumerate(archivedFileInfo.directories)])
            else:
                attributeList = OrderedDict()

            uri = GalaxyGSuiteTrack.generateURI(self._galaxyFn,
                                                extraFileName=extraFileName)
            gSuiteTrack = GSuiteTrack(uri,
                                      title=archivedFileInfo.baseFileName,
                                      attributes=attributeList)

            outFn = gSuiteTrack.path
            ensurePathExists(outFn)
            with open(outFn, 'w') as outFile:
                inFile = self._archive.openFile(archivedFileInfo.path)
                outFile.write(inFile.read())
                inFile.close()

            yield gSuiteTrack
 def _writeContent(self, resDictKey, fn):
     ensurePathExists(fn)
     outF = open( fn ,'w')
     outF.write('track type=wiggle_0 name=' + self._results.getStatClassName() + '_' + resDictKey + os.linesep)
     for bin in self._results.getAllRegionKeys():
         outF.write( '\t'.join([str(x) for x in \
                     [bin.chr, bin.start, bin.end, str(self._results[bin].get(resDictKey)).replace('None', 'nan')] ]) + os.linesep)
     outF.close()
Example #19
 def _writeRawData(self, resDictKey, fn):
     ensurePathExists(fn)
     rawData = self._getRawData(resDictKey, False)
     outF = open(fn, 'w')
     outF.write('Xs: ' + ','.join([str(x)
                                   for x in rawData[0]]) + os.linesep)
     outF.write('Ys: ' + ','.join([str(x)
                                   for x in rawData[1]]) + os.linesep)
 def __getitem__(self, memoPath):
     if not memoPath in self._memoDicts:
         memoFn = self._createPickleFn(memoPath)
         ensurePathExists( memoFn )
         try:
             self._memoDicts[memoPath] = SafeDiskMirroredDict(memoFn)
         except Exception, e:
             logMessageOnce("Exception when accessing memo file '%s': %s" % (memoFn, str(e)))
             raise
 def writeChr(cls, genome, chr, trackName, elTupleIter):
     outFn = createOrigPath(genome, trackName, chr + cls._getEnding())
     ensurePathExists(outFn)
     outF = open(outFn, 'w')
     cls._writeHeader(outF, chr)
     numPrintedDots = 0
     for el in elTupleIter:
         cls._writeEl(outF, el, chr)
     outF.close()
Example #22
    def writeTrackData(choices, genome, tn):
        from gold.util.RandomUtil import random
        from gold.util.CommonFunctions import createCollectedPath
        from quick.util.CommonFunctions import ensurePathExists

        trackFn = createCollectedPath(genome, tn,
                                      'simulatedTracks.category.bed')
        ensurePathExists(trackFn)
        trackFile = open(trackFn, 'w')
        #determinePossibilities
        numPossiblePositions = int(choices[2])
        spacingBetweenPositions = 1e3
        possiblePositions = [
            i * spacingBetweenPositions
            for i in range(1, int(numPossiblePositions))
        ]
        numHighProbPositions = int(choices[3])
        highProbPossiblePositions = possiblePositions[0:numHighProbPositions]
        lowProbPossiblePositions = possiblePositions[numHighProbPositions:]

        largestPossiblePosition = possiblePositions[-1]
        print 'largestPossiblePosition: ', largestPossiblePosition / 1e6, 'M'
        assert largestPossiblePosition < 1.5e8  #just due to hardcoded analysis region below..

        sizePerPosition = 591  #empirical across all VDR binding sites..
        print 'Total BpCoverage: ', len(possiblePositions) * sizePerPosition

        #make samples
        numExperiments = int(choices[0])
        proportionFromHighProbPositions = float(choices[4])
        fixedNumFromHighProbPositions = int(choices[5])
        #numPositionsPerExperiment = [3000]*9
        numPositionsPerExperiment = [
            int(x) for x in choices[1].split(',')
        ]  #[3073, 7118, 5290, 3059, 4051, 1021, 200, 610, 573]
        for experimentIndex in range(numExperiments):
            #sampledPositions = random.sample(possiblePositions, numPositionsPerExperiment[experimentIndex])
            numHighProbSamples = int(numPositionsPerExperiment[experimentIndex]
                                     * proportionFromHighProbPositions
                                     ) + fixedNumFromHighProbPositions
            numLowProbSamples = numPositionsPerExperiment[
                experimentIndex] - numHighProbSamples
            print 'numHighProbSamples: %i, out of numHighProbPossiblePositions: %i' % (
                numHighProbSamples, len(highProbPossiblePositions))
            sampledPositions = random.sample(highProbPossiblePositions, numHighProbSamples ) \
                            + random.sample(lowProbPossiblePositions, numLowProbSamples )
            sampledSegments = [(position, position + sizePerPosition)
                               for position in sampledPositions]
            for seg in sampledSegments:
                trackFile.write('\t'.join([
                    'chr1',
                    '%i' % seg[0],
                    '%i' % seg[1],
                    'T%i' % experimentIndex
                ]) + '\n')

        trackFile.close()
 def _writeContent(self, resDictKey, fn):
     ensurePathExists(fn)
     outF = open( fn ,'w')
     outF.write('track type=bedGraph name=' + self._results.getStatClassName() + '_' + resDictKey + \
                (' viewLimits=0:1 autoScale=off' if resDictKey.lower() in ['pval','p-value'] else '') + os.linesep)
     for bin in self._results.getAllRegionKeys():
         outF.write( '\t'.join([str(x) for x in \
                     [bin.chr, bin.start, bin.end, str(self._results[bin].get(resDictKey)).replace('None', 'nan')] ]) + os.linesep)
     outF.close()
 def writeChr(cls, genome, chr, trackName, elTupleIter):
     outFn = createOrigPath(genome, trackName, chr+cls._getEnding() )
     ensurePathExists(outFn)
     outF = open(outFn,'w')
     cls._writeHeader(outF, chr)
     numPrintedDots = 0
     for el in elTupleIter:
         cls._writeEl(outF, el, chr)
     outF.close()
 def _writeTestFile(self, case):
     fn = createOrigPath(self.GENOME,
                         self.TRACK_NAME_PREFIX + case.trackName,
                         'testfile' + case.suffix)
     ensurePathExists(fn)
     testfile = open(fn, 'w')
     testfile.write('\n'.join(case.headerLines + case.lines))
     testfile.close()
     return fn
    def generateGenomeAnnotations(cls, abbrv):
        fnSource = cls.getCollectedPathGFF(abbrv)
        if os.path.exists(fnSource):
            from quick.extra.StandardizeTrackFiles import SplitFileToSubDirs
            SplitFileToSubDirs.parseFiles(abbrv, GenomeInfo.getGenomeAnnotationsTrackName(abbrv), direction='coll_to_std', \
                                          suffix='.gff', catSuffix='.category.gff', subTypeCol='2', depth='1', numHeaderLines='0')

            fnDest = cls.getStandardizedPathGFF(abbrv)
            ensurePathExists(fnDest)
            shutil.copyfile(fnSource, fnDest)
 def __getitem__(self, memoPath):
     if not memoPath in self._memoDicts:
         memoFn = self._createPickleFn(memoPath)
         ensurePathExists(memoFn)
         try:
             self._memoDicts[memoPath] = SafeDiskMirroredDict(memoFn)
         except Exception, e:
             logMessageOnce("Exception when accessing memo file '%s': %s" %
                            (memoFn, str(e)))
             raise
 def execute(cls, choices, galaxyFn=None , username=''):
     '''Is called when execute-button is pushed by web-user.
     Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
     If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
     choices is a list of selections made by web-user in each options box.
     '''
     #print 'Executing... with choices %s'%str(choices)
     abbrv = choices[0]
     name = choices[1]
     
     #Should test that the genome is not in hyperbrowser.
     gi = GenomeInfo(abbrv)
     
     if gi.hasOrigFiles():
         sys.stderr.write( "Genome "+abbrv+ " is already in the Genomic HyperBrowser. Remove the old first.")
     else:
         gi.fullName = name
         if choices[2] == 'URL':
             urls = choices[3].split()
             gi.sourceUrls = urls
             for url in urls:
                 try:
                     GenomeImporter.downloadGenomeSequence(abbrv, url)
                 except InvalidFormatError:
                     return
         else:
             basePath =  os.sep.join([NONSTANDARD_DATA_PATH, abbrv] + GenomeInfo.getSequenceTrackName(abbrv))
             fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[4].split(':'))
             fnDest = basePath+'/'+abbrv+'Sequence.fasta'
             ensurePathExists(fnDest)
             copyfile(fnSource, fnDest)
             
         chrs=GenomeImporter.extractChromosomesFromGenome(abbrv)
         gi.sourceChrNames = chrs
         gi.installedBy = username
         gi.genomeBuildSource = choices[5]
         gi.genomeBuildName = choices[6]
         gi.species = choices[7]
         gi.speciesTaxonomyUrl = choices[8]
         gi.assemblyDetails = choices[9]
         gi.privateAccessList = [v.strip() for v in choices[10].replace(os.linesep, ' ').replace(',', ' ').split(' ') if v.find('@')>0]
         gi.isPrivate = (choices[11] != 'All')
         gi.isExperimental = (choices[12] != 'All')
         gi.ucscClade = choices[13]
         gi.ucscGenome = choices[14]
         gi.ucscAssembly = choices[15]
         
         galaxyFile=open(galaxyFn, "w")
         galaxyFile.write( 'Genome abbreviation: ' + abbrv + os.linesep)
         galaxyFile.write( 'Genome full name: ' + name + os.linesep)
         galaxyFile.write( 'Track name: ' + ':'.join(GenomeInfo.getSequenceTrackName(abbrv)) + os.linesep)
         galaxyFile.write( 'Temp chromosome names: ' + ' || '.join(chrs) + os.linesep)
         #GenomeImporter.saveTempInfo(abbrv, name, chrs)
         #print 'Chromosomes: '+chrs
         gi.store()
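The choices argument above is read purely by position. For illustration only, a hypothetical choices list for a URL-based install, with every value made up, mirroring the indices the method reads:

choices = [
    'myGenome',                           # 0: genome abbreviation
    'My Genome (full name)',              # 1: full genome name
    'URL',                                # 2: sequence source selector ('URL' or a Galaxy dataset)
    'http://example.org/myGenome.fa.gz',  # 3: whitespace-separated sequence URLs
    '',                                   # 4: Galaxy track name (only read in the non-URL branch)
    'Example build source',               # 5: genome build source
    'build1',                             # 6: genome build name
    'Example species',                    # 7: species
    'http://example.org/taxonomy',        # 8: species taxonomy URL
    'Assembly details',                   # 9: assembly details
    'user@example.org',                   # 10: private access list (e-mail addresses)
    'All',                                # 11: 'All' means the genome is not private
    'All',                                # 12: 'All' means the genome is not experimental
    '',                                   # 13: UCSC clade
    '',                                   # 14: UCSC genome
    '',                                   # 15: UCSC assembly
]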
    def writeRegionListToBedFile(regList, fn):
        from quick.util.CommonFunctions import ensurePathExists
        ensurePathExists(fn)
        f = open(fn, 'w')

        if regList != None:
            for reg in regList:
                f.write(
                    '\t'.join([reg.chr, str(reg.start),
                               str(reg.end)]) + os.linesep)
        f.close()
Example #30
 def downloadTrack(self, genome, trackName):
     if trackName[-1]=='primaryTable':
         self.getTableData(genome, trackName)
     else:
         bedString, metaData = self.getBedData(genome, trackName)
         #fileName = 'fromUcsc.bed' if trackName[-1][-3:] == 'bed' else 'fromUcsc.'+trackName[-1]
         fileName = 'fromUcsc.'+trackName[-1]
     
         fn = createOrigPath(genome, trackName, fileName)
         ensurePathExists(fn)
         open(fn,'w').write(bedString)
 def downloadTrack(self, genome, trackName):
     if trackName[-1]=='primaryTable':
         self.getTableData(genome, trackName)
     else:
         bedString, metaData = self.getBedData(genome, trackName)
         #fileName = 'fromUcsc.bed' if trackName[-1][-3:] == 'bed' else 'fromUcsc.'+trackName[-1]
         fileName = 'fromUcsc.'+trackName[-1]
     
         fn = createOrigPath(genome, trackName, fileName)
         ensurePathExists(fn)
         open(fn,'w').write(bedString)
Example #32
    def __init__(self):
        if not os.path.exists(self.URI_PREFIXES_FN):
            ensurePathExists(self.URI_PREFIXES_FN)
            open(self.URI_PREFIXES_FN, 'w')

        if not os.path.exists(self.CACHE_SHELVE_FN):
            ensurePathExists(self.CACHE_SHELVE_FN)

        cache = self._openShelve('c')
        cache.close()

        prefixList = [line.strip() for line in open(self.URI_PREFIXES_FN, 'r')]
        self._uriPrefixes = Trie(prefixList)
def renameCollTrack(genome, oldTn, newTn):
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print '(TN did not exist in collTracks..)'
    else:
        print '(renaming track in collTracks..)'
        newPath = createCollectedPath(genome, newTn)
        if not ONLY_SIMULATION:    
            assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath
            ensurePathExists(newPath)
            shutil.move(oldPath, newPath)
        else:
            print 'Would move %s to %s' %  (oldPath, newPath)
def renameStdTrack(genome, oldTn, newTn):
    oldPath = createOrigPath(genome, oldTn)
    assert os.path.exists(oldPath), 'ERROR: TN did not exist in stdTracks: ' + oldPath
    
    print '(renaming track in stdTracks..)'
    newPath = createOrigPath(genome, newTn)
    if not ONLY_SIMULATION:    
        assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath
        ensurePathExists(newPath)
        print 'Moving from %s to %s' % (oldPath, newPath)
        shutil.move(oldPath, newPath)
    else:
        print 'Would move %s to %s' %  (oldPath, newPath)
def renameCollTrack(genome, oldTn, newTn):
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print '(TN did not exist in collTracks..)'
    else:
        print '(renaming track in collTracks..)'
        newPath = createCollectedPath(genome, newTn)
        if not ONLY_SIMULATION:
            assert not os.path.exists(
                newPath), 'ERROR: Target path already exists: ' + newPath
            ensurePathExists(newPath)
            shutil.move(oldPath, newPath)
        else:
            print 'Would move %s to %s' % (oldPath, newPath)
 def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \
                                                  globalCoords=False, asOriginal=False, allowOverlaps=False, \
                                                  ignoreEmpty=True):
     ensurePathExists(zipFn)
     zipFile = ZipFile(zipFn, 'w')
     for region in regionList:
         fn = os.path.dirname(zipFn) + os.sep + str(region).replace(':','_')
         okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName, \
                            globalCoords=globalCoords, addSuffix=True, asOriginal=asOriginal, \
                            allowOverlaps=allowOverlaps, ignoreEmpty=ignoreEmpty)
         if okFn:
             zipFile.write(okFn, os.path.basename(okFn))
             os.remove(okFn)
     zipFile.close()
def renameProcTrack(genome, oldTn, newTn):
    for allowOverlaps in [False, True]:
        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print 'Warning: TN did not exist as preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps')
        else:
            print '(renaming TN in preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps') + ')'
            newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
            if not ONLY_SIMULATION:    
                assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath
                ensurePathExists(newPath)
                shutil.move(oldPath, newPath)
            else:
                print 'Would move %s to %s' %  (oldPath, newPath)
    def convertSumstat(cls, inFn, outFn, rsidDict, shouldLogTransform, valueFilter=None):
        """
        Converts a sumstat primary track to a valued point track.
        The sumstat track must be formatted as defined in the summary statistics file definition of Bulik-Sullivan
        et al. (2015). See specification at https://github.com/bulik/ldsc/wiki/Summary-Statistics-File-Format

        :param inFn: path to original track
        :param outFn: path to new track
        :param rsidDict: dictionary for mapping of rsids to reference genome
        :param shouldLogTransform: if True, values are converted to -log(value)
        :param valueFilter: upper threshold for p/z values in original track
        """
        from math import log

        ensurePathExists(outFn)
        inFile = open(inFn, 'r')
        outFile = open(outFn, 'w')

        # Find columns
        inFileLines = [x.strip().split('\t') for x in inFile.readlines()]
        colNames = [col.upper() for col in inFileLines[0]]
        valueColNum = colNames.index(cls.P) if cls.P in colNames else colNames.index(cls.Z)
        idColNum = colNames.index(cls.SNP)

        # Track header information
        outFile.write('##track type: valued points\n')
        outFile.write("##1-indexed: False\n")
        outFile.write('###' + '\t'.join(cls.GTRACK_COLS) + '\n')

        # Convert each line
        for cols in inFileLines[1:]:

            rsid = cols[idColNum]
            value = cols[valueColNum]

            if not valueFilter or float(value) <= valueFilter:

                if shouldLogTransform:
                    try:
                        """Convert values to -log(pval), the values of GWAS Catalog SNPs"""
                        value = str(-log(float(value)))
                    except:
                        """For SNPs with reported p-value of 0.000, assume high significance"""
                        value = str(-log(0.0005))

                seq, pos = RsidMapper.getPosition(rsid, rsidDict)
                if seq and pos:
                    outFile.write('\t'.join([seq, pos, rsid, value]) + '\n')

        inFile.close()
        outFile.close()
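To make the shouldLogTransform branch above concrete: math.log is the natural logarithm, so a reported p-value of 0.001 is written as roughly 6.91, and the 0.0005 fallback used for p-values reported as 0.000 corresponds to roughly 7.60:

from math import log

print(-log(0.001))    # ~6.9078: the value written for a reported p-value of 0.001
print(-log(0.0005))   # ~7.6009: the fallback used above for p-values reported as 0.000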
    def _downloadFiles(cls, url, basePath, fileType, allowedUnpackedSuffixes):

        #if os.path.exists(basePath):
        #    sys.stderr.write("Genome sequence path already exists: %s. Exiting..." % basePath)
        #    return


        fn = basePath +"/"

        if not url.split('.')[-1].lower() in allowedUnpackedSuffixes + ['tar','tgz','gz','zip']:
            urlinfo = str(urllib2.urlopen(url).info())
            if urlinfo.find('filename=') >0:
                fn+= urlinfo.split('filename=')[-1].replace(';','\n').split('\n')[0].strip()
            else:
                sys.stderr.write("Not a supported file format. File must end with: %s tar tgz tar.gz zip gz") \
                    % ' '.join(allowedUnpackedSuffixes)
                raise InvalidFormatError
        else:
            fn+=url.split("/")[-1]

        ensurePathExists(fn)
        urllib.urlretrieve(url, fn)

        if any(url.lower().endswith(x) for x in allowedUnpackedSuffixes):
            print fileType + " file"

        elif any(url.lower().endswith(x) for x in [".tar",".tgz",".tar.gz"]):
            print "tar file"
            te=tarfile.open(fn)
            te.extractall(path=basePath)
            te.close()
            os.remove(fn)

        elif url.lower().endswith(".zip"):
            print "zip file"
            sourceZip = zipfile.ZipFile(fn, 'r')
            sourceZip.extractall(path=basePath)
            sourceZip.close()
            os.remove(fn)

        elif url.lower().endswith(".gz"):
            print "gz file"
            f = gzip.open(fn, 'rb')
            retfn = fn[0:fn.rfind(".")]  # strip the trailing ".gz" from the filename
            resfile = open(retfn, "wb")
            for i in f:
                resfile.write(i)
            resfile.close()
            os.remove(fn)
 def _writeRawData(self, resDictKey, fn):
     ensurePathExists(fn)
     outF = open(fn,'w')
     
     rawData = self._getRawData(resDictKey, False)
     if self.maxRawDataPoints is None or len(rawData) <= self.maxRawDataPoints:
         if type(rawData) in [list, tuple, numpy.ndarray] and len(rawData)>0 and type(rawData[0]) in [int,float,numpy.int32,numpy.float,numpy.float32, numpy.float64, numpy.float128, numpy.ndarray]:
             if type(rawData) == tuple:
                 for npArr in rawData:
                     print>>outF, ','.join([str(x) for x in npArr])
             else:
                 outF.write( os.linesep.join([str(x) for x in rawData]) )
         else:
             outF.write( str(rawData) )
     outF.close()
def renameStdTrack(genome, oldTn, newTn):
    oldPath = createOrigPath(genome, oldTn)
    assert os.path.exists(
        oldPath), 'ERROR: TN did not exist in stdTracks: ' + oldPath

    print '(renaming track in stdTracks..)'
    newPath = createOrigPath(genome, newTn)
    if not ONLY_SIMULATION:
        assert not os.path.exists(
            newPath), 'ERROR: Target path already exists: ' + newPath
        ensurePathExists(newPath)
        print 'Moving from %s to %s' % (oldPath, newPath)
        shutil.move(oldPath, newPath)
    else:
        print 'Would move %s to %s' % (oldPath, newPath)
Example #42
 def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \
                                                  globalCoords=False, asOriginal=False, allowOverlaps=False, \
                                                  ignoreEmpty=True):
     ensurePathExists(zipFn)
     zipFile = ZipFile(zipFn, 'w')
     for region in regionList:
         fn = os.path.dirname(zipFn) + os.sep + str(region).replace(
             ':', '_')
         okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName, \
                            globalCoords=globalCoords, addSuffix=True, asOriginal=asOriginal, \
                            allowOverlaps=allowOverlaps, ignoreEmpty=ignoreEmpty)
         if okFn:
             zipFile.write(okFn, os.path.basename(okFn))
             os.remove(okFn)
     zipFile.close()
Example #43
    def printLinkToCallGraph(self, id, galaxyFn, prune=True):
        statsFile = GalaxyRunSpecificFile(id + ['pstats.dump'], galaxyFn)
        dotFile = GalaxyRunSpecificFile(id + ['callGraph.dot'], galaxyFn)
        pngFile = GalaxyRunSpecificFile(id + ['callGraph.png'], galaxyFn)

        ensurePathExists(statsFile.getDiskPath())

        self._stats.dump_stats(statsFile.getDiskPath())
        stats = OverheadStats(statsFile.getDiskPath())
        stats.writeDotGraph(dotFile.getDiskPath(), prune=prune)
        stats.renderGraph(dotFile.getDiskPath(), pngFile.getDiskPath())

        print str(HtmlCore().link(
            'Call graph based on profiling (id=%s)' % ':'.join(id),
            pngFile.getURL()))
Example #44
 def getHiCFileFromTargetBins(targetBins, galaxyFn):
     from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
     staticFile = GalaxyRunSpecificFile(['PEI_regions.bed'], galaxyFn)
     fn = staticFile.getDiskPath()
     from quick.util.CommonFunctions import ensurePathExists
     ensurePathExists(fn)
     f = open(fn, 'w')
     import os
     for region in targetBins:
         chrom = region[0]
         start = region[1]
         end = region[2]
         f.write('\t'.join([chrom, str(start), str(end)]) + os.linesep)
     f.close()
     return staticFile
    def liftOverGTrack(cls, inFn, outFn, rsidDict):
        """
        Liftover for primary point tracks. The tracks must have a column 'id', with the rsid of the SNPs in each row.
        In addition, 'seqid' and 'start' are needed in the original tracks, as these columns will be the only ones
        modified for each track element.

        :param inFn: path to original track
        :param outFn: path to new track
        :param rsidDict: dictionary for mapping of rsids to reference genome
        """
        ensurePathExists(outFn)
        inFile = open(inFn, 'r')
        outFile = open(outFn, 'w')

        rsidCol = 0
        seqCol = 0
        startCol = 0

        # Lift over each line
        for line in inFile.readlines():
            if line.startswith('###'):
                cols = line[3:].strip().split('\t')
                rsidCol = cols.index(cls.RSID)
                seqCol = cols.index(cls.SEQID)
                startCol = cols.index(cls.POS)

            if line.startswith("##1-indexed:"):
                """
                The rsID-mapping is based on the dbSNP positions, which are 0-indexed.
                We need to make sure this attribute is correctly set in our tracks.
                """
                outFile.write("##1-indexed: False\n")

            elif line.startswith('#'):
                outFile.write(line)

            else:
                cols = line.strip().split('\t')
                rsid = cols[rsidCol]
                seq, pos = RsidMapper.getPosition(rsid, rsidDict)
                if seq and pos:
                    cols[seqCol] = str(seq)
                    cols[startCol] = pos
                    outFile.write('\t'.join(cols) + '\n')

        inFile.close()
        outFile.close()
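The docstring above names the required columns 'id', 'seqid' and 'start'. Assuming the class constants cls.RSID, cls.SEQID and cls.POS hold exactly those strings, the header parsing resolves the column indices as follows for a made-up GTrack column-specification line:

headerLine = '###seqid\tstart\tid\tvalue'
cols = headerLine[3:].strip().split('\t')
print(cols.index('id'))      # 2 -> rsidCol
print(cols.index('seqid'))   # 0 -> seqCol
print(cols.index('start'))   # 1 -> startCol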
 def downloadGenomeSequence(cls, abbrv, url):
     basePath = cls.getBasePath(abbrv)
     if os.path.exists(basePath):
         sys.stderr.write("Genome sequence path already exists: %s. Exiting..." % basePath)
         return
         
     
     fn = basePath +"/"
     
     if not url.split('.')[-1].lower() in ['fa','fasta','tar','tgz','gz','zip']:
         urlinfo = str(urllib2.urlopen(url).info())
         if urlinfo.find('filename=') >0:
             fn+= urlinfo.split('filename=')[-1].replace(';','\n').split('\n')[0].strip()
         else:
             sys.stderr.write("Not a supported file format. File must end with: fa fasta tar tgz tar.gz zip gz")
             raise InvalidFormatError
     else:
         fn+=url.split("/")[-1]
     
     ensurePathExists(fn)
     urllib.urlretrieve(url, fn)
     
     if url.lower().endswith(".fa") | url.lower().endswith(".fasta"):
         print "fasta file"
     elif url.lower().endswith(".tar") | url.lower().endswith(".tgz") | url.lower().endswith(".tar.gz"):
         print "tar file"
         te=tarfile.open(fn)
         te.extractall(path=basePath)
         te.close()
         os.remove(fn)
     elif url.lower().endswith(".zip"):
         print "zip file"
         sourceZip = zipfile.ZipFile(fn, 'r')
         sourceZip.extractall(path=basePath)
         sourceZip.close()
         os.remove(fn)
     elif url.lower().endswith(".gz"):
         print "gz file"
         f = gzip.open(fn, 'rb')
      retfn = fn[0:fn.rfind(".")]  # strip the trailing ".gz" from the filename
      resfile = open(retfn, "wb")
         for i in f:
             resfile.write(i)
         resfile.close()
         os.remove(fn)
 def getFileFromTargetBins(targetBins, galaxyFn, subDirId=None):
     staticFile = GalaxyRunSpecificFile(
         ([subDirId] if subDirId else []) + ['intersected_regions.bed'],
         galaxyFn)
     fn = staticFile.getDiskPath()
     from quick.util.CommonFunctions import ensurePathExists
     ensurePathExists(fn)
     f = open(fn, 'w')
     for region in targetBins:
         tmp = region[0].split(':')
         chrom = tmp[0]
         tmp2 = tmp[1].split('-')
         start = tmp2[0]
         end = tmp2[1]
         tfs = region[1]
         f.write('\t'.join([chrom, str(start), str(end), tfs]) + os.linesep)
     f.close()
     return staticFile
def renameProcTrack(genome, oldTn, newTn):
    for allowOverlaps in [False, True]:
        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print 'Warning: TN did not exist as preproc ' + (
                'with overlaps' if allowOverlaps else ' without overlaps')
        else:
            print '(renaming TN in preproc ' + ('with overlaps'
                                                if allowOverlaps else
                                                ' without overlaps') + ')'
            newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
            if not ONLY_SIMULATION:
                assert not os.path.exists(
                    newPath), 'ERROR: Target path already exists: ' + newPath
                ensurePathExists(newPath)
                shutil.move(oldPath, newPath)
            else:
                print 'Would move %s to %s' % (oldPath, newPath)
    def execute(choices, galaxyFn=None, username=''):
        '''Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything other than HTML, the output should be written to the file with path galaxyFn.gtr
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box.
        '''
        print 'Executing... starting to remove ' + choices[0] + os.linesep

        paths = [NONSTANDARD_DATA_PATH, ORIG_DATA_PATH, PARSING_ERROR_DATA_PATH, NMER_CHAIN_DATA_PATH] +\
                 [createDirPath('', '', allowOverlaps=x) for x in [False, True]]
        
        for p in paths:
            genome = choices[0]
            origPath = os.sep.join([ p, genome ])
            trashPath = os.sep.join([ p, ".trash", genome ])

            if os.path.exists(origPath):
                print 'Moving ' + genome + ' to .trash in folder: ' + p + os.linesep
                ensurePathExists(trashPath)
                shutil.move(origPath, trashPath)
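For illustration, the trash layout produced above, using a made-up data path (the real entries of the paths list come from the configuration constants):

import os

p = '/hb/standardizedTracks'   # hypothetical entry from the paths list
genome = 'myGenome'
print(os.sep.join([p, genome]))             # /hb/standardizedTracks/myGenome  (origPath)
print(os.sep.join([p, '.trash', genome]))   # /hb/standardizedTracks/.trash/myGenome  (trashPath)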
    def _writeContent(self, fn, resDictKey, header, coreCls):
        ensurePathExists(fn)
        outFile = open(fn,'w')

        core = coreCls()
        core.begin()
        core.bigHeader(header)
        core.header(self.HEADER)

        matrixDict = self._getRawData(resDictKey)
        matrix, rownames, colnames, rowOrder, colOrder = [copy(matrixDict.get(x)) for x in \
                                                          [self.MATRIX_VALUE_KEY,'Rows','Cols', 'RowOrder', 'ColOrder']]
#        matrix, rownames, colnames, rowOrder, colOrder = [matrixDict.get(x) for x in \
#                                                          [self.MATRIX_VALUE_KEY,'Rows','Cols', 'RowOrder', 'ColOrder']]
        
#        print matrix, rownames, colnames, rowOrder, colOrder
    
        if matrix is None:
            raise SilentError
    
        if rowOrder is not None:
            rownames = rownames[rowOrder]
            matrix = matrix[rowOrder]

        if colOrder is not None:
            colnames = colnames[colOrder]
            matrix = matrix[:,colOrder]

        core.tableHeader([''] + [ str( coreCls().textWithHelp(baseText, helpText) ) for baseText, helpText in \
                          [self._results.getLabelHelpPair(col) for col in colnames] ], sortable=True)
        for i,row in enumerate(matrix):
            core.tableLine([ str( coreCls().textWithHelp(*self._results.getLabelHelpPair(rownames[i])) ) ] +\
                           [ strWithStdFormatting( colVal ) for colVal in row ])
            # In order for the memory usage and handling time not to explode for large tables
            outFile.write( str(core) )
            core = coreCls()
        core.tableFooter()

        core.end()
        outFile.write( str(core) )
        outFile.close()
 def execute(cls, choices, galaxyFn=None, username=''):
     from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
     from gold.origdata.GtrackComposer import StdGtrackComposer
     genome = choices[0]
     if choices[1] == 'Track':
         trackName = choices[2].split(':')
     else:
         trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices[2].split(':'))
         
     outFn = galaxyFn
     if choices[4] == 'Write to Standardised file':
         outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
         ensurePathExists(outFn[:outFn.rfind('/')+1])
            
     threshold = choices[3]
     analysisDef = 'dummy [threshold=%s] -> ForEachSegmentDistToNearestInSameTrackStat' % threshold #'Python'
     res = GalaxyInterface.runManual([trackName], analysisDef, '*', '*', genome, username=username, \
                                     printResults=False, printHtmlWarningMsgs=False)
             
     tvGeSource = TrackViewListGenomeElementSource(genome, [x['Result'] for x in res.values()], trackName)    
     StdGtrackComposer(tvGeSource).composeToFile(outFn)
    def parseFileIntoPointTrack(cls, inFn, outFn, ldDict, rsidDict):
        """
        Loops through a primary track and creates a new linked point track for the given track elements.
        The primary track must have the column header 'snps', whose column elements are rsids.

        :param inFn: Path to original track
        :param outFn: Path to new linked point track (LD graph)
        :param ldDict: Master LD dictionary
        :param rsidDict: Dictionary for mapping of rsids to the reference genome
        :return:
        """
        from quick.util.CommonFunctions import ensurePathExists
        ensurePathExists(outFn)
        inFile = open(inFn, 'r')
        outFile = open(outFn, 'w')
        rsids = cls.getUniqueRsids(inFile)
        expansionDict = CreateLDTrack.getExpansionDict(rsids, ldDict)
        outFile.write(CreateLDTrack.formatPointTrack(expansionDict, rsidDict, rsids))

        inFile.close()
        outFile.close()
    def _writeContent(self, fn, header):
        #core = HtmlCore()
        
        #core.begin()
        #core.bigHeader(header)
        #core.header('Local result table')
        text = ''
        if len( self._results.getAllRegionKeys() ) > MAX_LOCAL_RESULTS_IN_TABLE:
            text += 'Local results were not printed because of the large number of bins: ' \
                  + str(len(self._results.getAllRegionKeys())) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE)
        else:
            #core.tableHeader([ str( HtmlCore().textWithHelp(baseText, helpText) ) for baseText, helpText in 
            #                  ([('Region','')] + self._results.getLabelHelpPairs()) ])
            
            for regionKey in self._results.getAllRegionKeys():
                text += '\t'.join([str(regionKey)] +\
                    [ strWithStdFormatting( self._results[regionKey].get(resDictKey) ) \
                     for resDictKey in self._results.getResDictKeys() ]) + os.linesep
            #core.tableFooter()

        #core.end()
        
        ensurePathExists(fn)        
        open(fn,'w').write( text )
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        assert sparse in [False, True]

        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)    
        lastRegion = None
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0
        totBinCount = 0
        
        for br in boundingRegionTuples:
            if lastRegion is None or br.region.chr != lastRegion.chr:
                if br.region.chr in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)
                
                lastRegion = None
                tempContents[br.region.chr] = OrderedDict() #sorteddict()
                if sparse:
                    chrStartIdxs[br.region.chr] = totElCount
            else:
                if br.region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
                if lastRegion.overlaps(br.region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
                if lastRegion.end == br.region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))
            
            if len(br.region) < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)
                
            if not sparse and len(br.region) != br.elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))
            
            startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
            totElCount += br.elCount
            if sparse:
                chrEndIdxs[br.region.chr] = totElCount
            
            tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)
            
            lastRegion = br.region
        
        if sparse:
            totBinCount = 0
            for chr in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chr)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                for key in tempContents[chr].keys():
                    startBinIdx = totBinCount
                    endBinIdx = totBinCount + numBinsInChr
                    brInfo = tempContents[chr][key]
                    
                    if chr in genomeElementChrs:
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                    chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                    startBinIdx, endBinIdx)
                    else:
                        if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)
                
                if chr in genomeElementChrs:
                    totBinCount += numBinsInChr
        
        if len(genomeElementChrs - set(tempContents.keys())) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))
        
        ensurePathExists(self._fn)
        
        for chr in tempContents:
            brInfoDict = tempContents[chr]
            tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))
        
        brShelve = safeshelve.open(self._fn)
        brShelve.update(tempContents)
        brShelve.close()
        
        while not self.fileExists():
            from gold.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)
Example #55
 def getDiskPath(self, ensurePath=False):
     fn = os.sep.join( [STATIC_PATH] + self._id)
     if ensurePath:
         ensurePathExists(fn)
     return fn
 def composeToFile(self, fn, ignoreEmpty=False, **kwArgs):
     ensurePathExists(fn)
     f = open(fn, 'w')
     ok = self._composeCommon(f, ignoreEmpty, **kwArgs)
     f.close()
     return ok
 def _writeResultObject(self, resDictKey, fn):
     if self._plotResultObject is not None:
         ensurePathExists(fn)
         from gold.application.RSetup import r
         r('function(x, fn) {dput(x, fn)}')(self._plotResultObject, fn)
 def getTableData(self, genome, trackName):
     webObject, paramForm = self._getWebPageAndForm( self._makeUrlstreng(self._sessionId, 'hgta_outputType', trackName[-1]) )
     response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))
     fn = createCollectedPath(genome, trackName, 'fromUcsc.'+trackName[-1])
     ensurePathExists(fn)
     open(fn,'w').write(response.read())    
import os, shutil
import sys
from quick.util.CommonFunctions import ensurePathExists, getUniqueWebPath
from quick.aux.CustomFuncCatalog import makeLowercaseName2NameShelfFromTnSubTypes, createShelvesBehindRankedGeneLists
from config.Config import STATIC_PATH

if len(sys.argv) != 8:
    print 'Usage: python createGoogleMapType.sh mapId genome rowTrackName colTrackName col2GeneListFn galaxyId countType'
    sys.exit(0)

mapId, genome, rowTrackName, colTrackName, col2GeneListFn, galaxyId, countType = [sys.argv[x] for x in [1,2,3,4,5,6,7]]

googleMapsCommonDir = '/'.join([STATIC_PATH, 'maps', 'common'])
googleMapsMapIdDir = '/'.join([googleMapsCommonDir, mapId])

ensurePathExists(googleMapsMapIdDir + '/test')

makeLowercaseName2NameShelfFromTnSubTypes(genome, rowTrackName, '/'.join([googleMapsMapIdDir, 'rowLowerCaseName2Name.shelf']))
makeLowercaseName2NameShelfFromTnSubTypes(genome, colTrackName, '/'.join([googleMapsMapIdDir, 'colLowerCaseName2Name.shelf']))

rowBaseTrackNameFile = open('/'.join([googleMapsMapIdDir, 'rowBaseTrackName.txt']), 'w')
colBaseTrackNameFile = open('/'.join([googleMapsMapIdDir, 'colBaseTrackName.txt']), 'w')

rowBaseTrackNameFile.write(rowTrackName + '\n')
colBaseTrackNameFile.write(colTrackName + '\n')

rowBaseTrackNameFile.close()
colBaseTrackNameFile.close()

if col2GeneListFn != 'None':
    shutil.copy(col2GeneListFn, '/'.join([googleMapsMapIdDir, 'col2GeneList.shelf']))
 def _writeRawData(self, resDictKey, fn):
     GraphicsPresenter._writeRawData(self, resDictKey, fn)
     if self._returnDict.get(resDictKey) is not None:
         ensurePathExists(fn)
         open(fn,'a').write(os.linesep + 'Return: ' + str(self._returnDict[resDictKey]))