Exemple #1
0
    def run_on_extracted_variables(cls, ts, analysisBins, choices_numberOfTimesToRandomize, choices_randAlg,
                                   choices_randType, galaxyFn, genome):
        """
        Randomize the track structure ts and write a GSuite describing the
        randomized tracks to galaxyFn.

        For each randomization run a track view provider is created, a
        randomized version of ts is built, one FileGSuiteTrack per leaf node is
        added to the output GSuite, and an analysis built from TsWriterStat is
        run over analysisBins on the randomized structure. The path of each
        GSuite track is stored in the leaf node metadata under 'trackFilePath'
        (presumably consumed by TsWriterStat -- confirm).

        Only a single randomization run is currently supported, and galaxyFn
        must be a non-empty path; both are enforced with assertions.
        """
        assert choices_numberOfTimesToRandomize==1 #For now, since ts probably needs to be unique each time..
        # Check the output path before doing any work, so that a missing path
        # does not surface only after the randomization has already been run.
        assert galaxyFn != ""

        outputGSuite = GSuite()
        for i in range(0, int(choices_numberOfTimesToRandomize)):
            print("TF leaf nodes")
            print(ts.getLeafNodes())
            randTvProvider = cls._createTrackViewProvider(ts, analysisBins, genome, choices_randAlg, choices_randType,
                                                          False, None)  # the last False and None are temporary..
            randomizedTs = getRandomizedVersionOfTs(ts, randTvProvider)

            # output files
            print("Leaf nodes")
            print(randomizedTs.getLeafNodes())
            for j, singleTrackTs in enumerate(randomizedTs.getLeafNodes()):
                # NOTE(review): the output location is hard-coded to a developer
                # home directory and made unique with a random number; it should
                # presumably be derived from galaxyFn -- confirm before reuse.
                uri = FileGSuiteTrack.generateURI(path='/home/ivargry/outfile_' + str(randint(0,999999999)) + '_' + str(j) + ".bed", suffix='bed', doQuote=False)
                print("URI: " + uri)

                # The title is removed from the metadata so that it is not
                # duplicated as an ordinary attribute of the GSuite track.
                title = singleTrackTs.metadata.pop('title')
                gSuiteTrack = FileGSuiteTrack(uri, title=title + '.randomized', fileFormat='primary', trackType='segments',
                                          genome=genome, attributes=singleTrackTs.metadata)
                outputGSuite.addTrack(gSuiteTrack)
                singleTrackTs.metadata['trackFilePath'] = gSuiteTrack.path
                singleTrackTs.metadata['randomization_run'] = i

            spec = AnalysisSpec(TsWriterStat)

            # The analysis is run for its effects; the result object is not used.
            doAnalysis(spec, analysisBins, randomizedTs)
        GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Rebuild the GSuite selected in choices.gsuite so that every track is
        referenced through a URI generated by FileGSuiteTrack from the track's
        path, keeping title, track type, genome and attributes, and compose the
        resulting GSuite to galaxyFn.
        '''
        from quick.util.CommonFunctions import getFileSuffix
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import registerGSuiteTrackClass, GSuiteTrack, FileGSuiteTrack

        registerGSuiteTrackClass(FileGSuiteTrack)

        inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        resultGSuite = GSuite()

        for srcTrack in inputGSuite.allTracks():
            trackPath = srcTrack.path

            # An explicit suffix is only passed on when it differs from the
            # suffix that can already be read from the path itself.
            if srcTrack.suffix != getFileSuffix(trackPath):
                explicitSuffix = srcTrack.suffix
            else:
                explicitSuffix = ''

            fileUri = FileGSuiteTrack.generateURI(path=trackPath, suffix=explicitSuffix)
            resultGSuite.addTrack(
                GSuiteTrack(fileUri,
                            title=srcTrack.title,
                            trackType=srcTrack.trackType,
                            genome=srcTrack.genome,
                            attributes=srcTrack.attributes))

        GSuiteComposer.composeToFile(resultGSuite, galaxyFn)
Exemple #3
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Extract every track of the GSuite selected in choices.gsuite into the
        chosen extraction format. Each extracted track is stored as an extra
        Galaxy file belonging to galaxyFn, and a GSuite referencing the new
        files is composed to galaxyFn.
        '''
        basisTrackName = choices.basisTrack.split(':')
        optionsByLabel = dict(
            GalaxyInterface.getTrackExtractionOptions(choices.genome,
                                                      basisTrackName))
        if optionsByLabel:
            targetFormat = optionsByLabel[choices.extFormatLbl]
        else:
            # NOTE(review): with no extraction options the format is None, and
            # the file name concatenation below raises TypeError as soon as the
            # GSuite contains a track -- confirm this case cannot occur.
            targetFormat = None

        inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        resultGSuite = GSuite()
        for srcTrack in inputGSuite.allTracks():
            extraFileName = srcTrack.title + '.' + targetFormat

            # Make sure the target location exists before extracting into it.
            extractedFn = ExternalTrackManager.createGalaxyFilesFn(
                galaxyFn, extraFileName)
            ensurePathExists(extractedFn)

            extractedUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn, extraFileName=extraFileName)
            GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
                choices.genome, basisTrackName, srcTrack.suffix,
                srcTrack.path, True, choices.extFormatLbl, extractedFn)

            extractedTrack = GSuiteTrack(extractedUri,
                                         title=srcTrack.title,
                                         fileFormat=srcTrack.fileFormat,
                                         trackType=srcTrack.trackType,
                                         genome=choices.genome,
                                         attributes=srcTrack.attributes)
            resultGSuite.addTrack(extractedTrack)

        GSuiteComposer.composeToFile(resultGSuite, galaxyFn)
Exemple #4
0
    def generateSynGSuite(cls, dataOut, galaxyFn, genome):
        '''
        Generate synthetic track data for the items in dataOut and compose a
        GSuite referencing the combined result.

        Each item of dataOut is indexed at positions 0-5. The six values form
        the track file name and are passed on to
        SimulationPointIter.createChrTrack (presumably chromosome, start, end,
        inter-rate, intra-rate and probability -- confirm against the caller).

        createChrTrack is called with a separate output path per item, and the
        content of that file is read back. Only one track is added to the
        GSuite: when the last item has been handled, its file is overwritten
        with the concatenated content of all files read so far and registered
        under the last item's file name. The GSuite is composed to
        cls.extraGalaxyFn['synthetic GSuite'].
        '''
        outGSuite = GSuite()
        simulator = SimulationPointIter()
        # Collected per-item file contents; joined once at the end instead of
        # growing a string on every iteration.
        generatedChunks = []
        for regionNum, region in enumerate(dataOut, 1):
            fileName = 'syn-' + ','.join(str(region[pos]) for pos in range(6))

            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                extraFileName=fileName,
                                                suffix='bed')

            # A temporary GSuiteTrack is only used to resolve the URI to a path.
            outFn = GSuiteTrack(uri).path
            ensurePathExists(outFn)

            simulator.createChrTrack(genome, region[0], PointIter, outFn,
                                     region[3], region[4], region[5], region[1], region[2])

            with open(outFn, 'r') as generatedFile:
                generatedChunks.append(generatedFile.read())

            # On the last item, replace its file with the data of all items and
            # expose that single file as the only track of the GSuite.
            if regionNum == len(dataOut):
                with open(outFn, 'w') as combinedFile:
                    combinedFile.write(''.join(generatedChunks))
                outGSuite.addTrack(GSuiteTrack(uri, title=fileName, genome=genome))
        GSuiteComposer.composeToFile(outGSuite, cls.extraGalaxyFn['synthetic GSuite'])
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Apply the user-edited values of one attribute (or of the title) to the
        tracks of the GSuite selected in choices.gsuite and compose the
        resulting GSuite to galaxyFn.

        The new value for track number i is read from the choice named
        'selectAttribute<2*i + 1>'. Only the first cls.MAX_NUM_OF_TRACKS tracks
        are edited; any further tracks are copied to the output unchanged.
        '''
        inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        attrName = choices.attrName
        editedGSuite = GSuite()

        for position, track in enumerate(inputGSuite.allTracks()):
            if position < cls.MAX_NUM_OF_TRACKS:
                choiceName = 'selectAttribute%s' % ((position * 2) + 1)
                editedValue = getattr(choices, choiceName)
                # The title is a track property of its own rather than an
                # ordinary attribute, so it is assigned directly.
                if attrName == TITLE_COL:
                    track.title = editedValue
                else:
                    track.setAttribute(attrName, editedValue)
            editedGSuite.addTrack(track)

        # Write the edited GSuite to the output dataset
        GSuiteComposer.composeToFile(editedGSuite, galaxyFn)
def addResultsToInputGSuite(gsuite, results, attrNames, outputGSuiteFN):
    '''
    Add analysis results as metadata attributes to the tracks of gsuite and
    compose the resulting GSuite to the file outputGSuiteFN.

    results is indexed by track title. With a single attribute name the result
    of a track is either a plain value or a list/tuple, of which only the first
    element is used. With several attribute names the result of each track must
    be a list/tuple, whose elements are stored under the attribute names by
    position.

    Each name in attrNames is first passed through
    _updateAttrNameWithIndexIfDuplicate (intended to add an index suffix to
    names that already exist as metadata columns in the GSuite).

    NOTE(review): falsy result values (e.g. 0, '' or None) are not stored at
    all -- confirm that skipping a numeric result of 0 is intended.
    '''
    assert isinstance(attrNames, (list, tuple)), 'attrNames must be of type list or tuple: %s' % str(attrNames)
    uniqueAttrNames = [_updateAttrNameWithIndexIfDuplicate(gsuite, name) for name in attrNames]
    hasSingleAttr = len(uniqueAttrNames) == 1

    resultGSuite = GSuite()
    for track in gsuite.allTracks():
        trackResult = results[track.title]
        if hasSingleAttr:
            # A sequence result contributes only its first element.
            if isinstance(trackResult, (list, tuple)):
                value = trackResult[0]
            else:
                value = trackResult
            if value:
                track.setAttribute(uniqueAttrNames[0], str(value))
        else:
            assert isinstance(trackResult,
                              (list, tuple)), 'Expected multiple results per track. Attribute names %s' % str(attrNames)
            for position, value in enumerate(trackResult):
                if value:
                    track.setAttribute(uniqueAttrNames[position], str(value))
        resultGSuite.addTrack(track)
    GSuiteComposer.composeToFile(resultGSuite, outputGSuiteFN)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Compose a GSuite with one track per selected history element and write
        it to galaxyFn.

        History entries whose value is None are treated as not selected. The
        genome is only set on the tracks when choices.selectGenome is 'Yes'.
        '''
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from quick.application.ExternalTrackManager import ExternalTrackManager

        if choices.selectGenome == 'Yes':
            genome = choices.genome
        else:
            genome = None

        chosenIds = [histId for histId, selection in choices.history.iteritems()
                     if selection is not None]

        resultGSuite = GSuite()

        for histId in chosenIds:
            galaxyTN = choices.history[histId].split(':')

            # File name, display name and suffix are all taken from the Galaxy
            # track name of the history element.
            histFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
            histTitle = ExternalTrackManager.extractNameFromHistoryTN(galaxyTN)
            histSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)

            histUri = GalaxyGSuiteTrack.generateURI(galaxyFn=histFn,
                                                    suffix=histSuffix)
            resultGSuite.addTrack(
                GSuiteTrack(histUri, title=histTitle, genome=genome))

        GSuiteComposer.composeToFile(resultGSuite, galaxyFn)
Exemple #8
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Extract every track of the GSuite selected in choices.gsuite over the
        global bins of choices.genome and store the extracted files in the
        'storage' output dataset. A GSuite referencing the extracted files is
        composed to the 'primary' output dataset. Progress is reported through
        a ProgressViewer created on galaxyFn.
        '''

        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
        from quick.application.UserBinSource import GlobalBinSource
        from quick.extra.ProgressViewer import ProgressViewer
        from quick.extra.TrackExtractor import TrackExtractor

        genome = choices.genome
        wholeGenomeBins = GlobalBinSource(genome)
        inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        progress = ProgressViewer(
            [(cls.PROGRESS_PROCESS_DESCRIPTION, len(inputGSuite))], galaxyFn)

        extractedGSuite = GSuite()
        storageOutputName = getGSuiteHistoryOutputName(
            'storage', datasetInfo=choices.gsuite)
        storageFn = cls.extraGalaxyFn[storageOutputName]

        # Names already handed out, so that every track gets its own file.
        usedFileNames = set()
        for srcTrack in inputGSuite.allTracks():
            uniqueName = cls._getUniqueFileName(usedFileNames,
                                                srcTrack.trackName)
            trackTitle = srcTrack.title
            trackAttributes = srcTrack.attributes
            formatInfo = cls._getFileFormatInfo(choices, inputGSuite, genome,
                                                srcTrack)

            storageUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=storageFn,
                extraFileName=uniqueName,
                suffix=formatInfo.suffix)

            extractedTrack = GSuiteTrack(storageUri,
                                         title=trackTitle,
                                         genome=genome,
                                         attributes=trackAttributes)

            TrackExtractor.extractOneTrackManyRegsToOneFile(
                srcTrack.trackName,
                wholeGenomeBins,
                extractedTrack.path,
                fileFormatName=formatInfo.fileFormatName,
                globalCoords=True,
                asOriginal=formatInfo.asOriginal,
                allowOverlaps=formatInfo.allowOverlaps)

            extractedGSuite.addTrack(extractedTrack)
            progress.update()

        primaryOutputName = getGSuiteHistoryOutputName(
            'primary', datasetInfo=choices.gsuite)
        GSuiteComposer.composeToFile(extractedGSuite,
                                     cls.extraGalaxyFn[primaryOutputName])
Exemple #9
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Randomize the tracks of the GSuite selected in choices.gs and compose
        a GSuite referencing the randomized tracks to galaxyFn.

        When the randomization type is TsRandAlgReg.BETWEEN_TRACKS_CATEGORY and
        a category has been chosen, the track structure is split by that
        category, each part is randomized separately and the parts are
        flattened again. Otherwise the flat track structure is randomized as a
        whole. One GSuite track is registered per leaf node, its path is stored
        in the leaf node metadata under 'trackFilePath', and an analysis built
        from TsWriterStat is run over the global bins of the genome.
        """
        #TODO: add functionality for single gtrack within-track randomization

        print('Executing...')

        sourceGSuite = getGSuiteFromGalaxyTN(choices.gs)
        resultGSuite = GSuite()
        genome = sourceGSuite.genome
        flatTs = factory.getFlatTracksTS(genome, choices.gs)
        randIndex = 0
        bins = GlobalBinSource(genome)

        splitByCategory = (
            choices.randType == TsRandAlgReg.BETWEEN_TRACKS_CATEGORY and
            choices.category not in [None, 'None'])

        if splitByCategory:
            categoryTs = flatTs.getSplittedByCategoryTS(choices.category)
            randomizedPerCategory = TrackStructureV2()
            for categoryKey, categorySubTs in categoryTs.items():
                provider = cls.createTrackViewProvider(
                    choices, categorySubTs, bins, genome)
                randomizedPerCategory[categoryKey] = getRandomizedVersionOfTs(
                    categorySubTs, provider, randIndex)
            randomizedTs = randomizedPerCategory.getFlattenedTS()
        else:
            provider = cls.createTrackViewProvider(choices, flatTs, bins,
                                                   genome)
            randomizedTs = getRandomizedVersionOfTs(flatTs, provider,
                                                    randIndex)

        for leafTs in randomizedTs.getLeafNodes():
            extraFileName = os.path.sep.join(leafTs.track.trackName) + \
                '.randomized'
            leafUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn,
                extraFileName=extraFileName,
                suffix='bed')

            # The title is taken out of the metadata so that it is not repeated
            # as an ordinary attribute of the GSuite track.
            leafTitle = leafTs.metadata.pop('title')
            randomizedTrack = GSuiteTrack(leafUri,
                                          title=leafTitle + '.randomized',
                                          fileFormat='primary',
                                          trackType='segments',
                                          genome=genome,
                                          attributes=leafTs.metadata)
            resultGSuite.addTrack(randomizedTrack)
            leafTs.metadata['trackFilePath'] = randomizedTrack.path

        # The analysis is run for its effects; its result is not used here.
        doAnalysis(AnalysisSpec(TsWriterStat), bins, randomizedTs)
        GSuiteComposer.composeToFile(resultGSuite, galaxyFn)
    def testComposeLocalUrlGenomeAttributesNonAscii(self):
        # Composes a GSuite of three local tracks (galaxy, file and hb URIs)
        # with non-ASCII characters in a genome, an attribute value, a track
        # name and a custom header value, and checks that these characters are
        # percent-encoded in the composed output.
        gSuite = GSuite()
        gSuite.addTrack(
            GSuiteTrack('galaxy:/12345abc',
                        genome=u'hg18ø',
                        attributes=OrderedDict([('one', 'yes')])))
        gSuite.addTrack(
            GSuiteTrack('file:/path/to/file2',
                        genome='hg19',
                        attributes=OrderedDict([('two', u'nø')])))
        gSuite.addTrack(
            GSuiteTrack(
                HbGSuiteTrack.generateURI(trackName=[u'track', u'nøme']),
                genome='hg38',
                attributes=OrderedDict([('two', 'yes')])))
        gSuite.setCustomHeader('my header', u'bø!')

        output = GSuiteComposer.composeToString(gSuite)

        targetOutput = \
            '##location: local\n' \
            '##file format: unknown\n' \
            '##track type: unknown\n' \
            '##genome: multiple\n' \
            '##my header: b%C3%B8!\n' \
            '###uri\ttitle\tfile_format\tgenome\tone\ttwo\n' \
            'galaxy:/12345abc\t12345abc\tunknown\thg18%C3%B8\tyes\t.\n' \
            'file:/path/to/file2\tfile2\tunknown\thg19\t.\tn%C3%B8\n' \
            'hb:/track/n%C3%B8me\tn%C3%B8me\tpreprocessed\thg38\t.\tyes\n'

        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(targetOutput, output)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Export the selected tracks of the GSuite in choices.gsuite to separate
        output datasets and compose a GSuite referencing them to galaxyFn.

        Each selected track is written to cls.extraGalaxyFn[title]. When
        choices.changeFormat equals cls.OUTPUT_FORMAT_CONVERT the track is
        composed in the format named by choices.outputFormat; otherwise the
        track file is copied as it is.
        '''

        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack

        sourceGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        exportedGSuite = GSuite()

        newSuffix = cls._getNewSuffixIfAny(choices)

        for srcTrack in cls._getSelectedTracks(choices, sourceGSuite):
            exportTitle, exportSuffix = cls._getExportTrackTitleAndSuffix(
                srcTrack, newSuffix)
            targetFn = cls.extraGalaxyFn[exportTitle]

            convertFormat = (choices.changeFormat == cls.OUTPUT_FORMAT_CONVERT)
            if convertFormat:
                from gold.origdata.FileFormatComposer import getComposerClsFromFileFormatName
                elementSource = srcTrack.getGenomeElementSource(
                    printWarnings=False)
                composerCls = getComposerClsFromFileFormatName(
                    choices.outputFormat)
                composerCls(elementSource).composeToFile(targetFn)
            else:
                shutil.copy(srcTrack.path, targetFn)

            exportedUri = GalaxyGSuiteTrack.generateURI(galaxyFn=targetFn,
                                                        suffix=exportSuffix)
            exportedGSuite.addTrack(
                GSuiteTrack(exportedUri,
                            title=exportTitle,
                            genome=srcTrack.genome,
                            attributes=srcTrack.attributes))

        GSuiteComposer.composeToFile(exportedGSuite, galaxyFn)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Convert the archive selected through choices into a GSuite.

        The archive entries are counted to set up a ProgressViewer on galaxyFn.
        The entries are then turned into GSuite tracks via
        ArchiveToGalaxyGSuiteTrackIterator, using the 'storage' output dataset,
        and the resulting GSuite is composed to the 'primary' output dataset.
        choices.storeHierarchy ('Yes' or not) is passed on to the iterator.
        '''

        from gold.gsuite.GSuiteArchiver import ArchiveToGalaxyGSuiteTrackIterator, \
                                               convertArchiveToGSuite
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from quick.gsuite.GSuiteHbIntegration import \
            writeGSuiteHiddenTrackStorageHtml
        from quick.extra.ProgressViewer import ProgressViewer

        # Only needed for the optional breakpoint below.
        from quick.util.debug import DebugUtil
        # DebugUtil.insertBreakPoint(username=username, currentUser='******')

        archive = cls._getArchiveReader(choices)
        numEntries = sum(1 for _entry in archive)

        description = cls._getOutputHistoryDescription(choices)
        progress = ProgressViewer([('Extract tracks', numEntries)], galaxyFn)

        keepHierarchy = (choices.storeHierarchy == 'Yes')
        storageOutputName = getGSuiteHistoryOutputName('storage', description)
        storageFn = cls.extraGalaxyFn[storageOutputName]
        trackIter = ArchiveToGalaxyGSuiteTrackIterator(archive, storageFn,
                                                       keepHierarchy)
        writeGSuiteHiddenTrackStorageHtml(storageFn)

        archiveGSuite = convertArchiveToGSuite(trackIter, progress)

        primaryOutputName = getGSuiteHistoryOutputName('primary', description)
        GSuiteComposer.composeToFile(archiveGSuite,
                                     cls.extraGalaxyFn[primaryOutputName])
    def testEmptyCompose(self):
        # A GSuite without tracks composes to the four standard header lines,
        # all with the value 'unknown'.
        gSuite = GSuite()

        output = GSuiteComposer.composeToString(gSuite)

        targetOutput = \
            '##location: unknown\n' \
            '##file format: unknown\n' \
            '##track type: unknown\n' \
            '##genome: unknown\n'

        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(targetOutput, output)
Exemple #14
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Split the track selected in choices.track on the attribute named by
        choices.attr and compose a GSuite of the resulting tracks to galaxyFn.

        The split itself is delegated to
        createGalaxyGSuiteBySplittingInputFileOnAttribute, using the composer
        class returned by cls._getComposerCls(choices).
        '''
        genome = choices.genome
        elementSource = etm.getGESourceFromGalaxyOrVirtualTN(choices.track,
                                                             genome)
        # The output dataset itself (galaxyFn) is passed as the storage
        # location, instead of a separate hidden storage dataset.
        storageFn = galaxyFn
        composerCls = cls._getComposerCls(choices)
        splitAttr = choices.attr

        splitGSuite = createGalaxyGSuiteBySplittingInputFileOnAttribute(
            storageFn, elementSource, genome, composerCls, splitAttr)

        GSuiteComposer.composeToFile(splitGSuite, galaxyFn)
    def testFullCompose(self):
        # Composes a GSuite with tracks of several URI schemes (ftp, http,
        # https, rsync, hb, galaxy, file), partly overlapping attributes,
        # repeated titles, a common genome and a custom header, and compares
        # the composed text with the expected GSuite contents.
        gSuite = GSuite()
        gSuite.addTrack(
            GSuiteTrack('ftp://server.somewhere.com/path/to/file1.bed',
                        title='Track',
                        attributes=OrderedDict([('cell', 'k562'),
                                                ('antibody', 'cMyb')])))
        gSuite.addTrack(
            GSuiteTrack('http://server.other.com/path/to/file2.bed',
                        title='Track2',
                        attributes=OrderedDict([('cell', 'GM12878'),
                                                ('antibody', 'cMyc')])))
        gSuite.addTrack(
            GSuiteTrack('https://server.other.com/path/to/file3.bed',
                        attributes=OrderedDict([('cell', 'GM12878'),
                                                ('antibody', 'cMyb')])))
        gSuite.addTrack(
            GSuiteTrack('rsync://server.other.com/path/to/file4;wig',
                        title='Track4',
                        attributes=OrderedDict([('cell', 'NHFL')])))
        gSuite.addTrack(GSuiteTrack('hb:/track/name/hierarchy', title='Track'))
        gSuite.addTrack(
            GSuiteTrack('galaxy:/ad123dd12fg;btrack?track=track:name',
                        title='Track',
                        attributes=OrderedDict([('cell', 'k562'),
                                                ('antibody', 'cMyb')])))
        gSuite.addTrack(
            GSuiteTrack('file:/path/to/file.btrack?track=track:name',
                        title='Track name7',
                        attributes=OrderedDict([('antibody', 'cMyb'),
                                                ('extra', 'yes')])))
        gSuite.setGenomeOfAllTracks('hg18')
        gSuite.setCustomHeader('My header', 'Some value')

        output = GSuiteComposer.composeToString(gSuite)

        targetOutput = \
            '##location: multiple\n' \
            '##file format: multiple\n' \
            '##track type: unknown\n' \
            '##genome: hg18\n' \
            '##my header: Some value\n' \
            '###uri\ttitle\tfile_format\tcell\tantibody\textra\n' \
            'ftp://server.somewhere.com/path/to/file1.bed\tTrack\tprimary\tk562\tcMyb\t.\n' \
            'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n' \
            'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n' \
            'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n' \
            'hb:/track/name/hierarchy\tTrack (2)\tpreprocessed\t.\t.\t.\n' \
            'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack (3)\tpreprocessed\tk562\tcMyb\t.\n' \
            'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'

        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(targetOutput, output)
Exemple #16
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Create an n-mer track for every line of choices.seqs and compose a
        GSuite referencing these tracks to cls.extraGalaxyFn['Kmers GSuite'].

        Each line is stripped of surrounding whitespace and used as the n-mer.
        The tracks are referenced by track names of the form
        ['Sequence', 'K-mers', '<length>-mers', <n-mer>].
        '''
        genome = choices.genome
        nmers = [line.strip() for line in choices.seqs.splitlines()]

        # All n-mer tracks are created before any of them is looked up.
        for nmer in nmers:
            GalaxyInterface.createNmerTrack(choices.genome, nmer)

        #example trackName = ['Sequence', 'K-mers', '7-mers', 'agagaga']
        nmerTrackNames = [['Sequence', 'K-mers', str(len(nmer)) + '-mers', nmer]
                          for nmer in nmers]

        kmerGSuite = GSuite()
        for nmerTrackName in nmerTrackNames:
            trackInfo = TrackInfo(genome, nmerTrackName)
            nmerTrackType = trackInfo.trackFormatName.lower()
            nmerUri = HbGSuiteTrack.generateURI(trackName=nmerTrackName)
            nmerTitle = ' '.join(['Nmer track', nmerTrackName[-1]])
            kmerGSuite.addTrack(
                GSuiteTrack(nmerUri,
                            title=nmerTitle,
                            trackType=nmerTrackType,
                            genome=choices.genome))

        GSuiteComposer.composeToFile(kmerGSuite,
                                     cls.extraGalaxyFn['Kmers GSuite'])
    def execute(choices, galaxyFn=None, username=''):
        '''
        Compose a GSuite of the subtracks of choices.parentTrack, as returned
        by getSubtracksAsGSuite for choices.genome and username, and write it
        to galaxyFn.

        choices.parentTrack is a colon-separated track name; an empty string
        is passed on as an empty track name.
        '''

        from quick.gsuite.GSuiteHbIntegration import getSubtracksAsGSuite
        import gold.gsuite.GSuiteComposer as GSuiteComposer

        genome = choices.genome
        nameParts = choices.parentTrack.split(':')
        # Splitting an empty string gives [''], which is not a valid level.
        parentTrack = [] if nameParts == [''] else nameParts

        subtrackGSuite = getSubtracksAsGSuite(genome, parentTrack, username)

        GSuiteComposer.composeToFile(subtrackGSuite, galaxyFn)
Exemple #18
0
    def execute(choices, galaxyFn=None, username=''):
        '''
        Compose a GSuite with one track per selected history element and write
        it to galaxyFn.

        History entries whose value is None are treated as not selected. The
        genome is only set on the tracks when choices.selectGenome is 'Yes'.
        '''

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from quick.application.ExternalTrackManager import ExternalTrackManager

        useGenome = (choices.selectGenome == 'Yes')
        genome = choices.genome if useGenome else None

        selectedIds = [
            galaxyId for galaxyId, entry in choices.history.iteritems()
            if entry is not None
        ]

        historyGSuite = GSuite()

        for galaxyId in selectedIds:
            historyTN = choices.history[galaxyId].split(':')

            # The Galaxy track name carries file name, title and suffix.
            elementFn = ExternalTrackManager.extractFnFromGalaxyTN(historyTN)
            elementTitle = ExternalTrackManager.extractNameFromHistoryTN(
                historyTN)
            elementSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                historyTN)

            elementUri = GalaxyGSuiteTrack.generateURI(galaxyFn=elementFn,
                                                       suffix=elementSuffix)
            historyTrack = GSuiteTrack(elementUri,
                                       title=elementTitle,
                                       genome=genome)
            historyGSuite.addTrack(historyTrack)

        GSuiteComposer.composeToFile(historyGSuite, galaxyFn)
Exemple #19
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Split the categorical track in choices.catTrack into one file per
        category and compose a GSuite referencing these files.

        The genome elements are grouped by their val attribute. Each group is
        composed to cls.extraGalaxyFn[category] with the composer returned by
        cls.getComposer for choices.outputType, and the GSuite is written to
        cls.extraGalaxyFn['GSuite from categorical'].
        '''
        genome = choices.genome
        outputType = choices.outputType
        catTrack = choices.catTrack.split(':')
        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            catTrack, genome)

        # Group the genome elements by category value
        elementsByCategory = defaultdict(list)
        for element in geSource:
            elementsByCategory[element.val].append(element)

        # Write one file per category
        for category, elements in elementsByCategory.iteritems():
            categorySource = ListGESourceWrapper(geSource, elements)
            categoryComposer = cls.getComposer(categorySource, outputType)
            categoryComposer.composeToFile(cls.extraGalaxyFn[category])

        # Reference every category file from the output GSuite
        categoryFiles = [(name, cls.extraGalaxyFn[name])
                         for name in elementsByCategory.keys()]
        categoryGSuite = GSuite()
        for name, categoryFn in categoryFiles:
            categoryUri = GalaxyGSuiteTrack.generateURI(galaxyFn=categoryFn,
                                                        suffix=outputType)
            categoryGSuite.addTrack(
                GSuiteTrack(categoryUri, title=name, genome=genome))
        GSuiteComposer.composeToFile(
            categoryGSuite, cls.extraGalaxyFn['GSuite from categorical'])

        print('Execution done!')
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Rebuild the GSuite selected in choices.gsuite so that every track is
        referenced through a FileGSuiteTrack URI, and compose the result to
        galaxyFn.

        The file path and suffix of each track are extracted from the Galaxy
        track name created from the track's external track name. If
        choices.genome differs from the genome of the input GSuite, it is set
        on all tracks first.
        '''
        sourceGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        genomeDiffers = (choices.genome != sourceGSuite.genome)
        if genomeDiffers:
            sourceGSuite.setGenomeOfAllTracks(choices.genome)

        registerGSuiteTrackClass(FileGSuiteTrack)

        fileGSuite = GSuite()

        for srcTrack in sourceGSuite.allTracks():
            galaxyTN = ETM.createGalaxyTnFromExternalTn(srcTrack.trackName)

            filePath = ETM.extractFnFromGalaxyTN(galaxyTN)
            fileSuffix = ETM.extractFileSuffixFromGalaxyTN(galaxyTN)
            fileUri = FileGSuiteTrack.generateURI(path=filePath,
                                                  suffix=fileSuffix)

            fileGSuite.addTrack(
                GSuiteTrack(fileUri,
                            title=srcTrack.title,
                            trackType=srcTrack.trackType,
                            genome=srcTrack.genome,
                            attributes=srcTrack.attributes))

        GSuiteComposer.composeToFile(fileGSuite, galaxyFn)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Preprocess all tracks of the GSuite selected in choices.history and
        write two GSuite files: one with the tracks returned as output by the
        preprocessor and one with the tracks returned as errors.

        If choices.genome differs from the genome of the input GSuite,
        setGenomeOfAllTracks is called with choices.genome before
        preprocessing. Progress is reported through a ProgressViewer created
        with galaxyFn. The output files are looked up in cls.extraGalaxyFn
        under the history names for 'nopreprocessed' and 'preprocessed'.
        username is not used.
        '''
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuitePreprocessor import GSuitePreprocessor
        from quick.extra.ProgressViewer import ProgressViewer
        from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN

        historyChoice = choices.history
        sourceSuite = getGSuiteFromGalaxyTN(historyChoice)

        if choices.genome != sourceSuite.genome:
            sourceSuite.setGenomeOfAllTracks(choices.genome)

        progressSteps = [('Preprocess tracks', sourceSuite.numTracks())]
        progress = ProgressViewer(progressSteps, galaxyFn)

        preprocessor = GSuitePreprocessor()
        processedSuite, failedSuite = \
            preprocessor.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites(
                sourceSuite, progress)

        # The error GSuite is written first, then the preprocessed one.
        for outputKey, suite in (('nopreprocessed', failedSuite),
                                 ('preprocessed', processedSuite)):
            historyName = getGSuiteHistoryOutputName(
                outputKey, datasetInfo=historyChoice)
            GSuiteComposer.composeToFile(suite, cls.extraGalaxyFn[historyName])
    def testComposeRemoteOnlyUrl(self):
        """Compose a GSuite whose two remote tracks are given by URI only.

        The expected text reports location 'remote', leaves file format,
        track type and genome as 'unknown', and lists 'file1' and 'file2' as
        titles although no titles are passed to GSuiteTrack.
        """
        gSuite = GSuite()
        for uri in ('ftp://server.somewhere.com/path/to/file1',
                    'http://server.other.com/path/to/file2'):
            gSuite.addTrack(GSuiteTrack(uri))

        targetOutput = (
            '##location: remote\n'
            '##file format: unknown\n'
            '##track type: unknown\n'
            '##genome: unknown\n'
            '###uri\ttitle\n'
            'ftp://server.somewhere.com/path/to/file1\tfile1\n'
            'http://server.other.com/path/to/file2\tfile2\n'
        )

        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(targetOutput, GSuiteComposer.composeToString(gSuite))
    def testParseAndCompose(self):
        """Parse a GSuite text and compose it again; the text must round-trip.

        The input mixes ftp, http, https, rsync, hb, galaxy and file URIs,
        primary and preprocessed file formats, and three custom attribute
        columns (cell, antibody, extra).
        """
        inputContents = (
            '##location: multiple\n'
            '##file format: multiple\n'
            '##track type: unknown\n'
            '##genome: hg18\n'
            '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
            'ftp://server.somewhere.com/path/to/file1.bed\tTrack1\tprimary\tk562\tcMyb\t.\n'
            'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
            'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
            'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
            'hb:/track/name/hierarchy\tTrack5\tpreprocessed\t.\t.\t.\n'
            'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack6\tpreprocessed\tk562\tcMyb\t.\n'
            'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
        )

        parsedSuite = GSuiteParser.parseLines(inputContents.split('\n'))
        outputContents = GSuiteComposer.composeToString(parsedSuite)

        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(inputContents, outputContents)
    def testComposeUrlTitleLocationTrackType(self):
        """Compose a GSuite mixing a remote and a local track of different
        track types.

        The expected text reports both location and track type as 'multiple'
        in the headers and adds a track_type column. The second track is given
        no title and is expected to be listed as 'file2'.
        """
        gSuite = GSuite()
        gSuite.addTrack(
            GSuiteTrack('ftp://server.somewhere.com/path/to/file1',
                        title='Track1',
                        trackType='points'))
        gSuite.addTrack(
            GSuiteTrack('file:/path/to/file2', trackType='segments'))

        targetOutput = (
            '##location: multiple\n'
            '##file format: unknown\n'
            '##track type: multiple\n'
            '##genome: unknown\n'
            '###uri\ttitle\ttrack_type\n'
            'ftp://server.somewhere.com/path/to/file1\tTrack1\tpoints\n'
            'file:/path/to/file2\tfile2\tsegments\n'
        )

        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(targetOutput, GSuiteComposer.composeToString(gSuite))
    def testComposeRemoteUrlGenomeFileFormat(self):
        """Compose a GSuite of two remote hg18 tracks, only one of which has
        a file suffix.

        The expected text lists the '.bed' track with file format 'primary'
        and the suffix-less track with 'unknown', in a file_format column,
        while the file format header line reads 'unknown' and the genome
        header reads 'hg18'.
        """
        gSuite = GSuite()
        for uri in ('ftp://server.somewhere.com/path/to/file1.bed',
                    'http://server.other.com/path/to/file2'):
            gSuite.addTrack(GSuiteTrack(uri, genome='hg18'))

        targetOutput = (
            '##location: remote\n'
            '##file format: unknown\n'
            '##track type: unknown\n'
            '##genome: hg18\n'
            '###uri\ttitle\tfile_format\n'
            'ftp://server.somewhere.com/path/to/file1.bed\tfile1.bed\tprimary\n'
            'http://server.other.com/path/to/file2\tfile2\tunknown\n'
        )

        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(targetOutput, GSuiteComposer.composeToString(gSuite))
    def testComposeLocalUrlGenomeAttributes(self):
        """Compose a GSuite of two local tracks with different genomes and
        disjoint custom attributes.

        The expected text reports genome 'multiple' in the header, adds a
        genome column, and has one column per attribute ('one', 'two') with
        '.' where a track lacks that attribute.
        """
        gSuite = GSuite()
        gSuite.addTrack(
            GSuiteTrack('galaxy:/12345abc',
                        genome='hg18',
                        attributes=OrderedDict([('one', 'yes')])))
        gSuite.addTrack(
            GSuiteTrack('file:/path/to/file2',
                        genome='hg19',
                        attributes=OrderedDict([('two', 'no')])))

        targetOutput = (
            '##location: local\n'
            '##file format: unknown\n'
            '##track type: unknown\n'
            '##genome: multiple\n'
            '###uri\ttitle\tgenome\tone\ttwo\n'
            'galaxy:/12345abc\t12345abc\thg18\tyes\t.\n'
            'file:/path/to/file2\tfile2\thg19\t.\tno\n'
        )

        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(targetOutput, GSuiteComposer.composeToString(gSuite))
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Build a GSuite from a track search and optionally download and
        preprocess its tracks.

        Returns immediately (None) if choices.source is empty. Otherwise the
        part of choices.source before the first '[' is used as the source
        name and a GSuite is fetched from TrackGlobalSearchModule according to
        choices.outputType:

            * contains 'all tracks'      -> getGSuite(...)
            * 'Select tracks manually'   -> getGSuite(...,
                                            selectedFileIDs=choices.results)
            * 'Select 10 random tracks'  -> getRandomGSuite(..., count=10)
            * 'Select 50 random tracks'  -> getRandomGSuite(..., count=50)

        If choices.downloadAndPreprocess is 'Yes' and the source string does
        not contain 'HyperBrowser', the tracks are downloaded into the hidden
        'storage' history element and preprocessed, and five GSuites
        ('remote', 'nodownload', 'primary', 'nopreprocessed', 'preprocessed')
        are written to the matching entries of cls.extraGalaxyFn. Otherwise
        the fetched GSuite is written to galaxyFn.

        username is not used.

        Raises:
            ValueError: if choices.outputType is none of the handled values
                (previously this surfaced later as an unbound local variable).
        """
        if not choices.source:
            return

        source = choices.source.split('[')[0].strip()
        gsm = TrackGlobalSearchModule(cls.useSqlite)
        desc = choices.subCategory
        isRemoteSource = 'HyperBrowser' not in choices.source

        # dataTypes must always be bound: every branch below passes it on, and
        # it used to be assigned only for non-HyperBrowser sources, which made
        # the HyperBrowser case fail with an UnboundLocalError.
        # NOTE(review): an empty list is assumed to mean "no data type
        # filtering" -- confirm against TrackGlobalSearchModule.
        dataTypes = []
        if isRemoteSource:
            dataTypes = [choices.dataType.split('[')[0].strip()]

        outputType = choices.outputType
        if 'all tracks' in outputType:
            remoteGSuite = gsm.getGSuite(choices.search,
                                         choices.subCategory,
                                         source,
                                         dataTypes,
                                         filterFileSuffix=True)
        elif outputType == 'Select tracks manually':
            remoteGSuite = gsm.getGSuite(choices.search,
                                         choices.subCategory,
                                         source,
                                         dataTypes,
                                         filterFileSuffix=True,
                                         selectedFileIDs=choices.results)
        elif outputType == 'Select 10 random tracks':
            remoteGSuite = gsm.getRandomGSuite(choices.search,
                                               choices.subCategory,
                                               source,
                                               dataTypes,
                                               filterFileSuffix=True,
                                               count=10)
        elif outputType == 'Select 50 random tracks':
            remoteGSuite = gsm.getRandomGSuite(choices.search,
                                               choices.subCategory,
                                               source,
                                               dataTypes,
                                               filterFileSuffix=True,
                                               count=50)
        else:
            # Fail here with a clear message instead of letting the unbound
            # remoteGSuite blow up further down.
            raise ValueError('Unsupported output type: %r' % (outputType,))

        if not (choices.downloadAndPreprocess == 'Yes' and isRemoteSource):
            GSuiteComposer.composeToFile(remoteGSuite, galaxyFn)
            return

        trackCount = remoteGSuite.numTracks()
        progressViewer = ProgressViewer(
            [('Download tracks', trackCount),
             ('Preprocess tracks', trackCount)], galaxyFn)

        from gold.gsuite.GSuiteDownloader import GSuiteSingleGalaxyFnDownloader
        from quick.gsuite.GSuiteHbIntegration import \
            writeGSuiteHiddenTrackStorageHtml

        def outputFn(key):
            # Path of the extra history element registered for `key`.
            return cls.extraGalaxyFn[getGSuiteHistoryOutputName(key, desc)]

        gSuiteDownloader = GSuiteSingleGalaxyFnDownloader()
        hiddenStorageFn = outputFn('storage')
        localGSuite, errorLocalGSuite = \
            gSuiteDownloader.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites(
                remoteGSuite, progressViewer, hiddenStorageFn, [])
        writeGSuiteHiddenTrackStorageHtml(hiddenStorageFn)

        # Only the tracks returned as downloaded go on to preprocessing, so
        # the progress total for that step is adjusted.
        progressViewer.updateProgressObjectElementCount(
            'Preprocess tracks', localGSuite.numTracks())
        gSuitePreprocessor = GSuitePreprocessor()
        preProcessedGSuite, errorPreProcessGSuite = \
            gSuitePreprocessor.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites(
                localGSuite, progressViewer)

        for key, gSuite in (('remote', remoteGSuite),
                            ('nodownload', errorLocalGSuite),
                            ('primary', localGSuite),
                            ('nopreprocessed', errorPreProcessGSuite),
                            ('preprocessed', preProcessedGSuite)):
            GSuiteComposer.composeToFile(gSuite, outputFn(key))
 def __str__(self):
     """Return the string GSuiteComposer.composeToString produces for this object."""
     # Imported at call time, as in the rest of this method's history.
     from gold.gsuite.GSuiteComposer import composeToString
     return composeToString(self)
class MultiTrackIntersectTool(GeneralGuiTool, GenomeMixin):
    '''
    GUI tool that intersects every track of a GSuite with a single filtering
    track and writes the resulting tracks as new GSuites.

    The filtering track is taken either from the history or from the
    HyperBrowser repository (see getOptionsBoxTrackSource). The intersected
    tracks are written as files with suffix OUTPUT_TRACKS_SUFFIX, then
    preprocessed; see execute() for the output GSuites.
    '''
    # Key of the input box that holds the GSuite file.
    # NOTE(review): presumably read by GenomeMixin -- confirm.
    GSUITE_FILE_OPTIONS_BOX_KEYS = ['gSuite']
    ALLOW_UNKNOWN_GENOME = False
    ALLOW_GENOME_OVERRIDE = False
    ALLOW_MULTIPLE_GENOMES = False
    WHAT_GENOME_IS_USED_FOR = 'the output GSuite file' # Other common possibility: 'the analysis'

    # Choices offered in the 'trackSource' input box.
    FROM_HISTORY_TEXT = 'From history'
    FROM_HYPERBROWSER_TEXT = 'From HyperBrowser repository'

    # Choices offered in the 'withOverlaps' input box.
    WITH_OVERLAPS = 'Allow multiple overlapping points/segments within the same track'
    NO_OVERLAPS = 'Merge any overlapping points/segments within the same track'

    # Constraints on the input GSuite.
    # NOTE(review): not referenced inside this class; presumably consumed by
    # validation code elsewhere -- confirm.
    GSUITE_ALLOWED_FILE_FORMATS = [GSuiteConstants.PREPROCESSED]
    GSUITE_ALLOWED_LOCATIONS = [GSuiteConstants.LOCAL]
    GSUITE_ALLOWED_TRACK_TYPES = [GSuiteConstants.POINTS,
                                  GSuiteConstants.VALUED_POINTS,
                                  GSuiteConstants.SEGMENTS,
                                  GSuiteConstants.VALUED_SEGMENTS]
    GSUITE_DISALLOWED_GENOMES = [GSuiteConstants.UNKNOWN,
                                 GSuiteConstants.MULTIPLE]

    # File suffix (and file format name) of the intersected track files.
    OUTPUT_TRACKS_SUFFIX = 'bed'
    GSUITE_OUTPUT_LOCATION = GSuiteConstants.LOCAL
    GSUITE_OUTPUT_FILE_FORMAT = GSuiteConstants.PREPROCESSED
    GSUITE_OUTPUT_TRACK_TYPE = GSuiteConstants.SEGMENTS

    # Description passed to getGSuiteHistoryOutputName for all outputs.
    OUTPUT_GSUITE_DESCRIPTION = ', intersected'
    # Names of the two progress steps shown by the ProgressViewer.
    PROGRESS_INTERSECT_MSG = 'Intersect tracks'
    PROGRESS_PREPROCESS_MSG = 'Preprocess tracks'

    @staticmethod
    def getToolName():
        '''
        Specifies a header of the tool, which is displayed at the top of the
        page.
        '''
        return "Intersect preprocessed tracks in GSuite with a single track"

    @classmethod
    def getInputBoxNames(cls):
        '''
        Specifies a list of headers for the input boxes, and implicitly also the
        number of input boxes to display on the page. The returned list can have
        two syntaxes:

            1) A list of strings denoting the headers for the input boxes in
               numerical order.
            2) A list of tuples of strings, where each tuple has
               two items: a header and a key.

        The contents of each input box must be defined by the function
        getOptionsBoxK, where K is either a number in the range of 1 to the
        number of boxes (case 1), or the specified key (case 2).

        Note: the key has to be camelCase (e.g. "firstKey")

        This tool uses syntax 2: the GSuite box, followed by the boxes
        returned by getInputBoxNamesForGenomeSelection(), followed by the
        boxes for the filtering track and the overlap handling.
        '''
        return [('Select GSuite file from history:', 'gSuite')] +\
               cls.getInputBoxNamesForGenomeSelection() +\
               [('Select source of filtering track:', 'trackSource'),
                ('Select track from history:', 'trackHistory'),
                ('Select track:', 'track'),
                ('Overlap handling:', 'withOverlaps')]

    #@staticmethod
    #def getInputBoxOrder():
    #    '''
    #    Specifies the order in which the input boxes should be displayed, as a
    #    list. The input boxes are specified by index (starting with 1) or by
    #    key. If None, the order of the input boxes is in the order specified by
    #    getInputBoxNames.
    #    '''
    #    return None

    @classmethod
    def getOptionsBoxGSuite(cls): # Alternatively: getOptionsBox2()
        '''
        Contents of the first input box ('gSuite'): the value returned by
        getHistorySelectionElement('gsuite'). As the first box, it takes no
        prevChoices argument.
        '''
        return cls.getHistorySelectionElement('gsuite')

    @classmethod
    def getOptionsBoxTrackSource(cls, prevChoices):
        '''
        Contents of the 'trackSource' box: the two possible sources of the
        filtering track. prevChoices is not used.
        '''
        return [cls.FROM_HISTORY_TEXT, cls.FROM_HYPERBROWSER_TEXT]

    @classmethod
    def getOptionsBoxTrackHistory(cls, prevChoices):
        '''
        Contents of the 'trackHistory' box. Only defined when the filtering
        track comes from the history; in that case a history selection element
        for the file suffixes supported for points and segments is returned.
        Otherwise the method falls through and returns None.
        '''
        if prevChoices.trackSource == cls.FROM_HISTORY_TEXT:
            from gold.application.DataTypes import getSupportedFileSuffixesForPointsAndSegments
            return cls.getHistorySelectionElement(*getSupportedFileSuffixesForPointsAndSegments())

    @classmethod
    def getOptionsBoxTrack(cls, prevChoices):
        '''
        Contents of the 'track' box. Only defined when the filtering track
        comes from the HyperBrowser repository; in that case
        cls.TRACK_SELECT_ELEMENT is returned. Otherwise the method falls
        through and returns None.
        '''
        if prevChoices.trackSource == cls.FROM_HYPERBROWSER_TEXT:
            return cls.TRACK_SELECT_ELEMENT

    @classmethod
    def getOptionsBoxWithOverlaps(cls, prevChoices):
        '''
        Contents of the 'withOverlaps' box. The two overlap handling choices
        are returned once a filtering track has been selected in either the
        'trackHistory' or the 'track' box; otherwise the method falls through
        and returns None. NO_OVERLAPS is listed first.
        '''
        if prevChoices.trackHistory or prevChoices.track:
            return [cls.NO_OVERLAPS, cls.WITH_OVERLAPS]

    #@staticmethod
    #def getInfoForOptionsBoxKey(prevChoices):
    #    '''
    #    If not None, defines the string content of an clickable info box beside
    #    the corresponding input box. HTML is allowed.
    #    '''
    #    return None

    #@staticmethod
    #def getDemoSelections():
    #    return ['testChoice1','..']

    @classmethod
    def getExtraHistElements(cls, choices):
        '''
        Returns the extra history elements of the tool: four GSuite elements
        ('nointersect', 'primary', 'nopreprocessed', 'preprocessed') and one
        hidden 'storage' element with the GSuite storage suffix. The names are
        built by getGSuiteHistoryOutputName from OUTPUT_GSUITE_DESCRIPTION and
        choices.gSuite, and are the same names execute() uses as keys into
        cls.extraGalaxyFn.
        '''
        desc = cls.OUTPUT_GSUITE_DESCRIPTION
        return [HistElement(getGSuiteHistoryOutputName(
                                'nointersect', description=desc, datasetInfo=choices.gSuite),
                            GSuiteConstants.GSUITE_SUFFIX),
                HistElement(getGSuiteHistoryOutputName(
                                'primary', description=desc, datasetInfo=choices.gSuite),
                            GSuiteConstants.GSUITE_SUFFIX),
                HistElement(getGSuiteHistoryOutputName(
                                'nopreprocessed', description=desc, datasetInfo=choices.gSuite),
                            GSuiteConstants.GSUITE_SUFFIX),
                HistElement(getGSuiteHistoryOutputName(
                                'preprocessed', description=desc, datasetInfo=choices.gSuite),
                            GSuiteConstants.GSUITE_SUFFIX),
                HistElement(getGSuiteHistoryOutputName(
                                'storage', description=desc, datasetInfo=choices.gSuite),
                            GSuiteConstants.GSUITE_STORAGE_SUFFIX, hidden=True)]

    @classmethod
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user.

        Intersects every track of the GSuite in choices.gSuite with the
        selected filtering track and writes each result as a file with suffix
        OUTPUT_TRACKS_SUFFIX under the hidden 'storage' history element:

          * NO_OVERLAPS: the analysis '-> TrackIntersectionStat' is run on
            the track and the filtering track, and the 'Result' entries of
            the returned result are composed to the output file.
          * otherwise: the track is extracted within the regions of the
            filtering track with allowOverlaps=True.

        A result whose first data line cannot be parsed (most likely an empty
        file) is put in the 'nointersect' GSuite, with the exception message
        as comment; all other results are put in the 'primary' GSuite, which
        is then preprocessed. Four GSuites ('nointersect', 'primary',
        'preprocessed', 'nopreprocessed') are written to the corresponding
        entries of cls.extraGalaxyFn, and the storage HTML is written last.
        Progress is reported through a ProgressViewer created with galaxyFn.
        '''

        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
        from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
        from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
        from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from quick.application.GalaxyInterface import GalaxyInterface
        from quick.application.UserBinSource import UserBinSource
        from quick.extra.TrackExtractor import TrackExtractor

        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gSuite)

        # Resolve the filtering track. The NO_OVERLAPS branch needs it as a
        # track name for the analysis; the other branch needs it as a bin
        # source for the extraction. Only one of filterTrackName and
        # userBinSource is bound after this block.
        if choices.withOverlaps == cls.NO_OVERLAPS:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                filterTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.trackHistory)
            else:
                filterTrackName = choices.track.split(':')
        else:
            if choices.trackSource == cls.FROM_HISTORY_TEXT:
                regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.trackHistory)
                binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices.trackHistory)
            else:
                regSpec = 'track'
                binSpec = choices.track

            userBinSource = UserBinSource(regSpec, binSpec, genome)

        # Output paths; the names match those in getExtraHistElements().
        desc = cls.OUTPUT_GSUITE_DESCRIPTION
        emptyFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nointersect', description=desc, datasetInfo=choices.gSuite)]
        primaryFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('primary', description=desc, datasetInfo=choices.gSuite)]
        errorFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('nopreprocessed', description=desc, datasetInfo=choices.gSuite)]
        preprocessedFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('preprocessed', description=desc, datasetInfo=choices.gSuite)]
        hiddenStorageFn = cls.extraGalaxyFn \
            [getGSuiteHistoryOutputName('storage', description=desc, datasetInfo=choices.gSuite)]

        analysisDef = '-> TrackIntersectionStat'
#         analysisDef = '-> TrackIntersectionWithValStat'

        numTracks = gSuite.numTracks()
        progressViewer = ProgressViewer([(cls.PROGRESS_INTERSECT_MSG, numTracks),
                                         (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)
        emptyGSuite = GSuite()
        primaryGSuite = GSuite()

        for track in gSuite.allTracks():
            # File name and title of the output track, with the suffix
            # replaced by the output suffix.
            newSuffix = cls.OUTPUT_TRACKS_SUFFIX
            extraFileName = os.path.sep.join(track.trackName)
            extraFileName = changeSuffixIfPresent(extraFileName, newSuffix=newSuffix)
            title = getTitleWithSuffixReplaced(track.title, newSuffix)

            # The suffix is only passed on when the file name does not already
            # end with it.
            primaryTrackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
                suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
            primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                       genome=track.genome, attributes=track.attributes)

            if choices.withOverlaps == cls.NO_OVERLAPS:
                res = GalaxyInterface.runManual([track.trackName, filterTrackName], analysisDef, '*', '*',
                                                 genome=genome, galaxyFn=galaxyFn, username=username)

                # Collect the per-key 'Result' entries in sorted key order.
                trackViewList = [res[key]['Result'] for key in sorted(res.keys())]

                tvGeSource = TrackViewListGenomeElementSource(genome, trackViewList)

                composerCls = getComposerClsFromFileSuffix(cls.OUTPUT_TRACKS_SUFFIX)
                composerCls(tvGeSource).composeToFile(primaryTrack.path)
            else:
                TrackExtractor.extractOneTrackManyRegsToOneFile( \
                    track.trackName, userBinSource, primaryTrack.path, fileFormatName=cls.OUTPUT_TRACKS_SUFFIX, \
                    globalCoords=True, asOriginal=False, allowOverlaps=True)

            # Temporary hack until better solution for empty result tracks have been implemented

            from gold.origdata.GenomeElementSource import GenomeElementSource
            geSource = GenomeElementSource(primaryTrack.path, genome=genome, suffix=cls.OUTPUT_TRACKS_SUFFIX)

            try:
                geSource.parseFirstDataLine()
                primaryGSuite.addTrack(primaryTrack)
            except Exception, e: # Most likely empty file
                primaryTrack.comment = e.message
                emptyGSuite.addTrack(primaryTrack)
                # This track will not be preprocessed, so the total of the
                # preprocess progress step is lowered by one.
                numTracks -= 1
                progressViewer.updateProgressObjectElementCount(
                    cls.PROGRESS_PREPROCESS_MSG, numTracks)
            # End of temporary hack

            progressViewer.update()

        # Only tracks that ended up in the primary GSuite are preprocessed.
        gSuitePreprocessor = GSuitePreprocessor()
        preprocessedGSuite, errorGSuite = gSuitePreprocessor.\
            visitAllGSuiteTracksAndReturnOutputAndErrorGSuites \
                (primaryGSuite, progressViewer)

        GSuiteComposer.composeToFile(emptyGSuite, emptyFn)
        GSuiteComposer.composeToFile(primaryGSuite, primaryFn)
        GSuiteComposer.composeToFile(preprocessedGSuite, preprocessedFn)
        GSuiteComposer.composeToFile(errorGSuite, errorFn)
        writeGSuiteHiddenTrackStorageHtml(hiddenStorageFn)
Exemple #30
0
    def printGSuite(cls, choices, cols, rows, colListString, outFile):
        """
        Write one GTrack file per disease/trait found in `rows`, and a GSuite
        referring to all of them.

        Args:
            choices: not used in this method.
            cols: list of column names; must contain cls.DISEASE_COLUMN_NAME,
                cls.CHR_COLUMN_NAME, cls.START_COLUMN_NAME and
                cls.VAL_COLUMN_NAME. All other columns are written as extra
                GTrack columns, with names fixed by cls._fixColNameForGTrack.
            rows: iterable of rows, each indexable by the positions in `cols`.
            colListString: not used in this method.
            outFile: path the resulting GSuite is composed to.

        Rows are grouped by the disease column (unicode values are converted
        with unidecode and stripped of NUL characters) and the diseases are
        processed in sorted order. Chromosome names get a 'chr' prefix, with
        chr23, chr24 and chrMT renamed to chrX, chrY and chrM. If
        cls.DATABASE_GENOME differs from cls.OUTPUT_GENOME, positions are
        lifted over with pyliftover; a position without exactly one lift-over
        result is reported on standard out.

        Track files are stored under
        cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]; progress is
        reported to cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE].
        """
        from quick.extra.ProgressViewer import ProgressViewer

        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
        import gold.gsuite.GSuiteComposer as GSuiteComposer

        from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
        from gold.origdata.GtrackComposer import ExtendedGtrackComposer
        from gold.origdata.GESourceWrapper import ListGESourceWrapper
        from gold.origdata.GenomeElement import GenomeElement

        from collections import defaultdict

        from unidecode import unidecode
        from pyliftover import LiftOver

        # Chromosome names as built from the chromosome column -> names used
        # in the output tracks.
        chromAliases = {'chr23': 'chrX', 'chr24': 'chrY', 'chrMT': 'chrM'}

        gSuite = GSuite()

        diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
        chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
        startColIndex = cols.index(cls.START_COLUMN_NAME)
        valColIndex = cols.index(cls.VAL_COLUMN_NAME)

        # Every column that is not one of the four core columns becomes an
        # extra column; extraIndexes and orderedExtraKeys stay aligned.
        orderedExtraKeys = list(cols)
        extraIndexes = list(range(len(cols)))
        for colName in [cls.DISEASE_COLUMN_NAME, cls.CHR_COLUMN_NAME,
                        cls.START_COLUMN_NAME, cls.VAL_COLUMN_NAME]:
            extraIndexes.remove(cols.index(colName))
            orderedExtraKeys.remove(colName)
        orderedExtraKeys = [cls._fixColNameForGTrack(key) for key in orderedExtraKeys]

        diseaseToRowsDict = defaultdict(list)
        for row in rows:
            disease = row[diseaseColIndex]
            if isinstance(disease, unicode):
                disease = unidecode(disease).replace('\x00', '')

            diseaseToRowsDict[disease].append(row)

        progressViewer = ProgressViewer([('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
                                        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

        # The lift-over object depends only on the two genomes, so it is built
        # once (and only if there is something to convert) instead of once per
        # disease.
        shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
        liftOver = None
        if shouldLiftOver and diseaseToRowsDict:
            liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME)

        for disease in sorted(diseaseToRowsDict):
            uri = GalaxyGSuiteTrack.generateURI(galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
                                                extraFileName=disease.replace('/', '_') + '.gtrack')
            gSuiteTrack = GSuiteTrack(uri, title=disease, genome=cls.OUTPUT_GENOME)
            gSuite.addTrack(gSuiteTrack)

            geList = []
            for row in diseaseToRowsDict[disease]:
                extra = {}
                for col, index in zip(orderedExtraKeys, extraIndexes):
                    cell = row[index].strip()
                    if isinstance(cell, unicode):
                        cell = unidecode(cell)

                    # Empty cells are written as '.'
                    extra[col] = cell if cell != '' else '.'

                chrom = 'chr' + row[chrColIndex]
                chrom = chromAliases.get(chrom, chrom)

                start = int(row[startColIndex])
                if shouldLiftOver:
                    newPosList = liftOver.convert_coordinate(chrom, start)
                    if newPosList is None or len(newPosList) != 1:
                        # The position goes first and the chromosome second,
                        # matching the wording of the message (the two values
                        # used to be swapped).
                        # A single parenthesised expression prints the same
                        # text with the print statement and print function.
                        print('SNP with position %s on chromosome %s ' % (start, chrom) +
                              'could not be lifted over from reference genome ' +
                              '%s to %s (for disease/trait "%s")' %
                              (cls.DATABASE_GENOME, cls.OUTPUT_GENOME, disease))
                        # NOTE(review): the element is still added below with
                        # its original, non-lifted coordinates. This keeps the
                        # existing behaviour; confirm whether such SNPs should
                        # be skipped instead.
                    else:
                        chrom, start = newPosList[0][0:2]

                geList.append(GenomeElement(chr=chrom, start=start,
                                            val=row[valColIndex], orderedExtraKeys=orderedExtraKeys,
                                            extra=extra))

            # The blueprint GTrack source is wrapped so that the composer
            # iterates over geList instead.
            geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
            wrappedGeSource = ListGESourceWrapper(geSource, geList)
            composer = ExtendedGtrackComposer(wrappedGeSource)
            composer.composeToFile(gSuiteTrack.path)

            progressViewer.update()

        GSuiteComposer.composeToFile(gSuite, outFile)