def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Lift over every track of the selected GSuite to the chosen reference genome.

    For each input track a new GTrack file is created in the hidden track
    storage and filled by GSuitePrimaryTrackModifier.liftOverGTrack, using the
    rsID mapping created for choices.refGenome. Tracks that are processed
    without errors are collected in the GSuite written to galaxyFn. Tracks that
    raise an exception get the error message stored in their comment and are
    written to the error GSuite history element instead.
    '''
    from quick.webtools.clustering.GSuitePrimaryTrackModifier import GSuitePrimaryTrackModifier

    inputGSuite = getGSuiteFromGalaxyTN(choices.gSuite)
    liftedGSuite = GSuite()
    failedGSuite = GSuite()

    # One progress step per track, plus 24 additional steps.
    # NOTE(review): the 24 extra steps are presumably consumed while the rsID
    # mapping is created (the progress viewer is passed to it) - confirm.
    progressSteps = [('Manipulate tracks', inputGSuite.numTracks() + 24)]
    progress = ProgressViewer(progressSteps,
                              cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])
    storageFn = cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]

    rsidMap = RsidMapper.createRsidMappingFromStaticFiles(progress, choices.refGenome)

    for inputTrack in inputGSuite.allTracks():
        gtrackFileName = cls.getFilenameWithGTrackSuffix(inputTrack.path)
        gtrackTitle = getTitleWithSuffixReplaced(inputTrack.title, 'gtrack')

        try:
            liftedUri = GalaxyGSuiteTrack.generateURI(galaxyFn=storageFn,
                                                      extraFileName=gtrackFileName,
                                                      suffix='gtrack')
            liftedTrack = GSuiteTrack(liftedUri,
                                      title=gtrackTitle,
                                      genome=inputTrack.genome,
                                      trackType=inputTrack.trackType,
                                      attributes=inputTrack.attributes)
            GSuitePrimaryTrackModifier.liftOverGTrack(inputTrack.path,
                                                      liftedTrack.path,
                                                      rsidMap)
            liftedGSuite.addTrack(liftedTrack)
        except Exception as e:
            # Best effort: a failing track is reported in the error GSuite
            # rather than aborting the whole run.
            inputTrack.comment = 'An error occurred for the following track: ' + str(e)
            failedGSuite.addTrack(inputTrack)

        progress.update()

    # All lifted tracks now refer to the target reference genome.
    liftedGSuite.setGenomeOfAllTracks(choices.refGenome)

    composeToFile(liftedGSuite, galaxyFn)
    composeToFile(failedGSuite, cls.extraGalaxyFn[cls.HISTORY_ERROR_TITLE])
    writeGSuiteHiddenTrackStorageHtml(storageFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Convert every track of the selected GSuite into a point track using LD data.

    The LD dictionary is fetched for the r-square threshold in choices.rsquare
    and the rsID mapping is created for choices.refGenome. Each input track is
    then parsed by CreateLDTrack.parseFileIntoPointTrack into a new GTrack file
    in the hidden track storage. Successfully converted tracks are written to
    the GSuite at galaxyFn; tracks that raise an exception get the error
    message stored in their comment and go to the error GSuite history element.
    '''
    from quick.webtools.clustering.CreateLDTrack import CreateLDTrack
    from quick.webtools.clustering.RsidMapper import RsidMapper

    inputGSuite = getGSuiteFromGalaxyTN(choices.gSuite)
    pointGSuite = GSuite()
    failedGSuite = GSuite()

    # One progress step per track, plus 24 additional steps.
    # NOTE(review): the 24 extra steps are presumably consumed while the rsID
    # mapping is created (the progress viewer is passed to it) - confirm.
    progressSteps = [('Manipulate tracks', inputGSuite.numTracks() + 24)]
    progress = ProgressViewer(progressSteps,
                              cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

    ldDict = CreateLDTrack.getLDDict(float(choices.rsquare))
    rsidDict = RsidMapper.createRsidMappingFromStaticFiles(progress, choices.refGenome)
    storageFn = cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]

    for inputTrack in inputGSuite.allTracks():
        gtrackFileName = cls.getFilenameWithGTrackSuffix(inputTrack.path)
        gtrackTitle = getTitleWithSuffixReplaced(inputTrack.title, 'gtrack')

        try:
            pointUri = GalaxyGSuiteTrack.generateURI(galaxyFn=storageFn,
                                                     extraFileName=gtrackFileName,
                                                     suffix='gtrack')
            # The output is always a point track, regardless of the input type.
            pointTrack = GSuiteTrack(pointUri,
                                     title=gtrackTitle,
                                     genome=inputTrack.genome,
                                     trackType='points',
                                     attributes=inputTrack.attributes)
            CreateLDTrack.parseFileIntoPointTrack(inputTrack.path,
                                                  pointTrack.path,
                                                  ldDict,
                                                  rsidDict)
            pointGSuite.addTrack(pointTrack)
        except Exception as e:
            # Best effort: a failing track is reported in the error GSuite
            # rather than aborting the whole run.
            inputTrack.comment = 'An error occurred for the following track: ' + str(e)
            failedGSuite.addTrack(inputTrack)

        progress.update()

    pointGSuite.setGenomeOfAllTracks(choices.refGenome)

    composeToFile(pointGSuite, galaxyFn)
    composeToFile(failedGSuite, cls.extraGalaxyFn[cls.HISTORY_ERROR_TITLE])
    writeGSuiteHiddenTrackStorageHtml(storageFn)
def getSubtracksAsGSuite(genome, parentTrack, username=''):
    '''
    Return a GSuite with one HyperBrowser track per subtrack of parentTrack.

    The subtrack names are yielded by a ProcTrackNameSource created for the
    genome (excluding the parent track itself), with access rights taken from
    GalaxyInterface.userHasFullAccess(username). For each subtrack the track
    type is read from TrackInfo and the title is the pretty-printed track name,
    stripped of one pair of enclosing single quotes if present.
    '''
    from gold.description.TrackInfo import TrackInfo
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.ProcTrackNameSource import ProcTrackNameSource

    trackNameSource = ProcTrackNameSource(
        genome,
        fullAccess=GalaxyInterface.userHasFullAccess(username),
        includeParentTrack=False)

    subtrackGSuite = GSuite()

    for subtrackName in trackNameSource.yielder(parentTrack):
        formatName = TrackInfo(genome, subtrackName).trackFormatName
        subtrackType = cleanUpTrackType(formatName.lower())
        subtrackUri = HbGSuiteTrack.generateURI(trackName=subtrackName)

        subtrackTitle = prettyPrintTrackName(subtrackName)
        isQuoted = subtrackTitle.startswith("'") and subtrackTitle.endswith("'")
        # A lone "'" both starts and ends with a quote, but must be left as is.
        if isQuoted and len(subtrackTitle) > 1:
            subtrackTitle = subtrackTitle[1:-1]

        subtrack = GSuiteTrack(subtrackUri, title=subtrackTitle,
                               trackType=subtrackType, genome=genome)
        subtrackGSuite.addTrack(subtrack)

    return subtrackGSuite
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Split a categorical track into one track per category, collected in a GSuite.

    The genome elements of the track selected in choices.catTrack are grouped by
    their value (the category). For each category the elements are composed, in
    the format given by choices.outputType, to the extra history element named
    after the category. Finally a GSuite referring to all the category files is
    written to the 'GSuite from categorical' history element.
    '''
    genome = choices.genome
    outputType = choices.outputType
    catTrackName = choices.catTrack.split(':')

    geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(catTrackName, genome)

    # Group the genome elements by category value.
    elementsByCategory = defaultdict(list)
    for element in geSource:
        elementsByCategory[element.val].append(element)

    # Write one file per category.
    for category, elements in elementsByCategory.iteritems():
        categorySource = ListGESourceWrapper(geSource, elements)
        categoryComposer = cls.getComposer(categorySource, outputType)
        categoryComposer.composeToFile(cls.extraGalaxyFn[category])

    # Refer to the per-category files from the output GSuite.
    categoryFiles = [(category, cls.extraGalaxyFn[category])
                     for category in elementsByCategory.keys()]

    outGSuite = GSuite()
    for category, galaxyFileName in categoryFiles:
        categoryUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFileName,
                                                    suffix=outputType)
        outGSuite.addTrack(GSuiteTrack(categoryUri, title=category, genome=genome))

    GSuiteComposer.composeToFile(outGSuite,
                                 cls.extraGalaxyFn['GSuite from categorical'])

    print('Execution done!')
def execute(choices, galaxyFn=None, username=''):
    '''
    Create a GSuite referring to the history elements selected by the user.

    Every entry of choices.history with a value other than None is treated as a
    colon-separated Galaxy track name. File name, history name and file suffix
    are extracted from it and used to build a Galaxy GSuite track. The genome of
    the tracks is choices.genome if choices.selectGenome is 'Yes', else None.
    The resulting GSuite is written to galaxyFn.
    '''
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from quick.application.ExternalTrackManager import ExternalTrackManager

    if choices.selectGenome == 'Yes':
        genome = choices.genome
    else:
        genome = None

    # Unselected history elements are represented by None values.
    selectedTrackNames = [galaxyTN for _, galaxyTN in choices.history.iteritems()
                          if galaxyTN is not None]

    historyGSuite = GSuite()

    for selectedTrackName in selectedTrackNames:
        galaxyTrackName = selectedTrackName.split(':')

        historyFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTrackName)
        historyName = ExternalTrackManager.extractNameFromHistoryTN(galaxyTrackName)
        historySuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTrackName)

        historyUri = GalaxyGSuiteTrack.generateURI(galaxyFn=historyFn,
                                                   suffix=historySuffix)
        historyGSuite.addTrack(GSuiteTrack(historyUri, title=historyName, genome=genome))

    GSuiteComposer.composeToFile(historyGSuite, galaxyFn)
def testComposeLocalUrlGenomeAttributes(self):
    '''
    Compose a GSuite of two local tracks with different genomes and attributes.

    The expected output has 'multiple' as header genome, a per-track genome
    column, and '.' for attributes a track does not define.
    '''
    gSuite = GSuite()
    gSuite.addTrack(GSuiteTrack('galaxy:/12345abc',
                                genome='hg18',
                                attributes=OrderedDict([('one', 'yes')])))
    gSuite.addTrack(GSuiteTrack('file:/path/to/file2',
                                genome='hg19',
                                attributes=OrderedDict([('two', 'no')])))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: local\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: multiple\n'
        '###uri\ttitle\tgenome\tone\ttwo\n'
        'galaxy:/12345abc\t12345abc\thg18\tyes\t.\n'
        'file:/path/to/file2\tfile2\thg19\t.\tno\n'
    )

    # assertEqual instead of the deprecated alias assertEquals.
    self.assertEqual(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Rewrite the selected GSuite so that all tracks are referred to by file URIs.

    For each track of the GSuite in choices.gsuite a file URI is generated from
    the track path. The suffix is only stated explicitly in the URI when it
    differs from the suffix of the path itself. Title, track type, genome and
    attributes are carried over, and the new GSuite is written to galaxyFn.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import registerGSuiteTrackClass, GSuiteTrack, FileGSuiteTrack
    from quick.util.CommonFunctions import getFileSuffix

    registerGSuiteTrackClass(FileGSuiteTrack)

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    fileGSuite = GSuite()

    for inputTrack in inputGSuite.allTracks():
        trackPath = inputTrack.path
        # An explicit suffix is redundant if the path already ends with it.
        explicitSuffix = '' if inputTrack.suffix == getFileSuffix(trackPath) \
            else inputTrack.suffix

        fileUri = FileGSuiteTrack.generateURI(path=trackPath, suffix=explicitSuffix)
        fileGSuite.addTrack(GSuiteTrack(fileUri,
                                        title=inputTrack.title,
                                        trackType=inputTrack.trackType,
                                        genome=inputTrack.genome,
                                        attributes=inputTrack.attributes))

    GSuiteComposer.composeToFile(fileGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Rewrite the selected GSuite so that its Galaxy tracks are referred to by file URIs.

    If choices.genome differs from the genome of the GSuite, the genome of all
    tracks is first set to choices.genome. Each track name is then converted
    back to a Galaxy track name, from which the file path and suffix of the new
    file URI are extracted. Title, track type, genome and attributes are
    carried over, and the new GSuite is written to galaxyFn.
    '''
    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

    if inputGSuite.genome != choices.genome:
        inputGSuite.setGenomeOfAllTracks(choices.genome)

    registerGSuiteTrackClass(FileGSuiteTrack)

    fileGSuite = GSuite()

    for inputTrack in inputGSuite.allTracks():
        galaxyTN = ETM.createGalaxyTnFromExternalTn(inputTrack.trackName)
        filePath = ETM.extractFnFromGalaxyTN(galaxyTN)
        fileSuffix = ETM.extractFileSuffixFromGalaxyTN(galaxyTN)

        fileUri = FileGSuiteTrack.generateURI(path=filePath, suffix=fileSuffix)
        fileGSuite.addTrack(GSuiteTrack(fileUri,
                                        title=inputTrack.title,
                                        trackType=inputTrack.trackType,
                                        genome=inputTrack.genome,
                                        attributes=inputTrack.attributes))

    GSuiteComposer.composeToFile(fileGSuite, galaxyFn)
def testAddGSuiteTracks(self):
    '''
    Add remote, then local/preprocessed tracks to a GSuite and check its summary.

    Verifies the header values (location, file format, track type, genome), the
    union of attributes, the automatic renaming of duplicate titles, and that
    adding a duplicate title with allowDuplicateTitles=False raises
    InvalidFormatError.
    '''
    def addTracks(suite, trackSpecs):
        for uri, trackKwArgs in trackSpecs:
            suite.addTrack(GSuiteTrack(uri, **trackKwArgs))

    def genomesOf(suite):
        return [track.genome for track in suite.allTracks()]

    gSuite = GSuite()

    # Setting the genome of an empty GSuite has no effect.
    gSuite.setGenomeOfAllTracks('hg18')
    self.assertEqual('unknown', gSuite.genome)

    remoteTrackSpecs = [
        ('ftp://server.somewhere.com/path/to/file1.bed',
         dict(title='Track1',
              attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))),
        ('http://server.other.com/path/to/file2.bed',
         dict(title='Track2',
              attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyc')]))),
        ('https://server.other.com/path/to/file3.bed',
         dict(attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyb')]))),
        ('rsync://server.other.com/other/path/to/file3.bed;bed9',
         dict(attributes=OrderedDict([('cell', 'NHFL')]))),
    ]
    addTracks(gSuite, remoteTrackSpecs)
    gSuite.setGenomeOfAllTracks('hg18')

    self.assertEqual('remote', gSuite.location)
    self.assertEqual('primary', gSuite.fileFormat)
    self.assertEqual('unknown', gSuite.trackType)
    self.assertEqual('hg18', gSuite.genome)
    self.assertEqual(['cell', 'antibody'], gSuite.attributes)
    self.assertEqual(False, gSuite.isPreprocessed())
    self.assertEqual(True, gSuite.hasCustomTitles())
    self.assertEqual(4, gSuite.numTracks())
    self.assertEqual(['hg18'] * 4, genomesOf(gSuite))
    self.assertEqual(['Track1', 'Track2', 'file3.bed', 'file3.bed (2)'],
                     gSuite.allTrackTitles())
    self.assertEqual(['unknown'], gSuite.allTrackTypes())

    # A track with another genome makes the GSuite genome 'multiple'.
    addTracks(gSuite, [
        ('hb:/track/name/hierarchy',
         dict(title='Track1', genome='hg19', trackType='segments')),
    ])
    self.assertEqual('multiple', gSuite.genome)

    addTracks(gSuite, [
        ('galaxy:/ad123dd12fg;btrack?track=track:name',
         dict(title='Track2',
              attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))),
        ('file:/path/to/file.btrack?track=track:name',
         dict(title='Track2',
              attributes=OrderedDict([('antibody', 'cMyb'), ('extra', 'yes')]))),
    ])

    self.assertEqual('multiple', gSuite.location)
    self.assertEqual('multiple', gSuite.fileFormat)
    self.assertEqual('unknown', gSuite.trackType)
    self.assertEqual('unknown', gSuite.genome)
    self.assertEqual(['cell', 'antibody', 'extra'], gSuite.attributes)
    self.assertEqual(False, gSuite.isPreprocessed())
    self.assertEqual(True, gSuite.hasCustomTitles())
    self.assertEqual(7, gSuite.numTracks())
    self.assertEqual(['hg18'] * 4 + ['hg19'] + ['unknown'] * 2, genomesOf(gSuite))
    self.assertEqual(['Track1', 'Track2', 'file3.bed', 'file3.bed (2)',
                      'Track1 (2)', 'Track2 (2)', 'Track2 (3)'],
                     gSuite.allTrackTitles())
    self.assertEqual(['segments', 'unknown'], gSuite.allTrackTypes())

    # The default title 'file3.bed' is already taken.
    duplicateTitleTrack = GSuiteTrack('https://server.third.com/path/to/file3.bed')
    with self.assertRaises(InvalidFormatError):
        gSuite.addTrack(duplicateTitleTrack, allowDuplicateTitles=False)
def testFullCompose(self):
    '''
    Compose a GSuite mixing remote, HyperBrowser, Galaxy and file tracks.

    The expected output has 'multiple' location and file format, a per-track
    file_format column, '.' for attributes a track does not define, duplicate
    titles renamed with a running number, and ':' in the URI query
    percent-encoded as '%3A'.
    '''
    gSuite = GSuite()
    gSuite.addTrack(GSuiteTrack(
        'ftp://server.somewhere.com/path/to/file1.bed', title='Track',
        attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')])))
    gSuite.addTrack(GSuiteTrack(
        'http://server.other.com/path/to/file2.bed', title='Track2',
        attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyc')])))
    gSuite.addTrack(GSuiteTrack(
        'https://server.other.com/path/to/file3.bed',
        attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyb')])))
    gSuite.addTrack(GSuiteTrack(
        'rsync://server.other.com/path/to/file4;wig', title='Track4',
        attributes=OrderedDict([('cell', 'NHFL')])))
    gSuite.addTrack(GSuiteTrack('hb:/track/name/hierarchy', title='Track'))
    gSuite.addTrack(GSuiteTrack(
        'galaxy:/ad123dd12fg;btrack?track=track:name', title='Track',
        attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')])))
    gSuite.addTrack(GSuiteTrack(
        'file:/path/to/file.btrack?track=track:name', title='Track name7',
        attributes=OrderedDict([('antibody', 'cMyb'), ('extra', 'yes')])))

    gSuite.setGenomeOfAllTracks('hg18')

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: multiple\n'
        '##file format: multiple\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tTrack\tprimary\tk562\tcMyb\t.\n'
        'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
        'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
        'hb:/track/name/hierarchy\tTrack (2)\tpreprocessed\t.\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack (3)\tpreprocessed\tk562\tcMyb\t.\n'
        'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
    )

    # assertEqual instead of the deprecated alias assertEquals.
    self.assertEqual(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run the tool operation on every track of the selected GSuite.

    For each track of the GSuite in choices.history, a new file is created in
    the hidden track storage (with the suffix returned by cls._getSuffix) and
    filled by cls._runOperation. Tracks that are processed without errors are
    written to the 'primary' GSuite history element. Tracks that raise an
    exception get the first line of the error message stored in their comment
    and are written to the 'nomanipulate' GSuite history element. Progress is
    reported to galaxyFn.
    '''
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteComposer import composeToFile
    from gold.gsuite.GSuiteFunctions import getTitleWithSuffixReplaced
    from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
    from quick.extra.ProgressViewer import ProgressViewer
    from quick.gsuite.GSuiteHbIntegration import \
        writeGSuiteHiddenTrackStorageHtml
    from quick.util.CommonFunctions import ensurePathExists

    inputGSuite = getGSuiteFromGalaxyTN(choices.history)
    manipulatedGSuite = GSuite()
    failedGSuite = GSuite()

    progress = ProgressViewer([('Manipulate tracks', inputGSuite.numTracks())],
                              galaxyFn)

    storageOutputName = getGSuiteHistoryOutputName(
        'storage', cls.OUTPUT_DESCRIPTION, choices.history)
    storageFn = cls.extraGalaxyFn[storageOutputName]

    for inputTrack in inputGSuite.allTracks():
        outputSuffix = cls._getSuffix(choices, inputTrack)
        outputFileName = changeSuffixIfPresent(os.path.basename(inputTrack.path),
                                               oldSuffix=inputTrack.suffix,
                                               newSuffix=outputSuffix)
        outputTitle = getTitleWithSuffixReplaced(inputTrack.title, outputSuffix)

        try:
            uriKwArgs = dict(galaxyFn=storageFn, extraFileName=outputFileName)
            # The suffix is only stated explicitly in the URI when the file
            # name does not already end with it.
            if not outputFileName.endswith('.' + outputSuffix):
                uriKwArgs['suffix'] = outputSuffix

            outputTrack = GSuiteTrack(GalaxyGSuiteTrack.generateURI(**uriKwArgs),
                                      title=outputTitle,
                                      genome=inputTrack.genome,
                                      attributes=inputTrack.attributes)

            outputFn = outputTrack.path
            ensurePathExists(outputFn)
            cls._runOperation(choices, inputTrack.path, outputFn)

            manipulatedGSuite.addTrack(outputTrack)
        except Exception as e:
            # Best effort: a failing track is reported in the error GSuite
            # rather than aborting the whole run. Only the first line of the
            # error message is kept.
            firstErrorLine = str(e).split('\n', 1)[0]
            inputTrack.comment = 'An error occurred for the following track: ' + firstErrorLine
            failedGSuite.addTrack(inputTrack)

        progress.update()

    primaryOutputName = getGSuiteHistoryOutputName(
        'primary', cls.OUTPUT_DESCRIPTION, choices.history)
    composeToFile(manipulatedGSuite, cls.extraGalaxyFn[primaryOutputName])

    errorOutputName = getGSuiteHistoryOutputName(
        'nomanipulate', datasetInfo=choices.history)
    composeToFile(failedGSuite, cls.extraGalaxyFn[errorOutputName])

    writeGSuiteHiddenTrackStorageHtml(storageFn)
def printGSuite(cls, choices, cols, rows, colListString, outFile):
    '''
    Write one GTrack file per disease/trait and a GSuite referring to them all.

    Parameters:
        choices, colListString: not used by this method.
        cols: list of column names; must contain the disease, chromosome,
            start and value column names defined on the class.
        rows: iterable of rows, each indexable by the positions of cols.
        outFile: path the resulting GSuite is written to.

    The rows are grouped by disease. For each disease (in sorted order) a
    GTrack file is composed in the hidden track storage, with all columns other
    than the four core columns stored as extra columns. If cls.DATABASE_GENOME
    differs from cls.OUTPUT_GENOME, positions are lifted over with pyliftover;
    a message is printed for each SNP that cannot be lifted over uniquely.
    '''
    from quick.extra.ProgressViewer import ProgressViewer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
    from gold.origdata.GtrackComposer import ExtendedGtrackComposer
    from gold.origdata.GESourceWrapper import ListGESourceWrapper
    from gold.origdata.GenomeElement import GenomeElement
    from collections import defaultdict
    from unidecode import unidecode
    from pyliftover import LiftOver

    # Numeric chromosome names used in the database for X and Y, and the
    # mitochondrial name, mapped to the names used in the output tracks.
    chromRenames = {'chr23': 'chrX', 'chr24': 'chrY', 'chrMT': 'chrM'}

    gSuite = GSuite()

    diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
    chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
    startColIndex = cols.index(cls.START_COLUMN_NAME)
    valColIndex = cols.index(cls.VAL_COLUMN_NAME)

    # All columns except the four core columns are stored as extra columns.
    # Keys and indexes are derived from the same list, so they stay aligned.
    coreColIndexes = set([diseaseColIndex, chrColIndex, startColIndex, valColIndex])
    extraIndexes = [index for index in range(len(cols))
                    if index not in coreColIndexes]
    orderedExtraKeys = [cls._fixColNameForGTrack(cols[index])
                        for index in extraIndexes]

    diseaseToRowsDict = defaultdict(list)
    for row in rows:
        disease = row[diseaseColIndex]
        if isinstance(disease, unicode):
            disease = unidecode(disease).replace('\x00', '')
        diseaseToRowsDict[disease].append(row)

    progressViewer = ProgressViewer(
        [('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

    # The lift-over object does not depend on the disease, so it is created
    # once instead of once per disease.
    shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
    liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME) \
        if shouldLiftOver else None

    for disease in sorted(diseaseToRowsDict):
        uri = GalaxyGSuiteTrack.generateURI(
            galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
            extraFileName=disease.replace('/', '_') + '.gtrack')
        gSuiteTrack = GSuiteTrack(uri, title=disease, genome=cls.OUTPUT_GENOME)
        gSuite.addTrack(gSuiteTrack)

        geList = []
        for row in diseaseToRowsDict[disease]:
            extra = {}
            for col, index in zip(orderedExtraKeys, extraIndexes):
                cell = row[index].strip()
                if isinstance(cell, unicode):
                    cell = unidecode(cell)
                # '.' marks an empty value.
                extra[col] = cell if cell != '' else '.'

            chrom = 'chr' + row[chrColIndex]
            chrom = chromRenames.get(chrom, chrom)
            start = int(row[startColIndex])

            if shouldLiftOver:
                newPosList = liftOver.convert_coordinate(chrom, start)
                if newPosList is None or len(newPosList) != 1:
                    # Fixed: position and chromosome were swapped in the message.
                    print('SNP with position %s on chromosome %s '
                          'could not be lifted over from reference genome '
                          '%s to %s (for disease/trait "%s")' %
                          (start, chrom, cls.DATABASE_GENOME,
                           cls.OUTPUT_GENOME, disease))
                    # NOTE(review): the SNP is still added below with its
                    # original (not lifted over) coordinates, as before.
                    # Confirm whether it should be skipped instead.
                else:
                    chrom, start = newPosList[0][0:2]

            geList.append(GenomeElement(chr=chrom, start=start,
                                        val=row[valColIndex],
                                        orderedExtraKeys=orderedExtraKeys,
                                        extra=extra))

        geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
        wrappedGeSource = ListGESourceWrapper(geSource, geList)
        composer = ExtendedGtrackComposer(wrappedGeSource)
        composer.composeToFile(gSuiteTrack.path)

        progressViewer.update()

    GSuiteComposer.composeToFile(gSuite, outFile)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Create, for every track of the selected GSuite, a track restricted by a
    query track.

    The query track is taken from history (choices.trackHistory) or from the
    HyperBrowser repository (choices.track), depending on choices.trackSource.
    If choices.withOverlaps equals cls.NO_OVERLAPS, the '-> TrackIntersectionStat'
    analysis is run on each GSuite track together with the query track, and the
    resulting track views are composed to a file. Otherwise, each GSuite track
    is extracted within the regions of the query track (used as a bin source),
    allowing overlaps.

    Result tracks whose first data line can be parsed are collected in
    primaryGSuite. The others (most likely empty files) get the error message
    as comment and are collected in emptyGSuite.

    NOTE(review): in the code visible here, errorFn, preprocessedFn, emptyFn
    and primaryFn are looked up but never used, and no GSuite is written to
    file. The method may continue beyond what is shown - confirm.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
    from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
    from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
    from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
    from quick.application.ExternalTrackManager import ExternalTrackManager
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.UserBinSource import UserBinSource
    from quick.extra.TrackExtractor import TrackExtractor

    genome = choices.genome
    gSuite = getGSuiteFromGalaxyTN(choices.gSuite)

    if choices.withOverlaps == cls.NO_OVERLAPS:
        # The query track is used as the second track of the intersection analysis.
        if choices.trackSource == cls.FROM_HISTORY_TEXT:
            filterTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.trackHistory)
        else:
            filterTrackName = choices.track.split(':')
    else:
        # The query track is used as the source of the regions to extract from.
        if choices.trackSource == cls.FROM_HISTORY_TEXT:
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.trackHistory)
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices.trackHistory)
        else:
            regSpec = 'track'
            binSpec = choices.track

        userBinSource = UserBinSource(regSpec, binSpec, genome)

    # File names of the history elements of the tool, looked up by output name.
    desc = cls.OUTPUT_GSUITE_DESCRIPTION
    emptyFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('nointersect', description=desc, datasetInfo=choices.gSuite)]
    primaryFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('primary', description=desc, datasetInfo=choices.gSuite)]
    errorFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('nopreprocessed', description=desc, datasetInfo=choices.gSuite)]
    preprocessedFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('preprocessed', description=desc, datasetInfo=choices.gSuite)]
    hiddenStorageFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('storage', description=desc, datasetInfo=choices.gSuite)]

    analysisDef = '-> TrackIntersectionStat'
    # analysisDef = '-> TrackIntersectionWithValStat'

    # Two progress stages, each initially with one step per input track.
    numTracks = gSuite.numTracks()
    progressViewer = ProgressViewer([(cls.PROGRESS_INTERSECT_MSG, numTracks),
                                     (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)
    emptyGSuite = GSuite()
    primaryGSuite = GSuite()

    for track in gSuite.allTracks():
        newSuffix = cls.OUTPUT_TRACKS_SUFFIX
        # The file name in the hidden storage mirrors the track name hierarchy.
        extraFileName = os.path.sep.join(track.trackName)
        extraFileName = changeSuffixIfPresent(extraFileName, newSuffix=newSuffix)
        title = getTitleWithSuffixReplaced(track.title, newSuffix)

        # The suffix is only stated explicitly in the URI when the file name
        # does not already end with it.
        primaryTrackUri = GalaxyGSuiteTrack.generateURI(
            galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
            suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
        primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                   genome=track.genome, attributes=track.attributes)

        if choices.withOverlaps == cls.NO_OVERLAPS:
            res = GalaxyInterface.runManual([track.trackName, filterTrackName],
                                            analysisDef, '*', '*', genome=genome,
                                            galaxyFn=galaxyFn, username=username)

            # One track view per result key, in sorted key order.
            trackViewList = [res[key]['Result'] for key in sorted(res.keys())]

            tvGeSource = TrackViewListGenomeElementSource(genome, trackViewList)

            composerCls = getComposerClsFromFileSuffix(cls.OUTPUT_TRACKS_SUFFIX)
            composerCls(tvGeSource).composeToFile(primaryTrack.path)
        else:
            TrackExtractor.extractOneTrackManyRegsToOneFile( \
                track.trackName, userBinSource, primaryTrack.path, fileFormatName=cls.OUTPUT_TRACKS_SUFFIX, \
                globalCoords=True, asOriginal=False, allowOverlaps=True)

        # Temporary hack until better solution for empty result tracks have been implemented

        from gold.origdata.GenomeElementSource import GenomeElementSource
        geSource = GenomeElementSource(primaryTrack.path, genome=genome, suffix=cls.OUTPUT_TRACKS_SUFFIX)

        try:
            geSource.parseFirstDataLine()
            primaryGSuite.addTrack(primaryTrack)
        except Exception, e:
            # Most likely empty file
            primaryTrack.comment = e.message
            emptyGSuite.addTrack(primaryTrack)
            # One track less in the second progress stage.
            numTracks -= 1
            progressViewer.updateProgressObjectElementCount(
                cls.PROGRESS_PREPROCESS_MSG, numTracks)
        # End of temporary hack

        progressViewer.update()
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run Tandem Repeats Finder (TRF) on every track of the selected GSuite and
    report the repeat regions found.

    For each track, TRF (cls.TRF_PATH) is run in a run-specific directory with
    the seven numeric parameters from choices, and the resulting .dat file is
    parsed by cls.parseTRFResultFile, filtered by the optional min/max
    consensus length and min copy number. Tracks without results are left out.

    If choices.regionsGSuite and/or choices.monomersGSuite are set, BED tracks
    with the repeat regions and/or the repeat monomers are written, together
    with GSuites referring to them. Finally, an HTML page with the selected
    parameters and per-track repeat statistics is printed.
    '''
    import os

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)

    # The optional filters are only applied if a non-negative integer is given.
    minConsensusLength = int(choices.minconsensus) if choices.minconsensus.isdigit() else None
    maxConsensusLength = int(choices.maxconsensus) if choices.maxconsensus.isdigit() else None
    minCopyNumber = int(choices.mincopynumber) if choices.mincopynumber.isdigit() else None

    # Positional TRF parameters, kept as strings. They are used both on the
    # command line and in the name of the output file.
    parameters = [choices.match, choices.mismatch, choices.delta, choices.pm,
                  choices.pi, choices.minscore, choices.maxperiod]

    # Whether end positions and copy numbers should only count full copies.
    useFullCopies = choices.rrCutoff == cls.CUTOFF_REPEAT_REGION

    resultsDict = OrderedDict()
    for gsTrack in gsuite.allTracks():
        resFile = GalaxyRunSpecificFile(['trf', gsTrack.title, gsTrack.title + '.tmp'], galaxyFn)
        ensurePathExists(resFile.getDiskPath())
        trackDirName = os.path.dirname(os.path.realpath(resFile.getDiskPath()))

        # '-d' and '-h' are passed to TRF; the result is read from the .dat
        # file that is expected in the working directory afterwards.
        instruction = [cls.TRF_PATH, gsTrack.path] + parameters + ["-d", "-h"]
        pipe = subprocess.Popen(instruction, cwd=trackDirName,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # The output of the process is not used, but communicate() waits for
        # TRF to finish and drains the pipes.
        pipe.communicate()

        outFileName = ".".join([os.path.basename(gsTrack.path)] + parameters + ["dat"])
        outFilePath = os.path.join(trackDirName, outFileName)
        resultList = cls.parseTRFResultFile(outFilePath, minConsensusLength,
                                            maxConsensusLength, minCopyNumber)
        if resultList:
            resultsDict[gsTrack.title] = resultList

    def regionBedFields(repeatRegion):
        # Yield the single BED line (as a list of fields) of a repeat region.
        if useFullCopies:
            endPosition = repeatRegion.endPositionFullCopies
        else:
            endPosition = repeatRegion.endPosition
        yield [repeatRegion.chromosome, str(repeatRegion.startPosition),
               str(endPosition), repeatRegion.bedName, '0',
               str(repeatRegion.strand)]

    def monomerBedFields(repeatRegion):
        # Yield one BED line (as a list of fields) per monomer of a repeat region.
        for repeatMonomer in repeatRegion._monomers:
            yield [repeatRegion.chromosome, str(repeatMonomer.startPosition),
                   str(repeatMonomer.endPosition), repeatMonomer.bedName, '0',
                   str(repeatRegion.strand)]

    def writeBedTracksGSuite(fileNamePrefix, titlePrefix, bedFieldsFunc, gSuiteOutputName):
        # Write one BED file per track with results, and a GSuite referring to them.
        bedTracksGSuite = GSuite()
        for trackName, trfResultList in resultsDict.iteritems():
            trackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn, extraFileName=(fileNamePrefix + trackName),
                suffix='bed')
            bedTrack = GSuiteTrack(trackUri, title=(titlePrefix + trackName),
                                   genome=gsuite.genome)
            ensurePathExists(bedTrack.path)
            with open(bedTrack.path, 'w') as bedFile:
                header = 'track name="' + trackName + '" description="' + trackName + '" priority=1'
                # '\n' and not os.linesep: the file is opened in text mode,
                # where the platform line ending is applied on write.
                bedFile.write(header + '\n')
                for trfResult in trfResultList:
                    for repeatRegion in trfResult._repeatRegionList:
                        for bedFields in bedFieldsFunc(repeatRegion):
                            bedFile.write('\t'.join(bedFields) + '\n')
            bedTracksGSuite.addTrack(bedTrack)
        GSuiteComposer.composeToFile(bedTracksGSuite, cls.extraGalaxyFn[gSuiteOutputName])

    if choices.regionsGSuite:
        writeBedTracksGSuite("Repeat_regions_", "Repeat regions ",
                             regionBedFields, 'Repeat regions (bed) GSuite')

    if choices.monomersGSuite:
        writeBedTracksGSuite("Repeat_monomers_", "Repeat monomers ",
                             monomerBedFields, 'Repeat monomers (bed) GSuite')

    # Table of the parameters selected for the analysis.
    analysisParamsTableColumnTitles = ['Parameter', 'Selected value']
    analysisParamsDict = OrderedDict([
        ('Tandem Repeat Finder tool version', cls.TRF_VERSION),
        ('Match', choices.match),
        ('Mismatch', choices.mismatch),
        ('Delta', choices.delta),
        ('Matching probability (Pm)', choices.pm),
        ('Indel probability (Pi)', choices.pi),
        ('Min score', choices.minscore),
        ('Max period', choices.maxperiod),
        ('Min consensus length', choices.minconsensus),
        ('Max consensus length', choices.maxconsensus),
        ('Min copy number', choices.mincopynumber),
    ])

    # Table of repeat statistics per track.
    countTableColumnTitles = ['Name', 'Nr of repeat regions', 'Avg copy number',
                              'Min copy number', 'Max copy number',
                              'Avg consensus length', 'Min consensus length',
                              'Max consensus length']
    countTableDict = OrderedDict()
    from numpy import mean
    for trackName, trfResultList in resultsDict.iteritems():
        repeatRegionsNr = sum([x.repeatRegionsCount for x in trfResultList])

        copyNumberList = []
        consensusLengthList = []
        for trfRes in trfResultList:
            copyNumberList += trfRes.copyNumberList if useFullCopies \
                else trfRes.realCopyNumberList
            consensusLengthList += trfRes.consensusLengths

        countTableDict[trackName] = [
            repeatRegionsNr,
            mean(copyNumberList),
            min(copyNumberList),
            max(copyNumberList),
            mean(consensusLengthList),
            min(consensusLengthList),
            max(consensusLengthList),
        ]

    core = HtmlCore()
    core.begin()
    core.divBegin()
    core.tableFromDictionary(analysisParamsDict,
                             columnNames=analysisParamsTableColumnTitles,
                             sortable=False)
    core.divEnd()
    core.divBegin()
    core.tableFromDictionary(countTableDict, columnNames=countTableColumnTitles,
                             tableId='repeatCounts', sortable=True, presorted=0)
    core.divEnd()
    core.end()

    print(core)
# Example script: builds a small GSuite with one FTP track and one HyperBrowser
# track, composes it to a string and parses that string back into a new GSuite.

from collections import OrderedDict

from gold.gsuite.GSuite import GSuite
from gold.gsuite.GSuiteTrack import GSuiteTrack, FtpGSuiteTrack, HbGSuiteTrack
import gold.gsuite.GSuiteComposer as GSuiteComposer
import gold.gsuite.GSuiteParser as GSuiteParser

gSuite = GSuite()

# Remote track with a custom title and the attributes 'a' and 'b'.
uri1 = FtpGSuiteTrack.generateURI(netloc='server.com', path='file.bed')
ftpAttributes = OrderedDict([('a', 'yes'), ('b', 'no')])
gSuite.addTrack(GSuiteTrack(uri1, title='Track1', attributes=ftpAttributes))

# HyperBrowser track without a custom title, with the attributes 'b' and 'c'.
uri2 = HbGSuiteTrack.generateURI(
    trackName=['Genes and gene subsets', 'Genes', 'Refseq'])
hbAttributes = OrderedDict([('b', 'no'), ('c', 'yes')])
gSuite.addTrack(GSuiteTrack(uri2, attributes=hbAttributes))

gSuite.setGenomeOfAllTracks('hg19')

contents = GSuiteComposer.composeToString(gSuite)

print('GSuite file contents')
print('--------------------')
print(contents)

# Round trip: parse the composed contents into a second GSuite.
gSuite2 = GSuiteParser.parseFromString(contents)

print('Various ways of direct access')
print('-----------------------------')