# Script fragment: creates the analysis object, registers two replicate bams as inputs and one
# merged bam as output, then builds the MergeBamStep that will consume them.

# Set up 'ana' so she can do all the work.  If anaId matches another, then its log is extended
ana = GalaxyAnalysis(settingsFile, anaId, genome, expType)
if testOnly:
    ana.dryRun = testOnly

# What step expects:
# Inputs: 2 bam files, pre-registered in the analysis and both keyed as:
#             'bamRep' + replicateN + '.bam'
# Outputs: 1 merged bam keyed as: 'mergedRep' + replicate1 + 'Rep' + replicate2 + '.bam'

# Set up keys that join inputs through various file forwardings:
bamAkey = 'bamRep' + repA + '.bam'
bamBkey = 'bamRep' + repB + '.bam'
mergedBamKey = 'mergedRep' + repA + 'Rep' + repB + '.bam'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamAkey, 'galaxyInput', galaxyInputBamA)
ana.registerFile(bamBkey, 'galaxyInput', galaxyInputBamB)
nonGalaxyInput1 = ana.nonGalaxyInput(bamAkey)  # Registers and returns the outside location
nonGalaxyInput2 = ana.nonGalaxyInput(bamBkey)  # Need to register these to ensure nonGalOut naming

# Outputs:
ana.registerFile(mergedBamKey, 'galaxyOutput', galaxyOutMergedBam)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
# The two '%s' slots are paired with input1/input2 below (both input keys are passed by name).
ana.createOutFile(mergedBamKey, 'nonGalaxyOutput', '%s_%s_merged', ext='bam',
                  input1=bamAkey, input2=bamBkey)

# Establish step and run it:
step = MergeBamStep(ana, repA, repB)
# Script fragment: builds the key suffix from the replicate number (and aligner, when known),
# registers one annotation bam as input and the gene/transcript result tables as outputs,
# then builds the RsemStep.
suffix = "Rep" + repNo
# If a transformed/transformable bam is provided, the aligner name is prefixed to the suffix.
if alignedBy != 'unknown':
    suffix = alignedBy.capitalize() + suffix

# What step expects:
# Inputs: 1 Annotation alignment bam keyed: 'annotation' + suffix + '.bam'
# Outputs: 1 target Gene results tab file, keyed: 'quantifyGenesRsem' + suffix + '.tab'
#          1 target Transcript results tab file, keyed:
#              'quantifyTranscriptsRsem' + suffix + '.tab'
bamInputKey = 'annotation' + suffix + '.bam'  # Used to tie inputs together
genesFileKey = 'quantifyGenesRsem' + suffix + '.tab'  # Used to tie outputs together
transFileKey = 'quantifyTranscriptsRsem' + suffix + '.tab'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyBamInput)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location

# Outputs:
ana.registerFile(genesFileKey, 'galaxyOutput', galaxyOutGenes)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
ana.createOutFile(genesFileKey, 'nonGalaxyOutput', '%s_rsemGenes', ext='tab')
ana.registerFile(transFileKey, 'galaxyOutput', galaxyOutTrans)
ana.createOutFile(transFileKey, 'nonGalaxyOutput', '%s_rsemTranscripts', ext='tab')

# Establish step and run it:
step = RsemStep(ana, suffix)
# Script fragment: creates the analysis object, registers one replicate bam as input, derives
# the key suffix from the aligner name found in the non-Galaxy input path, and registers the
# bam-evaluation json output.
ana = GalaxyAnalysis(settingsFile, anaId, genome, expType)
if testOnly:
    ana.dryRun = testOnly

# What step expects:
# Inputs: 1 bam, pre-registered in the analysis keyed as: 'alignmentRep' + replicate + '.bam'
# Outputs: 1 interim Corr file, keyed as: 'strandCorr' + suffix + '.txt'
#          1 target json file,  keyed as: 'bamEvaluate' + suffix + '.json'
# TODO: Does a galaxy user really want the sample bam?

# Set up keys that join inputs through various file forwardings:
bamInputKey = 'alignmentRep' + repNo + '.bam'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location

# Detect the aligner from the input path.  All three markers are matched case-insensitively;
# previously only '_tophat' was compared against the un-lowered path, so a differently-cased
# tophat marker was silently missed while the star and bwa markers were not.
suffix = 'Rep' + repNo
inputPathLower = nonGalaxyInput.lower()
if '_star' in inputPathLower:
    suffix += 'ByStar'
elif '_tophat' in inputPathLower:
    suffix += 'ByTophat'
elif '_bwa' in inputPathLower:
    suffix += 'ByBwa'
bamSampleKey = 'alignment' + suffix + '_5M.bam'
bamEvalKey = 'bamEvaluate' + suffix + '.json'

# Outputs:
ana.registerFile(bamEvalKey, 'galaxyOutput', galaxyOutBamEval)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
ana.createOutFile(bamEvalKey, 'nonGalaxyOutput', '%s_bamEval', ext='json')
# Script fragment: records the tag length on the analysis, registers one aligned bam as input
# and the hotspot, peaks and density files as outputs, then builds the HotspotStep.
ana.setVar('tagLen', tagLength)

# What step expects:
# Inputs: 1 bam, pre-registered in analysis keyed as: 'alignment' + suffix + '.bam'
# Outputs: target broadPeak hotspot file, keyed as: 'hot' + suffix + '.bigBed'
#          target narrowPeak peaks file,  keyed as: 'peaks' + suffix + '.bigBed'
#          target density bigWig file,    keyed as: 'density' + suffix + '.bigWig'

# Set up keys that join inputs through various file forwardings:
bamInputKey = 'alignment' + suffix + '.bam'
hotKey = 'hot' + suffix + '.bigBed'
peakKey = 'peaks' + suffix + '.bigBed'
densityKey = 'density' + suffix + '.bigWig'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location

# Outputs:
ana.registerFile(hotKey, 'galaxyOutput', galaxyOutputHot)
ana.registerFile(peakKey, 'galaxyOutput', galaxyOutputPeaks)
ana.registerFile(densityKey, 'galaxyOutput', galaxyOutputDensity)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
ana.createOutFile(hotKey, 'nonGalaxyOutput', ext='bigBed')
ana.createOutFile(peakKey, 'nonGalaxyOutput', ext='bigBed')
ana.createOutFile(densityKey, 'nonGalaxyOutput', ext='bigWig')

# Establish step and run it:
step = HotspotStep(ana, suffix, tagLength)
# (continuation of the "What step expects" output list; its beginning is not in this fragment)
#          and either 4 (paired) signal files: 'signalStarRep' + replicate + 'UniqMinus.bw'
#                                              'signalStarRep' + replicate + 'UniqPlus.bw'
#                                              'signalStarRep' + replicate + 'AllMinus.bw'
#                                              'signalStarRep' + replicate + 'AllPlus.bw'
#          or 2 (unpaired) target signal files: 'signalStarRep' + replicate + 'Uniq.bw'
#                                               'signalStarRep' + replicate + 'All.bw'
genoBamKey = 'genomeAlignedStarRep' + repNo + '.bam'  # Used to tie outputs together
annoBamKey = 'annotationAlignedStarRep' + repNo + '.bam'  # Used to tie outputs together
statsKey = 'statisticsStarRep' + repNo + '.txt'  # Used to tie outputs together

# Establish inputs for galaxy and nonGalaxy alike
if pairedOrUnpaired == "paired":
    fastqRd1Key = 'tagsRd1Rep' + repNo + '.fastq'  # Used to tie inputs together
    fastqRd2Key = 'tagsRd2Rep' + repNo + '.fastq'  # Used to tie inputs together
    ana.registerFile(fastqRd1Key, 'galaxyInput', galaxyInputFile)
    ana.registerFile(fastqRd2Key, 'galaxyInput', galaxyInputFile2)
    nonGalaxyInput = ana.nonGalaxyInput(fastqRd1Key)  # Registers and returns the outside location
    nonGalaxyInput2 = ana.nonGalaxyInput(fastqRd2Key)  # Registers and returns the outside location

    # Outputs: each output name template has two '%s' slots, paired with input1/input2.
    ana.registerFile(genoBamKey, 'galaxyOutput', galaxyGenoBamOutput)
    resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
    ana.createOutFile(genoBamKey, 'nonGalaxyOutput', '%s_%s_starGenome', ext='bam',
                      input1=fastqRd1Key, input2=fastqRd2Key)
    ana.registerFile(annoBamKey, 'galaxyOutput', galaxyAnnoBamOutput)
    ana.createOutFile(annoBamKey, 'nonGalaxyOutput', '%s_%s_starAnnotation', ext='bam',
                      input1=fastqRd1Key, input2=fastqRd2Key)
    ana.registerFile(statsKey, 'galaxyOutput', galaxyStatsOut)
    ana.createOutFile(statsKey, 'nonGalaxyOutput', '%s_%s_starStats', ext='txt',
                      input1=fastqRd1Key, input2=fastqRd2Key)
    # signal bigWigs:
# Script fragment: marks the analysis as single-read, registers the aligned bam (plus a control
# bam for chipseq experiments) as input and the peaks/density files as outputs, then runs the
# MacsStep and exits with its return value.
ana.readType = 'single'

# What step expects:
# Inputs: 1 bam, pre-registered in analysis keyed as: 'alignment' + suffix + '.bam'
#         1 bam control (optional), pre-registered keyed as: 'control' + suffix + '.bam'
# Outputs: target narrowPeak peaks file, keyed as: 'peaks' + suffix + '.bigBed'
#          target density bigWig file,   keyed as: 'density' + suffix + '.bigWig'

# Set up keys that join inputs through various file forwardings:
bamInputKey = 'alignment' + suffix + '.bam'
controlInputKey = 'control' + suffix + '.bam'
peakKey = 'peaks' + suffix + '.bigBed'
densityKey = 'density' + suffix + '.bigWig'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location
if expType.lower() == 'chipseq':
    # NOTE(review): the control key is registered with the same galaxyInputFile as the main
    # alignment above.  That looks unintended, but no separate control-file variable is
    # visible in this fragment, so it is left unchanged -- confirm against the argument list.
    ana.registerFile(controlInputKey, 'galaxyInput', galaxyInputFile)
    ana.nonGalaxyInput(controlInputKey)

# Outputs:
ana.registerFile(peakKey, 'galaxyOutput', galaxyOutputPeaks)
ana.registerFile(densityKey, 'galaxyOutput', galaxyOutputDensity)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
ana.createOutFile(peakKey, 'nonGalaxyOutput', ext='bigBed')
ana.createOutFile(densityKey, 'nonGalaxyOutput', ext='bigWig')

# Establish step and run it:
step = MacsStep(ana, suffix, expType, isPaired)
sys.exit(step.run())
# Script fragment: records the read type on the analysis and registers either two (paired) or
# one (unpaired) fastq inputs together with the single alignment bam output.
ana.readType = pairedOrUnpaired

# What step expects:
# Inputs: 1 or 2 fastq files, pre-registered in the analysis keyed as:
#         Single: 'tagsRep'+replicate+'.fastq'
#         Paired: 'tagsRd1Rep'+replicate+'.fastq' and 'tagsRd2Rep'+replicate+'.fastq'
# Outputs: a single bam target keyed as:
#          'alignmentRep'+replicate+'.bam'
bamFileKey = 'alignmentRep' + repNo + '.bam'  # Used to tie outputs together

# Establish inputs for galaxy and nonGalaxy alike
if pairedOrUnpaired == "paired":
    fastqRd1Key = 'tagsRd1Rep' + repNo + '.fastq'  # Used to tie inputs together
    fastqRd2Key = 'tagsRd2Rep' + repNo + '.fastq'  # Used to tie inputs together
    ana.registerFile(fastqRd1Key, 'galaxyInput', galaxyInputFile)
    ana.registerFile(fastqRd2Key, 'galaxyInput', galaxyInputFile2)
    nonGalaxyInput = ana.nonGalaxyInput(fastqRd1Key)  # Registers and returns the outside location
    nonGalaxyInput2 = ana.nonGalaxyInput(fastqRd2Key)  # Registers and returns the outside location
    # Outputs:
    ana.registerFile(bamFileKey, 'galaxyOutput', galaxyOutputFile)
    resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
    # The two '%s' slots are paired with input1/input2 (both read files are passed by key).
    ana.createOutFile(bamFileKey, 'nonGalaxyOutput', '%s_%s', ext='bam',
                      input1=fastqRd1Key, input2=fastqRd2Key)
else:
    fastqKey = 'tagsRep' + repNo + '.fastq'  # Used to tie inputs together
    ana.registerFile(fastqKey, 'galaxyInput', galaxyInputFile)
    nonGalaxyInput = ana.nonGalaxyInput(fastqKey)  # Registers and returns the outside location
    # Outputs:
    ana.registerFile(bamFileKey, 'galaxyOutput', galaxyOutputFile)
    resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
# Script fragment: creates the analysis object, registers two replicate bams as inputs and one
# merged bam as output, then runs the MergeBamStep and exits with its return value.

# Set up 'ana' so she can do all the work.  If anaId matches another, then its log is extended
ana = GalaxyAnalysis(settingsFile, anaId, genome, expType)
if testOnly:
    ana.dryRun = testOnly

# What step expects:
# Inputs: 2 bam files, pre-registered in the analysis and both keyed as:
#             'bamRep' + replicateN + '.bam'
# Outputs: 1 merged bam keyed as: 'mergedRep' + replicate1 + 'Rep' + replicate2 + '.bam'

# Set up keys that join inputs through various file forwardings:
bamAkey = 'bamRep' + repA + '.bam'
bamBkey = 'bamRep' + repB + '.bam'
mergedBamKey = 'mergedRep' + repA + 'Rep' + repB + '.bam'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamAkey, 'galaxyInput', galaxyInputBamA)
ana.registerFile(bamBkey, 'galaxyInput', galaxyInputBamB)
nonGalaxyInput1 = ana.nonGalaxyInput(bamAkey)  # Registers and returns the outside location
nonGalaxyInput2 = ana.nonGalaxyInput(bamBkey)  # Need to register these to ensure nonGalOut naming

# Outputs:
ana.registerFile(mergedBamKey, 'galaxyOutput', galaxyOutMergedBam)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
# The two '%s' slots are paired with input1/input2 below (both input keys are passed by name).
ana.createOutFile(mergedBamKey, 'nonGalaxyOutput', '%s_%s_merged', ext='bam',
                  input1=bamAkey, input2=bamBkey)

# Establish step and run it:
step = MergeBamStep(ana, repA, repB)
sys.exit(step.run())
# (continuation of the "What step expects" output list; its beginning is not in this fragment)
#          html file in that directory keyed as: 'fastqVal' + suffix + '.html'
#          json file keyed as: 'fastqVal' + suffix + '.json'

# Set up keys that join inputs through various file forwardings:
suffix = ana.galaxyFileId(galaxyInputFile)  # suffix needs to be based on the input file
if suffix == '-1':
    # No galaxy file id was returned, so fall back to the 'root' part of the input file name.
    suffix = ana.fileGetPart(galaxyInputFile, 'root')
inputKey = 'tags' + suffix + '.fastq'
valDirKey = 'fastqValDir' + suffix
valZipKey = 'fastqVal' + suffix + '.zip'
valHtmlKey = 'fastqVal' + suffix + '.html'
valJsonKey = 'fastqVal' + suffix + '.json'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(inputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(inputKey)  # Registers and returns the outside location

# Output is complete dir
ana.registerFile(valDirKey, 'galaxyOutput', galaxyOutputDir)
ana.createOutFile(valDirKey, 'nonGalaxyOutput', '%s_sample_fastqc', ext='dir')
ana.createOutFile(valZipKey, 'nonGalaxyOutput', '%s_sample_fastqc', ext='zip')
ana.createOutFile(valJsonKey, 'nonGalaxyOutput', '%s_validate', ext='json')

# Galaxy needs to know about a single file within the dir.  While it is moved to the
# analysisDir as part of the htmlDir, it must be manually moved for galaxy.  Thus, the
# standard analysis file forwarding is slightly modified here:
ana.registerFile(valHtmlKey, 'galaxyOutput', galaxyOutputHtml)
nonGalaxyOutput = ana.createOutFile(valHtmlKey, 'nonGalaxyOutput',
                                    '%s_sample_fastqc/fastqc_report', ext='html')
# Script fragment: records the read type on the analysis and registers either two (paired) or
# one (unpaired) fastq inputs together with the single alignment bam output.
ana.readType = pairedOrUnpaired

# What step expects:
# Inputs: 1 or 2 fastq files, pre-registered in the analysis keyed as:
#         Single: 'tagsRep'+replicate+'.fastq'
#         Paired: 'tagsRd1Rep'+replicate+'.fastq' and 'tagsRd2Rep'+replicate+'.fastq'
# Outputs: a single bam target keyed as:
#          'alignmentRep'+replicate+'.bam'
bamFileKey = 'alignmentRep' + repNo + '.bam'  # Used to tie outputs together

# Establish inputs for galaxy and nonGalaxy alike
if pairedOrUnpaired == "paired":
    fastqRd1Key = 'tagsRd1Rep' + repNo + '.fastq'  # Used to tie inputs together
    fastqRd2Key = 'tagsRd2Rep' + repNo + '.fastq'  # Used to tie inputs together
    ana.registerFile(fastqRd1Key, 'galaxyInput', galaxyInputFile)
    ana.registerFile(fastqRd2Key, 'galaxyInput', galaxyInputFile2)
    nonGalaxyInput = ana.nonGalaxyInput(fastqRd1Key)  # Registers and returns the outside location
    nonGalaxyInput2 = ana.nonGalaxyInput(fastqRd2Key)  # Registers and returns the outside location
    # Outputs:
    ana.registerFile(bamFileKey, 'galaxyOutput', galaxyOutputFile)
    resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
    # The two '%s' slots are paired with input1/input2 (both read files are passed by key).
    ana.createOutFile(bamFileKey, 'nonGalaxyOutput', '%s_%s', ext='bam',
                      input1=fastqRd1Key, input2=fastqRd2Key)
else:
    fastqKey = 'tagsRep' + repNo + '.fastq'  # Used to tie inputs together
    ana.registerFile(fastqKey, 'galaxyInput', galaxyInputFile)
    # NOTE(review): this fragment is cut off mid-statement at this point.  It ends with:
    #     nonGalaxyInput = ana.nonGalaxyInput(
    # The call's argument and everything after it are not visible here, so the statement is
    # preserved as a comment instead of being completed by guesswork.
# (continuation of the "What step expects" output list; its beginning is not in this fragment)
#          zipped file of that directory keyed as: 'fastqVal' + suffix + '.zip'
#          html file in that directory keyed as: 'fastqVal' + suffix + '.html'
#          json file keyed as: 'fastqVal' + suffix + '.json'

# Set up keys that join inputs through various file forwardings:
suffix = ana.galaxyFileId(galaxyInputFile)  # suffix needs to be based on the input file
if suffix == '-1':
    # No galaxy file id was returned, so fall back to the 'root' part of the input file name.
    suffix = ana.fileGetPart(galaxyInputFile, 'root')
inputKey = 'tags' + suffix + '.fastq'
valDirKey = 'fastqValDir' + suffix
valZipKey = 'fastqVal' + suffix + '.zip'
valHtmlKey = 'fastqVal' + suffix + '.html'
valJsonKey = 'fastqVal' + suffix + '.json'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(inputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(inputKey)  # Registers and returns the outside location

# Output is complete dir
ana.registerFile(valDirKey, 'galaxyOutput', galaxyOutputDir)
ana.createOutFile(valDirKey, 'nonGalaxyOutput', '%s_sample_fastqc', ext='dir')
ana.createOutFile(valZipKey, 'nonGalaxyOutput', '%s_sample_fastqc', ext='zip')
ana.createOutFile(valJsonKey, 'nonGalaxyOutput', '%s_validate', ext='json')

# Galaxy needs to know about a single file within the dir.  While it is moved to the
# analysisDir as part of the htmlDir, it must be manually moved for galaxy.  Thus, the
# standard analysis file forwarding is slightly modified here:
ana.registerFile(valHtmlKey, 'galaxyOutput', galaxyOutputHtml)
nonGalaxyOutput = ana.createOutFile(valHtmlKey, 'nonGalaxyOutput',
                                    '%s_sample_fastqc/fastqc_report', ext='html')
# Deliberately not registered as a galaxy output (galaxy zips it anyway):
#ana.registerFile(valZipKey,'galaxyOutput',galaxyOutputZip) # No need: galaxy zips it anyway
# Script fragment: creates the analysis object, builds the key suffix from the replicate number
# (and aligner, when known), registers one aligned bam as input and one signal file as output,
# then runs the BamToBwStep and exits with its return value.
ana = GalaxyAnalysis(settingsFile, anaId, genome, expType)
if testOnly:
    ana.dryRun = testOnly

suffix = "Rep" + repNo
if alignedBy != 'unknown':
    suffix = alignedBy.capitalize() + suffix

# What step expects:
# Inputs: 1 bam, pre-registered in analysis, keyed as: 'alignment' + suffix + '.bam'
# Outputs: target signal file, keyed as: 'signal' + suffix + readFilter + strand + '.bw'

# Set up keys that join inputs through various file forwardings:
bamInputKey = 'alignment' + suffix + '.bam'
signalKey = 'signal' + suffix + readFilter + strand + '.bw'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location

# Outputs:
ana.registerFile(signalKey, 'galaxyOutput', galaxyOutSignal)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
# The output name template carries the read filter (lower-cased) and strand (capitalized).
ana.createOutFile(signalKey, 'nonGalaxyOutput',
                  '%s_' + readFilter.lower() + strand.capitalize(), ext='bw')

# Establish step and run it:
step = BamToBwStep(ana, suffix, readFilter, strand)
sys.exit(step.run())
# Script fragment: records the tag length on the analysis, registers one aligned bam as input
# and the hotspot, peaks and density files as outputs, then runs the HotspotStep and exits with
# its return value.
ana.setVar('tagLen', tagLength)

# What step expects:
# Inputs: 1 bam, pre-registered in analysis keyed as: 'alignment' + suffix + '.bam'
# Outputs: target broadPeak hotspot file, keyed as: 'hot' + suffix + '.bigBed'
#          target narrowPeak peaks file,  keyed as: 'peaks' + suffix + '.bigBed'
#          target density bigWig file,    keyed as: 'density' + suffix + '.bigWig'

# Set up keys that join inputs through various file forwardings:
bamInputKey = 'alignment' + suffix + '.bam'
hotKey = 'hot' + suffix + '.bigBed'
peakKey = 'peaks' + suffix + '.bigBed'
densityKey = 'density' + suffix + '.bigWig'

# Establish inputs for galaxy and nonGalaxy alike
ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile)
nonGalaxyInput = ana.nonGalaxyInput(bamInputKey)  # Registers and returns the outside location

# Outputs:
ana.registerFile(hotKey, 'galaxyOutput', galaxyOutputHot)
ana.registerFile(peakKey, 'galaxyOutput', galaxyOutputPeaks)
ana.registerFile(densityKey, 'galaxyOutput', galaxyOutputDensity)
resultsDir = ana.resultsDir(galaxyPath)  # prefers nonGalaxyInput location over settings loc
ana.createOutFile(hotKey, 'nonGalaxyOutput', ext='bigBed')
ana.createOutFile(peakKey, 'nonGalaxyOutput', ext='bigBed')
ana.createOutFile(densityKey, 'nonGalaxyOutput', ext='bigWig')

# Establish step and run it:
step = HotspotStep(ana, suffix, tagLength)
sys.exit(step.run())
ana = GalaxyAnalysis(settingsFile, anaId, genome, expType) if testOnly: ana.dryRun = testOnly # What step expects: # Inputs: 1 bam, pre-registered in the analysis keyed as: 'alignmentRep' + replicate + '.bam' # Outputs: 1 interim Corr file, keyed as: 'strandCorr' + suffix + '.txt' # 1 target json file, keyed as: 'bamEvaluate' + suffix + '.json' # TODO: Does a galaxy user really want the sample bam? # set up keys that join inputs through various file forwardings: bamInputKey = 'alignmentRep' + repNo + '.bam' # Establish Inputs for galaxy and nonGalaxy alike ana.registerFile(bamInputKey, 'galaxyInput', galaxyInputFile) nonGalaxyInput = ana.nonGalaxyInput( bamInputKey) # Registers and returns the outside location suffix = 'Rep' + repNo if nonGalaxyInput.lower().find('_star') != -1: suffix += 'ByStar' elif nonGalaxyInput.find('_tophat') != -1: suffix += 'ByTophat' elif nonGalaxyInput.lower().find('_bwa') != -1: suffix += 'ByBwa' bamSampleKey = 'alignment' + suffix + '_5M.bam' bamEvalKey = 'bamEvaluate' + suffix + '.json' # outputs: ana.registerFile(bamEvalKey, 'galaxyOutput', galaxyOutBamEval) resultsDir = ana.resultsDir(