def runPicard(listOfBams):
    """Build Picard commands for every BAM and hand them to ``s.executeFunctions``.

    For each entry of ``listOfBams`` the cell line is obtained from
    ``fileUtil.getCellLineFromFilename`` and two command strings are built:

    1. ``AddOrReplaceReadGroups`` reads the input file and writes
       ``<cellLine>_rg_added_sorted.bam`` (``SO=coordinate``, ``RGSM`` set to
       the cell line, the remaining read-group fields fixed).
    2. ``MarkDuplicates`` reads that file and writes
       ``<cellLine>_dedupped.bam`` and ``<cellLine>_dedupped.metrics``.

    All read-group commands are submitted in one ``s.executeFunctions`` call,
    followed by all duplicate-marking commands in a second call.

    Output names are derived from the cell line only, so inputs that map to
    the same cell line produce identical output paths.

    Args:
        listOfBams: iterable of input paths; each one is substituted into the
            ``I=`` argument of ``AddOrReplaceReadGroups``.
    """
    # Picard usage: AddOrReplaceReadGroups I=<in> O=<out> SO=coordinate
    # RGID=id RGLB=library RGPL=platform RGPU=machine RGSM=sample
    readGroupsTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=%s O=%s_rg_added_sorted.bam SO=coordinate RGID=1 RGLB=illumina RGPL=illumina RGPU=illumina RGSM=%s"
    # Picard usage: MarkDuplicates I=<in> O=<out> CREATE_INDEX=true
    # VALIDATION_STRINGENCY=SILENT M=<metrics>
    markDuplicatesTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar MarkDuplicates I=%s_rg_added_sorted.bam O=%s_dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=%s_dedupped.metrics"

    # Resolve each cell line once, keeping the input order.
    bamsWithSample = [
        (bamPath, fileUtil.getCellLineFromFilename(bamPath))
        for bamPath in listOfBams
    ]

    readGroupsCommands = [
        readGroupsTemplate % (bamPath, sample, sample)
        for bamPath, sample in bamsWithSample
    ]
    markDuplicatesCommands = [
        markDuplicatesTemplate % (sample, sample, sample)
        for _, sample in bamsWithSample
    ]

    # MarkDuplicates consumes the files written by AddOrReplaceReadGroups, so
    # the read-group batch is submitted first.
    # NOTE(review): simulate=True is hard-coded; presumably a dry run --
    # confirm against s.executeFunctions.
    s.executeFunctions(readGroupsCommands, parallel=True, simulate=True)
    s.executeFunctions(markDuplicatesCommands, parallel=True, simulate=True)
# Example #2
# 0
def runStar(fastq1, fastq2, genome = "/media/Data/genomes/STAR_index_hg19_vGATK/STAR_genomeDir_hg19_vGATK"):
    """Build a STAR alignment command and pass it to ``s.executeFunctions``.

    The command asks STAR for a coordinate-sorted BAM
    (``--outSAMtype BAM SortedByCoordinate``), adds the strand field derived
    from intron motifs, removes non-canonical intron motifs, and runs with
    16 threads.

    Args:
        fastq1: first value given to ``--readFilesIn``.
        fastq2: second value given to ``--readFilesIn``.
        genome: value given to ``--genomeDir``; defaults to the hg19 index
            path hard-coded in the signature.
    """
    # The strand-field / intron-motif options are kept for the downstream
    # tool the original comment calls "cuff" (presumably Cufflinks -- confirm).
    starTemplate = "STAR --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outSAMtype BAM SortedByCoordinate --genomeDir %s --readFilesIn %s %s --runThreadN 16"
    starCommand = starTemplate % (genome, fastq1, fastq2)
    # NOTE(review): a single string is passed here, whereas runPicard passes
    # lists of commands to the same helper -- confirm that
    # s.executeFunctions accepts a bare string.
    s.executeFunctions(starCommand)