Python getCellLineFromFilename Examples

Programming Language: Python

Namespace/Package Name: fileUtil

Method/Function: getCellLineFromFilename

Examples at hotexamples.com: 2

Python getCellLineFromFilename - 2 examples found. These are the top rated real world Python examples of fileUtil.getCellLineFromFilename extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: runPicard.py Project: wukevin/ortiz-ucsf-bioinformatics

def runPicard(listOfBams):
    """Build and dispatch the Picard commands for every BAM file given.

    For each entry in listOfBams the cell line is obtained from
    fileUtil.getCellLineFromFilename and used to name the intermediate
    <cellLine>_rg_added_sorted.bam file and the final <cellLine>_dedupped.bam
    and <cellLine>_dedupped.metrics files. All AddOrReplaceReadGroups commands
    are passed to s.executeFunctions first, followed by all MarkDuplicates
    commands.
    """
    # Template: java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=star_output.sam O=rg_added_sorted.bam SO=coordinate RGID=id RGLB=library RGPL=platform RGPU=machine RGSM=sample
    addReadGroupsTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=%s O=%s_rg_added_sorted.bam SO=coordinate RGID=1 RGLB=illumina RGPL=illumina RGPU=illumina RGSM=%s"
    # Template: java -jar MarkDuplicates I=rg_added_sorted.bam O=dedupped.bam  CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=output.metrics
    markDuplicatesTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar MarkDuplicates I=%s_rg_added_sorted.bam O=%s_dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=%s_dedupped.metrics"

    # Look up each cell line exactly once, in input order, and keep it
    # paired with the BAM it came from.
    bamsWithCellLines = [
        (bamPath, fileUtil.getCellLineFromFilename(bamPath))
        for bamPath in listOfBams
    ]

    addReadGroupsBatch = [
        addReadGroupsTemplate % (bamPath, line, line)
        for bamPath, line in bamsWithCellLines
    ]
    # MarkDuplicates reads the file that AddOrReplaceReadGroups writes, so it
    # only needs the cell line, not the original BAM path.
    markDuplicatesBatch = [
        markDuplicatesTemplate % (line, line, line)
        for _, line in bamsWithCellLines
    ]

    # NOTE(review): simulate = True presumably makes this a dry run; confirm
    # against shellUtil.executeFunctions.
    s.executeFunctions(addReadGroupsBatch, parallel = True, simulate = True)
    s.executeFunctions(markDuplicatesBatch, parallel = True, simulate = True)

Example #2

Show file

File: MuTectWrapper.py Project: wukevin/ortiz-ucsf-bioinformatics

- NORMAL .bam file
- TUMOR .bam file
It automatically the outputfile name as well as the log filename based on
these .bam file inputs. The .bam files must have been processed already by
Picard and splitNCigarReads, which can be run by runPicard.py and 
runSplitNCigarReads.py, respectively.

Written by Kevin Wu, Ortiz Lab UCSF, August 2015
"""

import subprocess
import sys
sys.path.append("/home/ortiz-lab/Documents/kwu/scripts/util/")
import fileUtil as f
import shellUtil as s

# Exactly two positional arguments are required: the NORMAL .bam file and the
# TUMOR .bam file, in that order.
if len(sys.argv) != 3:
    print("Error. Wrong number of arguments.")
    print(helpDoc)
    # Exit with a non-zero status so calling scripts can detect the usage
    # error; sys.exit is used because the bare exit() helper is only
    # installed by the site module.
    sys.exit(1)
# (normalBam, tumorBam, outputFile, logFile)
normalBam = sys.argv[1]
tumorBam = sys.argv[2]

# The output name is derived from the cell line of each input; the log name
# is the output name with ".log" appended.
outputFile = "%s_normal_vs_%s_tumor_mutect.out" % (f.getCellLineFromFilename(normalBam), f.getCellLineFromFilename(tumorBam))
logFile = outputFile + ".log"

# Build the command as an argument list and run it without a shell, so that
# input paths containing spaces or shell metacharacters reach MuTect intact
# instead of being split or interpreted.
commandArgs = [
    "java", "-Xmx8g",
    "-jar", "/home/ortiz-lab/software/muTect/mutect-1.1.7.jar",
    "--analysis_type", "MuTect",
    "--reference_sequence", "/media/Data/genomes/hg19_ordered/hg19.fa",
    "--cosmic", "/home/ortiz-lab/software/muTect/bundle/liftedover_output_hg19.vcf",
    "--dbsnp", "/home/ortiz-lab/software/muTect/bundle/dbsnp_138.hg19.excluding_sites_after_129.vcf",
    "--input_file:normal", normalBam,
    "--input_file:tumor", tumorBam,
    "--out", outputFile,
    "-log", logFile,
]
# Echo the command in the same space-separated form as before.
print(" ".join(commandArgs))
returnCode = subprocess.call(commandArgs)
# Surface a MuTect failure to the caller instead of always exiting 0.
if returnCode != 0:
    sys.exit(returnCode)