def getAnalysisIDs(participant_id):
    """Return the RNA-Seq analysis IDs that cgquery reports for a participant.

    Runs ``cgquery`` with the query
    ``participant_id=<participant_id>&library_strategy=RNA-Seq``, prints the
    equivalent shell command, and collects the value from every output line
    that mentions ``analysis_id``.

    Args:
        participant_id: Participant identifier substituted into the query.

    Returns:
        A list of analysis ID strings, in the order cgquery printed them.

    Raises:
        subprocess.CalledProcessError: If cgquery exits with a non-zero status.
        OSError: If the cgquery executable cannot be started.
    """
    query = "participant_id=%s&library_strategy=RNA-Seq" % (participant_id)
    print('cgquery "%s"' % query)

    # Pass an argument list rather than a shell string so that shell
    # metacharacters in participant_id are never interpreted.
    # universal_newlines=True makes check_output return text; without it
    # Python 3 returns bytes and the substring test below raises TypeError.
    output = subprocess.check_output(["cgquery", query], universal_newlines=True)

    ids = []
    for line in output.splitlines():
        if "analysis_id" not in line:
            continue
        tokens = [x.strip() for x in line.split(":")]
        # A matching line without a ":" separator carries no value; skip it
        # instead of failing with IndexError.
        if len(tokens) > 1:
            ids.append(tokens[1])
    return ids


def downloadFromAnalysisID(analysis_id, keyFile="/media/Data2/TCGA_SKCM/cghub.key"):
    """Build and print the gtdownload command for one analysis ID.

    The command is only printed; the call that would hand it to
    ``s.executeFunctions`` is disabled below.

    Args:
        analysis_id: Analysis identifier passed to ``gtdownload -d``.
        keyFile: Path of the key file passed to ``gtdownload -c``.
    """
    commandTemplate = " gtdownload -v -c %s -d %s"
    command = commandTemplate % (keyFile, analysis_id)
    print(command)
    # Execution is currently disabled:
    # s.executeFunctions(command, simulate = True, captureOutput = False)


# Driver: participant IDs come from piped stdin when present. Otherwise fall
# back to the command-line arguments, which the original checked for but
# never used.
if not s.isStdInEmpty():
    participants = s.getStdIn()
elif len(sys.argv) > 1:
    participants = sys.argv[1:]
else:
    print("You need to input something.")
    participants = []

for participant in participants:
    for analysis_id in getAnalysisIDs(participant):
        downloadFromAnalysisID(analysis_id)
cellLine = fileUtil.getCellLineFromFilename(bamFile) # Runs AddOrReplaceReadGroups # Template: java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=star_output.sam O=rg_added_sorted.bam SO=coordinate RGID=id RGLB=library RGPL=platform RGPU=machine RGSM=sample commandTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=%s O=%s_rg_added_sorted.bam SO=coordinate RGID=1 RGLB=illumina RGPL=illumina RGPU=illumina RGSM=%s" command = commandTemplate % (bamFile, cellLine, cellLine) readGroupsCommands.append(command) # command = command + bamFile + " " # command = command + "O=" + cellLine + "_rg_added_sorted.bam " # command = command + "SO=coordinate RGID=1 RGLB=illumina RGPL=illumina RGPU=illumina RGSM=" + cellLine + "\n" # outputScript.write(command) # Runs MarkDuplicates # Template: java -jar MarkDuplicates I=rg_added_sorted.bam O=dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=output.metrics commandTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar MarkDuplicates I=%s_rg_added_sorted.bam O=%s_dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=%s_dedupped.metrics" command = commandTemplate % (cellLine, cellLine, cellLine) markDuplicatesCommands.append(command) # command = command + "O=" + cellLine + "_dedupped.bam " # command = command + "CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=" + cellLine + "output.metrics\n" # outputScript.write(command) # outputScript.write("\n") s.executeFunctions(readGroupsCommands, parallel = True, simulate = True) s.executeFunctions(markDuplicatesCommands, parallel = True, simulate = True) if s.isStdInEmpty(): if len(sys.argv) > 1: runPicard(sys.argv[1:]) else: print("Error. No piped input or command line arguments") print(helpDoc) else: runPicard(s.getStdIn())