def _run_abyss_mergePairs(global_config, sample_config , sorted_libraries_by_insert):
    """Run abyss-mergepairs once for every paired "innie" library.

    The work happens inside ``<cwd>/abyss_mergePairs``. For each library that
    has a second read file and orientation "innie", the configured binary is
    called as ``<bin> <options...> -o <name> <read1> <read2>``, where ``<name>``
    is built from the first two ``_``-separated fields of the read1 file name.
    Standard output is appended to ``mergePairs_<name>.stdOut``; standard
    error is appended to ``mergePairs_<name>.stdErr`` and then moved to
    ``<name>.txt``.

    Args:
        global_config: dict; ``global_config["Tools"]["abyss_mergePairs"]``
            must provide ``"bin"`` and ``"options"``.
        sample_config: dict; an ``"abyss_mergePairs"`` entry, when present,
            replaces the globally configured options.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` must provide ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        ``sample_config``, unchanged.
    """
    print("running abyss-mergepairs ...")
    assembler = "abyss_mergePairs"
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably directory_exists() creates the folder when it is
    # missing (the code enters it right below) -- confirm in `common`.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(sorted_libraries_by_insert)
        programBIN = global_config["Tools"][assembler]["bin"]
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        # Part of the command line shared by every library; never mutated below.
        baseCommand = [programBIN] + list(program_options)

        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if libraryInfo["orientation"] != "innie" or read2 is None:
                continue
            # "<field0>_<field1>" of the file name; a name without "_" is used
            # as it is instead of raising IndexError.
            outputName = "_".join(read1.split('/')[-1].split('_')[:2])
            # Build a fresh list for every library: the previous code aliased
            # the shared list, so '-o <name> <reads>' of earlier libraries
            # leaked into the command line of later ones.
            currentCommand = baseCommand + ['-o', outputName, read1, read2]
            stdOutName = "mergePairs_{}.stdOut".format(outputName)
            stdErrName = "mergePairs_{}.stdErr".format(outputName)
            with open(stdOutName, "a") as abyss_stdOut, \
                    open(stdErrName, "a") as abyss_stdErr:
                print(currentCommand)
                subprocess.call(currentCommand, stdout=abyss_stdOut, stderr=abyss_stdErr)
            # The log handles are closed before the stderr file is moved.
            command_mv = ["mv", stdErrName, "{}.txt".format(outputName)]
            subprocess.call(command_mv)
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(currentDirectory)
    return sample_config
def _prepare_folder_structure(assembler,assemblyDirectory):
    """Tell whether the work folder of an assembler is already in place.

    Args:
        assembler: assembler name, only used in the printed message.
        assemblyDirectory: path handed to ``common.directory_exists``.

    Returns:
        1 (after printing a notice) when ``common.directory_exists`` reports
        the folder as existing, 0 otherwise.
    """
    # NOTE(review): any side effect of directory_exists() (e.g. creating the
    # folder) is not visible from here -- confirm in `common`.
    already_there = common.directory_exists(assemblyDirectory)
    if not already_there:
        return 0
    notice = "Assembler {} asumer already computed as folder {} exists".format(assembler,assemblyDirectory)
    print(notice)
    return 1
def _trinity_paired_read_args(sorted_libraries_by_insert):
    """Build the ``--left``/``--right`` arguments for the post-assembly steps.

    Only libraries with a second read file and orientation "innie" contribute.
    The extension is stripped from both read paths.
    """
    arguments = []
    for _library, libraryInfo in sorted_libraries_by_insert:
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        if read2 is not None and libraryInfo["orientation"] == "innie":
            arguments.extend([
                "--left", "{}".format(os.path.splitext(read1)[0]),
                "--right", "{}".format(os.path.splitext(read2)[0]),
            ])
    return arguments


def _trinity_run_step(stepName, command, stdOut, stdErr):
    """Print and run one pipeline command through the shell; report a failure."""
    print(command)
    # The arguments are joined with blanks and interpreted by the shell, so
    # paths containing whitespace or shell metacharacters are not supported.
    returnValue = subprocess.call(" ".join(command), stdout=stdOut, stderr=stdErr, shell=True)
    if returnValue != 0:
        print("trinity: {} exited with return code {}".format(stepName, returnValue))
    return returnValue


def _run_trinity(global_config, sample_config, sorted_libraries_by_insert):
    """Assemble with Trinity, align the reads back and quantify transcripts.

    Everything runs inside ``<cwd>/trinity``; output of all three tools goes
    to ``trinity.stdOut`` / ``trinity.stdErr`` in that folder:

    1. ``Trinity.pl --seqType fq --JM 50G ... --output trinity``, with
       ``--single`` for libraries without a second read file and
       ``--left/--right`` for "innie" pairs.
    2. ``util/alignReads.pl`` (bowtie, intermediate files retained) against
       ``Trinity.fasta``, paired "innie" libraries only.
    3. ``util/RSEM_util/run_RSEM_align_n_estimate.pl`` on the same input.
    4. ``Trinity.fasta``, ``RSEM.isoforms.results`` and ``RSEM.genes.results``
       are copied to ``<output>.fasta``, ``<output>.isoforms.results`` and
       ``<output>.genes.results``.

    Args:
        global_config: dict; ``global_config["Tools"]["trinity"]["bin"]`` is
            used as a prefix for the script paths.
        sample_config: dict; ``sample_config["output"]`` names the result files.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` must provide ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        ``sample_config``, unchanged. The function returns early when the
        assembly folder is reported as existing, or when a paired library is
        not "innie".
    """
    print("running trinity ...")
    assembler = "trinity"
    outputName = sample_config["output"]
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably directory_exists() creates the folder when it is
    # missing (the code enters it right below) -- confirm in `common`.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(sorted_libraries_by_insert)
        trinityHome = global_config["Tools"][assembler]["bin"]
        # NOTE(review): configured "options" for trinity were looked up but
        # never passed to any command; they are still not applied here.

        ########### HERE IT START THE SPECIFIC ASSEMBLER PART
        command = [trinityHome + "Trinity.pl", "--seqType", "fq", "--JM", "50G"]
        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                command.extend(["--single", "{}".format(read1)])
            elif libraryInfo["orientation"] == "innie":
                command.extend(["--left", "{}".format(read1),
                                "--right", "{}".format(read2)])
            else:
                print("trinity: somthing wrong or unexpected in the sample config file")
                # The `finally` below restores the working directory; this
                # early return used to leave the process in the assembly folder.
                return sample_config
        command.extend(["--output", "trinity"])

        with open("trinity.stdOut", "w") as assembler_stdOut, \
                open("trinity.stdErr", "w") as assembler_stdErr:
            _trinity_run_step("Trinity.pl", command, assembler_stdOut, assembler_stdErr)

            # now align reads back to transcripts
            os.chdir("trinity")
            pairedReads = _trinity_paired_read_args(sorted_libraries_by_insert)
            command = [trinityHome + "util/alignReads.pl",
                       "--target", "Trinity.fasta", "--seqType", "fq"]
            command.extend(pairedReads)
            command.extend(["--aligner", "bowtie", "--retain_intermediate_files"])
            _trinity_run_step("alignReads.pl", command, assembler_stdOut, assembler_stdErr)

            # now quantify transcripts
            command = [trinityHome + "util/RSEM_util/run_RSEM_align_n_estimate.pl",
                       "--transcripts", "Trinity.fasta", "--seqType", "fq"]
            command.extend(pairedReads)
            _trinity_run_step("run_RSEM_align_n_estimate.pl", command,
                              assembler_stdOut, assembler_stdErr)

        # now copy results next to the trinity output folder
        os.chdir(assemblyDirectory)
        subprocess.call(["cp", "trinity/Trinity.fasta", "{}.fasta".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.isoforms.results", "{}.isoforms.results".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.genes.results", "{}.genes.results".format(outputName)])
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(currentDirectory)
    return sample_config