Example #1
0
def _run_abyss_mergePairs(global_config, sample_config , sorted_libraries_by_insert):
    """Run the abyss-mergepairs tool once per paired "innie" library.

    The work happens inside an ``abyss_mergePairs`` sub-directory of the
    current working directory. For every library whose orientation is
    "innie" and that has a second read file, the configured executable is
    invoked with the configured options followed by
    ``-o <name> <read1> <read2>``, where ``<name>`` is built from the first
    two underscore-separated fields of the read1 file name. Standard output
    is appended to ``mergePairs_<name>.stdOut``; standard error is collected
    in ``mergePairs_<name>.stdErr`` and then moved to ``<name>.txt``.

    Args:
        global_config: mapping providing ``["Tools"]["abyss_mergePairs"]``
            with the keys ``"bin"`` (executable) and ``"options"``.
        sample_config: sample mapping; an ``"abyss_mergePairs"`` entry, when
            present, replaces the globally configured options.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` must provide ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        ``sample_config``, unchanged.
    """
    print("running abyss-mergepairs ...")
    assembler = "abyss_mergePairs"
    currentDirectory  = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): a true result is treated as "nothing to do"; presumably
    # the helper also creates the directory when it is missing -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory) # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(sorted_libraries_by_insert)
        programBIN      = global_config["Tools"][assembler]["bin"]
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        # Arguments shared by every invocation: executable plus options.
        baseCommand = [programBIN]
        baseCommand.extend(program_options)

        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            # Only paired-end "innie" libraries can be merged.
            if libraryInfo["orientation"] != "innie" or read2 is None:
                continue

            outputNameArray = read1.split('/')[-1].split('_')
            outputName = "{}_{}".format(outputNameArray[0], outputNameArray[1])

            # Build a fresh list for each library: extending the shared
            # base list would leak this library's arguments into the
            # command used for the following ones.
            command = baseCommand + ['-o', outputName, read1, read2]
            stdOutName = "mergePairs_{}.stdOut".format(outputName)
            stdErrName = "mergePairs_{}.stdErr".format(outputName)
            print(command)
            # Close both logs before the stderr file is moved.
            with open(stdOutName, "a") as abyss_stdOut:
                with open(stdErrName, "a") as abyss_stdErr:
                    subprocess.call(command, stdout=abyss_stdOut, stderr=abyss_stdErr)
            subprocess.call(["mv", stdErrName, "{}.txt".format(outputName)])
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(currentDirectory)
    return sample_config
Example #2
0
def _trinity_paired_read_args(sorted_libraries_by_insert):
    """Return the --left/--right arguments for every paired "innie" library.

    Each read path is passed with its last file extension stripped.
    """
    arguments = []
    for library, libraryInfo in sorted_libraries_by_insert:
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        if read2 is not None and libraryInfo["orientation"] == "innie":
            arguments.extend([
                "--left", os.path.splitext(read1)[0],
                "--right", os.path.splitext(read2)[0],
            ])
    return arguments


def _run_trinity(global_config, sample_config, sorted_libraries_by_insert):
    """Assemble with Trinity, align the reads back and quantify transcripts.

    The work happens inside a ``trinity`` sub-directory of the current
    working directory. Three scripts located under the configured Trinity
    ``bin`` prefix are run in sequence through the shell: ``Trinity.pl``,
    ``util/alignReads.pl`` and ``util/RSEM_util/run_RSEM_align_n_estimate.pl``.
    Their output is written to ``trinity.stdOut`` / ``trinity.stdErr``.
    Finally ``Trinity.fasta`` and the two RSEM result tables are copied next
    to the logs under names prefixed with ``sample_config["output"]``.

    Args:
        global_config: mapping providing ``["Tools"]["trinity"]["bin"]``,
            the path prefix the script names are appended to.
        sample_config: sample mapping; ``"output"`` names the copied results.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` must provide ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        ``sample_config``, unchanged. The function returns early, without
        running anything, when a paired library is not "innie".
    """
    print("running trinity ...")
    assembler = "trinity"
    outputName = sample_config["output"]
    currentDirectory  = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): a true result is treated as "nothing to do"; presumably
    # the helper also creates the directory when it is missing -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory) # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(sorted_libraries_by_insert)
        toolsPrefix = global_config["Tools"][assembler]["bin"]
        # NOTE(review): the configured "options" for trinity (global or per
        # sample) are not forwarded to any of the commands below.

        command = [toolsPrefix + "Trinity.pl", "--seqType", "fq", "--JM", "50G"]
        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                command.extend(["--single", read1])
            elif libraryInfo["orientation"] == "innie":
                command.extend(["--left", read1, "--right", read2])
            else:
                print("trinity: somthing wrong or unexpected in the sample config file")
                # The finally clause restores the working directory.
                return sample_config
        command.extend(["--output", "trinity"])

        pairedReadArgs = _trinity_paired_read_args(sorted_libraries_by_insert)

        # The same two log files collect the output of all three steps.
        # Exit codes are not checked: each step runs regardless of the
        # outcome of the previous one.
        with open("trinity.stdOut", "w") as assembler_stdOut, \
                open("trinity.stdErr", "w") as assembler_stdErr:
            print(command)
            subprocess.call(" ".join(command), stdout=assembler_stdOut,
                            stderr=assembler_stdErr, shell=True)

            # now align reads back to transcripts
            os.chdir("trinity")
            command = [toolsPrefix + "util/alignReads.pl",
                       "--target", "Trinity.fasta", "--seqType", "fq"]
            command.extend(pairedReadArgs)
            command.extend(["--aligner", "bowtie", "--retain_intermediate_files"])
            print(command)
            subprocess.call(" ".join(command), stdout=assembler_stdOut,
                            stderr=assembler_stdErr, shell=True)

            # now quantify transcripts
            command = [toolsPrefix + "util/RSEM_util/run_RSEM_align_n_estimate.pl",
                       "--transcripts", "Trinity.fasta", "--seqType", "fq"]
            command.extend(pairedReadArgs)
            print(command)
            subprocess.call(" ".join(command), stdout=assembler_stdOut,
                            stderr=assembler_stdErr, shell=True)

        # now copy results
        os.chdir("..")
        subprocess.call(["cp", "trinity/Trinity.fasta", "{}.fasta".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.isoforms.results", "{}.isoforms.results".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.genes.results", "{}.genes.results".format(outputName)])
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(currentDirectory)
    return sample_config