Exemple #1
0
    def run_transdecoder_predict(self,
                                 infasta,
                                 longorfs_dir,
                                 out_dir=None,
                                 verbose=False,
                                 quiet=False,
                                 logs=True,
                                 objectid="NA",
                                 **kwargs):
        """Run TransDecoder.Predict on a fasta file.

        Parameters
        ----------
        infasta: str
            Path to the input fasta file (passed as -t).
        longorfs_dir: str
            Path to an existing LongOrfs directory (passed as -O).
        out_dir: str
            Directory the .bed/.cds/.gff3/.pep files are moved to. When not
            given, nothing is moved and the current working directory is
            returned.
        verbose: bool
            Forwarded to run_transdecoder and to the file moves.
        quiet: bool
            Forwarded to run_transdecoder.
        logs: bool
            Forwarded to run_transdecoder.
        objectid: str
            Forwarded to run_transdecoder.
        kwargs: dict
            Extra options; these override the -t and -O values set here.

        :return: out_dir on success, "" if the inputs are missing or the run failed
        :rtype: string
        """

        # Validate inputs up front and stop; running the tool on missing
        # inputs can only fail.
        if not pu.check_files_exist(infasta):
            pu.print_boldred("Please check input file:" + infasta)
            return ""
        if not pu.check_paths_exist(longorfs_dir):
            pu.print_boldred("Path {} doesn't exist".format(longorfs_dir))
            return ""

        # Output files are only relocated when the caller asked for a directory
        move_flag = True
        if not out_dir:
            out_dir = os.getcwd()
            move_flag = False

        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        newOpts = {"-t": infasta, "-O": longorfs_dir}
        mergedOpts = {**newOpts, **kwargs}

        #execute Predict
        status = self.run_transdecoder('TransDecoder.Predict',
                                       verbose=verbose,
                                       quiet=quiet,
                                       logs=logs,
                                       objectid=objectid,
                                       **mergedOpts)
        if not status:
            pu.print_boldred("Transdecoder failed")
            return ""

        #move output files to outdir
        if move_flag:
            outfile_prefix = pu.get_filename(infasta) + ".transdecoder"
            for ext in (".bed", ".cds", ".gff3", ".pep"):
                pe.move_file(outfile_prefix + ext,
                             os.path.join(out_dir, outfile_prefix + ext),
                             verbose)
        return out_dir
Exemple #2
0
 def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """
     Build a salmon index and store the path to the index in self
     
     index_path: str
         path to the output directory
     index_name: str
         index name
     fasta: str
         path to the fasta file to index (passed as -t)
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Options to pass to salmon. The -t and -i values set here take precedence over kwargs
         
     :return: status of salmon index
     :rtype: bool
     """
     
     #check input
     if not pu.check_files_exist(fasta):
         pu.print_boldred("{} does not exist. Exiting".format(fasta))
         return False
     #create out dir
     if not pu.check_paths_exist(index_path):
         if not pu.mkdir(index_path):
             print("ERROR in building salmon index. Failed to create index directory.")
             return False
     indexOut=os.path.join(index_path,index_name)
     #newOpts is unpacked last so -t and -i always win over kwargs
     newOpts={"-t":fasta,"-i":indexOut}
     mergedOpts={**kwargs,**newOpts}
     
     #call salmon
     status=self.run_salmon("index",verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**mergedOpts)
     
     #success requires a good status and the index path on disk
     if status and pu.check_paths_exist(indexOut):
         self.salmon_index=indexOut
         self.passedArgumentDict['-i']=self.salmon_index
         pu.print_green("salmon index is:"+self.salmon_index)
         return True
     
     pu.print_boldred("Failed to create salmon index")
     return False
Exemple #3
0
    def create_lock(self, target_list, message):
        """
        Creates a temporary .Lock file associated with each target file and writes a message in it.

        Parameters
        ----------
        target_list : List
            List of target files.
        message : Str
            Message to write in file.

        Returns
        -------
        templist : List
            A list of .Lock file names corresponding to the target files.

        """
        templist = []
        for f in target_list:
            temp_path = pu.get_file_directory(f)
            if not pu.check_paths_exist(temp_path):
                pu.mkdir(temp_path)
            prefix = pu.get_filename(f) + '_'
            # delete=False keeps the lock file on disk after the handle is
            # closed; the with-block guarantees the handle is closed and the
            # message is flushed even if the write raises.
            with tempfile.NamedTemporaryFile(prefix=prefix,
                                             suffix='.Lock',
                                             dir=temp_path,
                                             delete=False) as temp:
                #TODO: dump command in lock
                timestamp = pu.get_timestamp()
                temp.write(str.encode(timestamp + '\t' + message))

            templist.append(temp.name)
        return templist
Exemple #4
0
    def createMikadoGTFlist(self,
                            out_file,
                            out_dir,
                            searchPath,
                            searchQuery="*.gtf",
                            strand=False):
        """Create a file to be used by mikado configure

        Files matching searchQuery under searchPath are listed one per line
        as tab-separated: file path, file basename, str(strand).

        Parameters
        ----------
        out_file: str
            Name of the list file; ".txt" is appended.
        out_dir: str
            Directory to write the list file to; created if missing.
        searchPath: str
            Path handed to pe.find_files.
        searchQuery: str
            Pattern handed to pe.find_files.
        strand: bool
            Value written in the third column of every line.

        :return: Path to the written list file
        :rtype: string
        """

        files = pe.find_files(searchPath, searchQuery)

        #create out dir
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        outFilePath = os.path.join(out_dir, out_file + ".txt")

        gtfs = []
        for gtf in files:
            thisName = pu.get_file_basename(gtf)
            #entries without a basename are skipped
            if thisName:
                gtfs.append("\t".join([gtf, thisName, str(strand)]))

        #with-block closes the file even if the write fails
        with open(outFilePath, "w") as f:
            f.write("\n".join(gtfs))

        pu.print_green("Mikado list file written to:" + outFilePath)
        return outFilePath
Exemple #5
0
    def build_index(self,
                    in_fasta,
                    dbname,
                    out_dir=None,
                    threads=None,
                    verbose=False,
                    quiet=False,
                    logs=True,
                    objectid="NA",
                    **kwargs):
        """Create a diamond database with `makedb` and remember it in self.index.

        in_fasta: str
            fasta file to build the database from
        dbname: str
            database name, joined onto out_dir
        out_dir: str
            output directory; the current working directory when empty
        threads: int
            thread count; self.threads when empty
        verbose, quiet, logs, objectid:
            forwarded unchanged to run_diamond
        kwargs: dict
            extra options; these override --in, -d and --threads

        :return: True if the index already existed or was built, False otherwise
        :rtype: bool
        """

        # nothing to do without the input fasta
        if not pu.check_files_exist(in_fasta):
            pu.print_boldred(
                "Input fasta: {} not found...\n diamond makedb failed".format(
                    in_fasta))
            return False

        # resolve and create the output directory
        target_dir = out_dir if out_dir else os.getcwd()
        if not pu.check_paths_exist(target_dir):
            pu.mkdir(target_dir)

        # self.index is set before check_index() is called
        db_path = os.path.join(target_dir, dbname)
        self.index = db_path
        if self.check_index():
            pu.print_green("Diamond index: {} exists, using it...".format(
                self.index))
            self.index = db_path
            return True

        n_threads = threads if threads else self.threads

        # defaults first, caller's kwargs second so that kwargs win
        makedb_opts = {
            "--in": in_fasta,
            "-d": db_path,
            "--threads": str(n_threads)
        }
        makedb_opts = {**makedb_opts, **kwargs}

        built = self.run_diamond("makedb",
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid,
                                 **makedb_opts)
        if not built:
            return False

        self.index = db_path
        return True
Exemple #6
0
 def perform_qc(self,sra_object,out_dir="",out_suffix="_bbduk",objectid="NA"):
     """Run bbduk on fastq files specified by the sra_object
    
     sra_object: SRA
         An SRA object whose fastq files will be used
     out_dir: str
         Path to output directory. Defaults to sra_object.directory
     out_suffix: string
         Suffix for the output fastq files
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     
     :return: Returns the path of fastq files after QC. tuple has one item for single end files and 2 for paired. ("",) is returned if the run failed or the output files are missing.
     :rtype: tuple
         
     """
     #make out_dir
     if not out_dir:
         out_dir=sra_object.directory
     elif not pu.check_paths_exist(out_dir):
         pu.mkdir(out_dir)
            
     if sra_object.layout=='PAIRED':
         fq1=sra_object.fastq_path
         fq2=sra_object.fastq2_path
         out_file1Path=os.path.join(out_dir,pu.get_file_basename(fq1)+out_suffix+".fastq")
         out_file2Path=os.path.join(out_dir,pu.get_file_basename(fq2)+out_suffix+".fastq")
         out_files=(out_file1Path,out_file2Path)
         target=[out_file1Path,out_file2Path]
         internal_kwargs={"in":fq1,"in2":fq2,"out":out_file1Path,"out2":out_file2Path}
     else:
         fq=sra_object.fastq_path
         out_filePath=os.path.join(out_dir,pu.get_file_basename(fq)+out_suffix+".fastq")
         out_files=(out_filePath,)
         target=out_filePath
         internal_kwargs={"in":fq,"out":out_filePath}
     
     #run bbduk
     status=self.run(objectid=objectid,target=target,**internal_kwargs)
     
     #a failed run must not be reported as a set of valid output paths
     if not status:
         return ("",)
     #in a dry run the output files are not expected to exist
     if not pu.check_files_exist(*out_files) and not _dryrun:
         return ("",)
     
     return out_files
Exemple #7
0
def test_portcullis():
    """Run the full portcullis wrapper on the test data and check that the returned path exists."""
    pc = tools.Portcullis()
    port_out = pc.run_portcullisFull(testVars.genome,
                                     testVars.portcullisBam,
                                     out_dir=testVars.testDir,
                                     threads=8)

    # assert on truthiness directly instead of comparing with `== True`
    assert pu.check_paths_exist(port_out), "Failed portcullis run"
Exemple #8
0
 def init_from_path(self,path):
     """Initialise this object by scanning a local path for fastq and sra files.
     
     path: str
         Path to scan for RNA-Seq data
     
     :raises Exception: if path does not exist, or if neither fastq nor sra files are found locally after the scan
     """
     if not pu.check_paths_exist(path):
         raise Exception("Please provide a valid path to scan for RNA-Seq data")
     
     #scan path for fastq
     self.search_fastq(path)
     #scan path for sra
     self.search_sra(path)
     if not (self.fastqFilesExistsLocally() or self.sraFileExistsLocally()):
         #keep the path visually separated from the surrounding text
         raise Exception("No files found at: {}. Please provide a valid path to scan for RNA-Seq data".format(path))
Exemple #9
0
    def init_from_path(self, path):
        """Initialise this object from data found under `path`.

        The path is searched for fastq files first; sra files are only
        searched for when the fastq search reports nothing.

        :raises Exception: if `path` does not exist or both searches fail
        """
        invalid_path_msg = "Please provide a valid path to scan for RNA-Seq data"

        if not pu.check_paths_exist(path):
            raise Exception(invalid_path_msg)

        # `or` short-circuits, so search_sra runs only if search_fastq fails
        found = self.search_fastq(path) or self.search_sra(path)
        if not found:
            raise Exception(invalid_path_msg)
Exemple #10
0
def test_transdecoder():
    """Run LongOrfs followed by Predict on the small cDNA test set and check the returned path exists."""
    td = tools.Transdecoder()
    longOrfOut = td.run_transdecoder_longorfs(testVars.cdna_small,
                                              out_dir=testVars.testDir +
                                              "/longorfsout")
    preddir = testVars.testDir + "/predout"
    predout = td.run_transdecoder_predict(testVars.cdna_small,
                                          longOrfOut,
                                          out_dir=preddir)

    # assert on truthiness directly instead of comparing with `== True`
    assert pu.check_paths_exist(predout), "TransDecoder failed"
Exemple #11
0
    def perform_alignment(self,
                          sra_object,
                          out_suffix="_bowtie2",
                          out_dir="",
                          objectid="NA"):
        """Align the fastq files of an SRA object and return the result of sam_sorted_bam.

        Parameters
        ----------

        sra_object SRA object
            Provides fastq_path (and fastq2_path for PAIRED layout),
            srr_accession and directory.
        out_suffix: string
            Appended to the srr accession to name the output sam file
        out_dir: string
            Directory for the sam file; sra_object.directory when empty
        objectid: str
            Accepted for interface compatibility; the run below is tagged
            with sra_object.srr_accession instead.
        :return: Whatever tools.Samtools().sam_sorted_bam returns for the sam file, or "" if the run failed or the sam file is missing
        :rtype: string
        """
        # resolve the output directory, creating a user supplied one if needed
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = sra_object.directory

        sam_name = sra_object.srr_accession + out_suffix + ".sam"
        sam_path = os.path.join(out_dir, sam_name)

        # read inputs depend on the layout; -S is appended last in both cases
        if sra_object.layout == 'PAIRED':
            align_opts = {
                "-1": sra_object.fastq_path,
                "-2": sra_object.fastq2_path
            }
        else:
            align_opts = {"-U": sra_object.fastq_path}
        align_opts["-S"] = sam_path

        ok = self.run(None,
                      objectid=sra_object.srr_accession,
                      target=sam_path,
                      **align_opts)
        if not ok:
            return ""

        # a missing sam file is tolerated only in dry-run mode
        if not pu.check_files_exist(sam_path) and not _dryrun:
            return ""

        return tools.Samtools().sam_sorted_bam(sam_path)
Exemple #12
0
def generate_multiqc(directory,
                     tempDir,
                     outDir="",
                     coverage='a',
                     verbose=False,
                     cleanup=False):
    """
    Generate reports using multiqc

    The stdout recorded in every *_pyrpipe.log file found under `directory`
    is dumped to one text file per entry in `tempDir`, then multiqc is run.

    Parameters
    ----------
    directory : str
        path to directory containing logs.
    tempDir : str
        temp dir. Created if it does not exist.
    outDir : str, optional
        output dir. 'MultiQC_out' is used when empty. The default is "".
    coverage : char, optional
        commands to use in pyrpipe log: fa(i)led (p)assed or (a)ll. The default is 'a'.
    verbose : bool, optional
        print messages (currently not used by this function). The default is False.
    cleanup : bool, optional
        remove temp files (currently not used by this function). The default is False.

    Returns
    -------
    None.

    """
    #search all _pyrpipe.log files under directory
    #raw string: "\." is an invalid escape sequence in a normal literal
    files = pu.find_files(directory, r".*_pyrpipe\.log$", recursive=True)
    if not outDir:
        outDir = 'MultiQC_out'
    #create tempdir
    if not pu.check_paths_exist(tempDir):
        pu.mkdir(tempDir)
    #extract stdout from each file and save to temp
    for log_path in files:
        stdout = getStdoutFromLog(log_path, None, coverage)
        #id is the last '_' separated token before '_pyrpipe' in the log path
        fid = log_path.split('_pyrpipe')[0].split('_')[-1]
        for o in stdout:
            thisName = o + "_" + fid + ".txt"
            tempFile = os.path.join(tempDir, thisName)
            with open(tempFile, "w") as out_handle:
                out_handle.write(stdout[o])

    #run multiqc
    #NOTE(review): multiqc is pointed at `directory`, not at `tempDir` where
    #the stdout dumps were written -- confirm this is intended
    mc.run(analysis_dir=directory, outdir=outDir)
Exemple #13
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """Quantify the fastq files of an SRA object with kallisto quant
     
     sra_object: SRA
         Provides fastq_path (and fastq2_path for PAIRED layout), srr_accession and directory
     out_suffix: str
         inserted before ".tsv" in the final abundance file name
     out_dir: str
         output directory; <sra_object.directory>/kallisto_out when empty
     objectid: str
         Accepted for interface compatibility; the run is tagged with sra_object.srr_accession
    
     :return: Path to the abundance<out_suffix>.tsv file, or "" on failure
     :rtype: string
     """
     
     #a user supplied directory is created when missing; the default one is only named here
     if out_dir:
         if not pu.check_paths_exist(out_dir):
             pu.mkdir(out_dir)
     else:
         out_dir=os.path.join(sra_object.directory,"kallisto_out")
     
     #positional reads and options depend on the layout
     if sra_object.layout == 'PAIRED':
         reads=(sra_object.fastq_path,sra_object.fastq2_path)
         quant_opts={"-o":out_dir,"-i":self.index}
     else:
         reads=(sra_object.fastq_path,)
         quant_opts={"-o":out_dir,"--single":"","-i":self.index}
     
     #file produced by the run and the name it is moved to afterwards
     raw_tsv=os.path.join(out_dir,"abundance.tsv")
     final_tsv=os.path.join(out_dir,"abundance"+out_suffix+".tsv")
     
     #reuse an existing result unless forced
     if not _force and pu.check_files_exist(final_tsv):
         pu.print_green('Target files {} already exist.'.format(final_tsv))
         return final_tsv
     
     ok=self.run(*reads,subcommand='quant',objectid=sra_object.srr_accession,target=raw_tsv,**quant_opts)
     if not ok:
         return ""
     
     #nothing to rename in a dry run
     if _dryrun:
         return final_tsv
     
     pe.move_file(raw_tsv,final_tsv)
     if not pu.check_files_exist(final_tsv):
         return ""
     return final_tsv
Exemple #14
0
    def perform_assembly(self,
                         bam_file,
                         out_dir=None,
                         out_suffix="_stringtie",
                         objectid="NA"):
        """Assemble transcripts from a bam file with stringtie.

        Parameters
        ----------

        bam_file: string
            path to the input bam file
        out_dir: string
            output directory; the directory of bam_file when empty
        out_suffix: string
            appended to the bam basename to name the gtf file
        objectid: str
            id attached to this command, forwarded to self.run
        :return: Path to the output GTF file, or "" on failure
        :rtype: string
        """

        bam_base = pu.get_file_basename(bam_file)

        # fall back to the bam file's own directory
        if not out_dir:
            out_dir = pu.get_file_directory(bam_file)
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        gtf_path = os.path.join(out_dir, bam_base + out_suffix + ".gtf")

        # "-o" names the output; the "--" entry carries the positional input
        stringtie_opts = {"-o": gtf_path, "--": (bam_file, )}

        ok = self.run(None,
                      objectid=objectid,
                      target=gtf_path,
                      **stringtie_opts)
        if not ok:
            return ""

        # a missing gtf file is tolerated only in dry-run mode
        if not pu.check_files_exist(gtf_path) and not _dryrun:
            return ""
        return gtf_path
Exemple #15
0
 def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to build kallisto index
     
     index_path: str
         path to the output directory
     index_name: str
         index name
     fasta: str
         path to the fasta file to index (passed as a positional argument)
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Options to pass to kallisto. The positional fasta and the -i value set here take precedence over kwargs
         
     :return: Status of kallisto index
     :rtype: bool
     """
     
     #check input
     if not pu.check_files_exist(fasta):
         pu.print_boldred("{} does not exist. Exiting".format(fasta))
         return False
     
     #create out dir
     if not pu.check_paths_exist(index_path):
         if not pu.mkdir(index_path):
             print("ERROR in building kallisto index. Failed to create index directory.")
             return False
         
     indexOut=os.path.join(index_path,index_name)
     #newOpts is unpacked last so the input fasta and -i always win over kwargs
     newOpts={"--":(fasta,),"-i":indexOut}
     mergedOpts={**kwargs,**newOpts}
     
     #call kallisto
     status=self.run_kallisto("index",verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**mergedOpts)
     
     #success requires a good status and the index file on disk
     if status and pu.check_files_exist(indexOut):
         self.kallisto_index=indexOut
         self.passedArgumentDict['-i']=self.kallisto_index
         pu.print_green("kallisto_index is:"+self.kallisto_index)
         return True
     
     #every failure path reports and returns False (never an implicit None)
     pu.print_boldred("Failed to create kallisto index")
     return False
Exemple #16
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """Quantify the fastq files of an SRA object with salmon quant
     sra_object: SRA
         Provides fastq_path (and fastq2_path for PAIRED layout), srr_accession and directory
     out_suffix: str
         inserted before ".sf" in the final quant file name
     out_dir: str
         output directory; <sra_object.directory>/salmon_out when empty
     objectid: str
         Accepted for interface compatibility; the run is tagged with sra_object.srr_accession
     
     :return: Path to the quant<out_suffix>.sf file, or "" on failure
     :rtype: string
     """
     
     #a user supplied directory is created when missing; the default one is only named here
     if out_dir:
         if not pu.check_paths_exist(out_dir):
             pu.mkdir(out_dir)
     else:
         out_dir=os.path.join(sra_object.directory,"salmon_out")
     
     #common options first, then the layout specific reads, then the index
     quant_opts={"-o":out_dir,"-l":"A"}
     if sra_object.layout == 'PAIRED':
         quant_opts["-1"]=sra_object.fastq_path
         quant_opts["-2"]=sra_object.fastq2_path
     else:
         quant_opts["-r"]=sra_object.fastq_path
     quant_opts["-i"]=self.index
     
     #quant.sf is moved to final_sf after the run
     raw_sf=os.path.join(out_dir,"quant.sf")
     final_sf=os.path.join(out_dir,"quant"+out_suffix+".sf")
     
     #reuse an existing result unless forced
     if not _force and pu.check_files_exist(final_sf):
         pu.print_green('Target files {} already exist.'.format(final_sf))
         return final_sf
     
     #NOTE(review): target is the post-rename path, not quant.sf -- confirm this is intended
     ok=self.run(None,subcommand='quant',objectid=sra_object.srr_accession,target=final_sf,**quant_opts)
     if not ok:
         return ""
     
     #nothing to rename in a dry run
     if _dryrun:
         return final_sf
     
     pe.move_file(raw_sf,final_sf)
     if not pu.check_files_exist(final_sf):
         return ""
     return final_sf
def multiqc():
    """Command line entry point: parse the multiqc options and generate the report.

    Arguments are read from sys.argv[2:]. Defaults: output directory is the
    current working directory, temporary directory is ./tmp (created if
    missing).
    """
    print("Generating html report with multiqc")
    parser = argparse.ArgumentParser(
   
            description='pyrpipe diagnostic utility\nGenerate report with multiqc.',
            
            usage='''pyrpipe_diagnostic multiqc [<args>] <logfile>
                    
                    ''')    
    parser.add_argument('-o', help='out directory \ndefault: <./>',action="store")
    parser.add_argument('-c',help='Dump command options [(a)ll,fa(i)l,(p)ass]\ndefault: a',default='a',action="store")
    parser.add_argument('-v',help='verbose',action="store_true")
    parser.add_argument('-f',help='Filter by programs. Provide a comma-separated list e.g., prefetch,STAR,bowtie2 \ndefault None')
    parser.add_argument('-t',help='Temporary directory. \ndefault ./tmp',action="store")
    #store_true flag: the default is False, not a path
    parser.add_argument('-r',help='Remove stdout files after processing. \ndefault False',action="store_true")
    parser.add_argument('logfile', help='The log file generated by pyrpipe',action="store")
    args = parser.parse_args(sys.argv[2:])
    
    logFile=args.logfile
    
    #parse args
    if args.v:
        print("Generating MultiQC report")
    
    outDir=os.getcwd() if args.o is None else args.o
    
    filters=[] if args.f is None else args.f.split(',')
    
    #resolve and create temp dir
    tempDir=args.t if args.t is not None else os.path.join(os.getcwd(),"tmp")
    if not pu.check_paths_exist(tempDir):
        pu.mkdir(tempDir)
    
    #run multiqc
    generateMultiqcReport(logFile,filters,tempDir,outDir=outDir,coverage=args.c,verbose=args.v,cleanup=args.r)
Exemple #18
0
    def runMikadoSerialise(self,
                           jsonconf,
                           blastTargets,
                           orfs,
                           xml,
                           out_dir="",
                           verbose=False,
                           quiet=False,
                           logs=True,
                           objectid="NA",
                           **kwargs):
        """Run `mikado serialise` through self.runMikado.

        jsonconf, blastTargets, orfs and xml are passed as --json-conf,
        --blast_targets, --orfs and --xml; out_dir (current working
        directory when empty) is passed as --output-dir. These values take
        precedence over anything supplied in kwargs.

        :return: out_dir on success, "" on any failure
        :rtype: string
        """
        # the blast targets, orfs and xml inputs must all exist
        if not pu.check_files_exist(blastTargets, orfs, xml):
            print("Please check the input to mikado.")
            return ""

        out_dir = out_dir or os.getcwd()

        serialise_opts = {
            "--json-conf": jsonconf,
            "--blast_targets": blastTargets,
            "--xml": xml,
            "--orfs": orfs,
            "--output-dir": out_dir
        }
        # fixed options are unpacked last so they override kwargs
        serialise_opts = {**kwargs, **serialise_opts}

        ok = self.runMikado("serialise",
                            verbose=verbose,
                            quiet=quiet,
                            logs=logs,
                            objectid=objectid,
                            **serialise_opts)
        if not ok:
            print("Mikado serialise failed for:" + jsonconf)
            return ""

        # report success only if the output directory is really there
        return out_dir if pu.check_paths_exist(out_dir) else ""
Exemple #19
0
    def runMikadoPick(self,
                      yamlconf,
                      threads=None,
                      out_dir=None,
                      verbose=False,
                      quiet=False,
                      logs=True,
                      objectid="NA",
                      **kwargs):
        """Run `mikado pick` through self.runMikado.

        yamlconf is passed as --json-conf, threads (self.threads when
        empty) as --procs and out_dir (current working directory when
        empty) as --output-dir. Entries in kwargs override these values.

        :return: out_dir on success, "" on any failure
        :rtype: string
        """
        # the configuration file must exist
        if not pu.check_files_exist(yamlconf):
            print("Please check the input to mikado.")
            return ""

        out_dir = out_dir or os.getcwd()
        n_procs = threads if threads else self.threads

        pick_opts = {
            "--procs": str(n_procs),
            "--json-conf": yamlconf,
            "--output-dir": out_dir
        }
        # kwargs are unpacked last so the caller can override the defaults
        pick_opts = {**pick_opts, **kwargs}

        ok = self.runMikado("pick",
                            verbose=verbose,
                            quiet=quiet,
                            logs=logs,
                            objectid=objectid,
                            **pick_opts)
        if not ok:
            print("Mikado pick failed for:" + yamlconf)
            return ""

        # report success only if the output directory is really there
        return out_dir if pu.check_paths_exist(out_dir) else ""
def benchmark():
    """Command-line entry point that builds a benchmark report from a pyrpipe log.

    Arguments are parsed from ``sys.argv[2:]`` (the first two entries are
    skipped). The output name is the ``-o`` value, or the log file's basename
    when ``-o`` is absent, with ``'.' + <-e value>`` appended verbatim. The
    temporary directory (``-t`` or ``<cwd>/tmp``) is created if missing, then
    ``generateBenchmarkReport`` is invoked.
    """
    print("Generating benchmarks")
    parser = argparse.ArgumentParser(
            description='pyrpipe diagnostic utility\nGenerate benchmark report.',
            usage='''pyrpipe_diagnostic report [<args>] <logfile>
                    
                    ''')
    parser.add_argument('-o', help='out file \ndefault: same as input logfile',action="store")
    parser.add_argument('-e', help='report output type: [MD,PDF,HTML] \ndefault: PDF',default='PDF',action="store")
    parser.add_argument('-v',help='verbose',action="store_true")
    parser.add_argument('-f',help='Filter by programs. Provide a comma-separated list e.g., prefetch,STAR,bowtie2 \ndefault None')
    parser.add_argument('-t',help='Temporary directory. \ndefault ./tmp',action="store")
    parser.add_argument('logfile', help='The log file generated by pyrpipe',action="store")
    cli = parser.parse_args(sys.argv[2:])

    log_path = cli.logfile
    env_log = checkEnvLog(log_path)
    if cli.v:
        print("Generating benchmarks")

    # Output name: explicit -o, else derived from the log file name.
    report_base = pu.get_file_basename(cli.logfile) if cli.o is None else cli.o
    report_name = report_base + '.' + cli.e

    # Optional comma-separated program filter.
    program_filters = cli.f.split(',') if cli.f is not None else []

    # Scratch directory: explicit -t, else ./tmp under the working directory.
    scratch_dir = cli.t if cli.t is not None else os.path.join(os.getcwd(), "tmp")
    if not pu.check_paths_exist(scratch_dir):
        pu.mkdir(scratch_dir)

    generateBenchmarkReport(log_path, env_log, program_filters, scratch_dir,
                            outFile=report_name, verbose=cli.v)
Exemple #21
0
 def __init__(self,log_file,env_log,out_dir=""):
     """Validate the inputs, parse the logs and prepare the report directory.

     log_file: str
         Path to the log file; must exist.
     env_log: str
         Path to the environment log; must exist.
     out_dir: str
         Parent directory for the ``benchmark_reports`` folder. The current
         working directory is used when this is empty.

     Raises ``Exception`` when an input file is missing or the report
     directory can not be created.
     """
     inputs_ok=pu.check_files_exist(log_file,env_log)
     if not inputs_ok:
         raise Exception("Please check input for benchmark report. {} {}".format(log_file,env_log))

     base_dir=out_dir if out_dir else os.getcwd()

     self.log_file,self.env_log=log_file,env_log
     self.runtimes_by_prog={}
     self.runtimes_by_object={}

     # Parsing happens before the output directory is touched.
     pu.print_blue("parsing log...")
     self.parse_logs()
     pu.print_blue("done.")

     self.benchmark_dir=os.path.join(base_dir,'benchmark_reports')
     if pu.check_paths_exist(self.benchmark_dir):
         return
     created=pu.mkdir(self.benchmark_dir)
     if not created:
         raise Exception("Error running benchmarks. Can not create output directory {}".format(self.benchmark_dir))
Exemple #22
0
    def createMikadoGTFlist(self,
                            out_file,
                            out_dir,
                            searchPath,
                            searchQuery="*.gtf",
                            strand=False):
        """Create a list file to be used by mikado configure.

        Every file found under ``searchPath`` becomes one tab-separated line:
        ``<path>\\t<file basename>\\t<strand>``. Files for which no basename
        can be derived are skipped.

        out_file: str
            outfile name; ".txt" is appended
        out_dir: str
            path to out_dir; created if it does not exist
        searchPath: str
            Path where gtf/gff files will be searched (recursively)
        searchQuery: str
            Query to perform search. Default: "*.gtf"
        strand: bool
            Stranded flag, written as ``str(strand)``. Default false

        :return: Path to the written list file
        :rtype: string
        """

        files = pe.find_files(searchPath, searchQuery, recursive=True)

        #create out dir
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        outFilePath = os.path.join(out_dir, out_file + ".txt")

        gtfs = []
        for gtf_path in files:
            label = pu.get_file_basename(gtf_path)
            if label:
                gtfs.append("\t".join([gtf_path, label, str(strand)]))

        # The context manager closes the handle even if the write fails.
        with open(outFilePath, "w") as list_file:
            list_file.write("\n".join(gtfs))

        pu.print_green("Mikado list file written to:" + outFilePath)
        return outFilePath
Exemple #23
0
    def runMikadoPrepare(self, jsonconf, out_dir="", verbose=False,
                         quiet=False, logs=True, objectid="NA", **kwargs):
        """Run ``mikado prepare`` through ``self.runMikado``.

        Parameters
        ----------
        jsonconf: str
            Path to the configuration file; passed as ``--json-conf``.
        out_dir: str
            Value for ``--output-dir``. The current working directory is used
            when this is empty.
        verbose, quiet, logs, objectid:
            Forwarded unchanged to ``self.runMikado``.
        kwargs: dict
            Extra options. ``--output-dir`` and ``--json-conf`` set by this
            wrapper take precedence over the same keys in kwargs.

        :return: ``out_dir`` on success, an empty string otherwise
        :rtype: string
        """
        # Refuse to run without the configuration file.
        if not pu.check_files_exist(jsonconf):
            print("Please check the input configuration to mikado.")
            return ""

        out_dir = out_dir or os.getcwd()

        # Wrapper options come last so they win over kwargs.
        prepare_opts = {
            **kwargs,
            "--output-dir": out_dir,
            "--json-conf": jsonconf,
        }

        succeeded = self.runMikado("prepare",
                                   verbose=verbose,
                                   quiet=quiet,
                                   logs=logs,
                                   objectid=objectid,
                                   **prepare_opts)
        if not succeeded:
            print("Mikado prepare failed for:" + jsonconf)
            return ""

        # Only report the output directory if it is really there.
        return out_dir if pu.check_paths_exist(out_dir) else ""
Exemple #24
0
 def perform_assembly(self,sra_object=None,bam_file=None,out_dir="trinity_out_dir",max_memory=None,max_intron=10000,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run trinity with sra object or BAM file as input.

     Parameters
     ----------

     sra_object: SRA
         object of SRA class; takes precedence over bam_file when both are given
     bam_file: string
         path to bam file
     out_dir: string
         name of the out directory. "_trinity" is appended unless the name
         already contains "trinity". The directory is placed under the SRA
         object's location or next to the bam file.
     max_memory: string
         Max memory argument; "G" is appended. Defaults to self.max_memory
     max_intron: int
         specify the "--genome_guided_max_intron" argument (bam input only)
     threads: int
         Number of threads to use. Defaults to self.threads
     overwrite: bool
         Accepted for interface compatibility; not used by this method
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession.

     kwargs: dict
         Options to pass to Trinity. These override the options set here.

     :return: Path to the Trinity output directory, or an empty string on failure
     :rtype: string
     """

     #add trinity to outdir
     if "trinity" not in out_dir:
         out_dir+="_trinity"

     if not threads:
         threads=self.threads

     if not max_memory:
         max_memory=self.max_memory

     if sra_object is not None:
         parent_dir=sra_object.location
         out_dir=os.path.join(parent_dir,out_dir)
         if sra_object.layout == 'PAIRED':
             new_opts={"--seqType":"fq","--left":sra_object.localfastq1Path,"--right":sra_object.localfastq2Path,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads)}
         else:
             new_opts={"--seqType":"fq","--single":sra_object.localfastqPath,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads)}
     elif bam_file is not None:
         if not pu.check_files_exist(bam_file):
             pu.print_boldred("Input to trinity does not exist:"+bam_file)
             return ""
         parent_dir=pu.get_file_directory(bam_file)
         out_dir=os.path.join(parent_dir,out_dir)
         new_opts={"--genome_guided_bam":bam_file,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads),"--genome_guided_max_intron":max_intron}
     else:
         pu.print_boldred("Please provide valid input to run trinity")
         return ""

     merged_opts={**new_opts,**kwargs}

     #call trinity, honouring the caller's verbosity/logging/objectid settings
     status=self.run_trinity(valid_args_list=None,verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)

     #success only counts when the out dir actually exists
     if status and pu.check_paths_exist(out_dir):
         return out_dir
     #always return a string on failure (never None)
     return ""
Exemple #25
0
 def perform_assembly(self,bam_file,out_dir="",out_suffix="_cufflinks",reference_gtf=None,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run cufflinks with BAM file as input.

     Parameters
     ----------
     bam_file: string
         path to bam file
     out_dir: string
         output directory; defaults to the directory of bam_file and is
         created when it does not exist
     out_suffix: string
         Suffix for the output gtf file
     reference_gtf: str
         Path to reference gtf; passed as "-g" when given
     threads: int
         Number of threads to use. Defaults to self.threads
     overwrite: bool
         Overwrite if output file already exists. When False and the output
         GTF is already present, its path is returned without running cufflinks.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession.
     kwargs: dict
         Options to pass to cufflinks. These override the options set here.

     :return: Returns the path to output GTF file, or an empty string on failure
     :rtype: string
     """

     #create path to output file
     fname=pu.get_file_basename(bam_file)
     if not out_dir:
         out_dir=pu.get_file_directory(bam_file)
     else:
         if not pu.check_paths_exist(out_dir):
             pu.mkdir(out_dir)
     out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")

     #Handle overwrite
     if not overwrite:
         #check if file exists. return if yes
         if os.path.isfile(out_gtf_file):
             print("The file "+out_gtf_file+" already exists. Exiting..")
             return out_gtf_file

     if not threads:
         threads=self.threads

     #Add output file name and input bam
     new_opts={"-o":out_dir,"--":(bam_file,),"-p":str(threads)}

     #add ref gtf
     if reference_gtf:
         if not pu.check_files_exist(reference_gtf):
             pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
             return ""

         new_opts["-g"]=reference_gtf

     merged_opts={**new_opts,**kwargs}

     #call cufflinks
     status=self.run_cufflinks(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)

     if status:
         #move out_dir/transcripts.gtf to outfile
         pe.move_file(os.path.join(out_dir,"transcripts.gtf"),out_gtf_file)
         #only report success when the renamed GTF is really there
         if pu.check_files_exist(out_gtf_file):
             return out_gtf_file
     #always return a string on failure (never None)
     return ""
Exemple #26
0
    def runMikadoConfigure(self,
                           listFile,
                           genome,
                           mode,
                           scoring,
                           junctions,
                           out_file,
                           out_dir=None,
                           verbose=False,
                           quiet=False,
                           logs=True,
                           objectid="NA",
                           **kwargs):
        """Wrapper to run mikado configure
        Make sure the paths in list file are global.

        Parameters
        ----------
        listFile: str
            Path to the list file; passed as ``--list``
        genome: str
            Path to the reference; passed as ``--reference``
        mode: str
            Passed as ``--mode``
        scoring: str
            Path to the scoring file; passed as ``--scoring``
        junctions: str
            Path to the junctions file; passed as ``--junctions``
        out_file: str
            Output file name; ".yaml" is appended
        out_dir: str
            Output directory, created if missing. Defaults to the working
            directory at the time of the call.
        verbose, quiet, logs, objectid:
            Forwarded unchanged to ``self.runMikado``
        kwargs: dict
            Extra options. Options set by this wrapper take precedence.

        :return: Path to the created configuration file, empty string on failure
        :rtype: string
        """

        #check all file exists
        if not pu.check_files_exist(listFile, genome, junctions, scoring):
            print("Please check mikado input")
            return ""

        # Resolve the default here, not in the signature: a default of
        # os.getcwd() would be frozen when the function is defined.
        if not out_dir:
            out_dir = os.getcwd()

        #create out dir
        if not pu.check_paths_exist(out_dir):
            if not pu.mkdir(out_dir):
                raise Exception("Exception in mikado configure.")

        outFilePath = os.path.join(out_dir, out_file + ".yaml")

        newOpts = {
            "--list": listFile,
            "--reference": genome,
            "--mode": mode,
            "--scoring": scoring,
            "--junctions": junctions,
            "--": (outFilePath, )
        }

        #merge with kwargs
        mergedOpts = {**kwargs, **newOpts}

        status = self.runMikado("configure",
                                verbose=verbose,
                                quiet=quiet,
                                logs=logs,
                                objectid=objectid,
                                **mergedOpts)

        if not status:
            pu.print_boldred(
                "Mikado configure failed.\nPlease make sure the paths in list file are global."
            )
            return ""

        #check that the configuration file was written
        if not pu.check_files_exist(outFilePath):
            return ""

        return outFilePath
Exemple #27
0
    def run_portcullisFull(self, reference_fasta, bam_file, out_dir="",
                           delete_bam=False, verbose=False, quiet=False,
                           logs=True, objectid="NA", **kwargs):
        """Run ``portcullis full`` through ``self.run_portcullis``.

        Parameters
        ----------
        reference_fasta: string
            Path to the reference fasta file
        bam_file: string
            Path to input bam file
        out_dir: string
            Output directory, passed as ``-o``. Defaults to
            ``<cwd>/portcullis_out`` when empty.
        delete_bam: bool
            Delete the input bam file after a successful run
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.
        kwargs: dict
            Extra options. The positional inputs and ``-o`` set by this
            wrapper take precedence over the same keys in kwargs.

        :return: ``out_dir`` on success, an empty string otherwise
        :rtype: string
        """
        if not pu.check_files_exist(reference_fasta, bam_file):
            print("Please check input for portcullis.")
            return ""

        if not out_dir:
            out_dir = os.path.join(os.getcwd(), "portcullis_out")

        # Wrapper options come last so they win over kwargs.
        portcullis_opts = {
            **kwargs,
            "--": (reference_fasta, bam_file),
            "-o": out_dir,
        }

        succeeded = self.run_portcullis("full",
                                        verbose=verbose,
                                        quiet=quiet,
                                        logs=logs,
                                        objectid=objectid,
                                        **portcullis_opts)
        if not succeeded:
            print("portcullis full failed for:" + bam_file)
            return ""

        # A successful run must have produced the output directory.
        if not pu.check_paths_exist(out_dir):
            return ""

        # Best-effort cleanup: a failed delete is reported, not fatal.
        if delete_bam and not pe.deleteFileFromDisk(bam_file):
            print("Error deleting bam file:" + bam_file)

        return out_dir
Exemple #28
0
    def sam_to_bam(self, sam_file, out_dir="", out_suffix="",
                   delete_sam=False, verbose=False, quiet=False, logs=True,
                   objectid="NA", **kwargs):
        """Convert a sam file to bam via ``self.run_samtools("view", ...)``.

        The bam file is named ``<sam basename><out_suffix>.bam``.

        sam_file: string
            Path to the input sam file
        out_dir: string
            Output directory, created if missing. Defaults to the directory
            of the sam file.
        out_suffix: string
            Suffix for the output bam file
        delete_sam: bool
            delete the sam file after conversion
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.
        kwargs: dict
            Extra options. The input, ``-o`` and ``-b`` set by this wrapper
            take precedence over the same keys in kwargs.

        :return: Returns the path to the bam file. Returns empty string if operation failed.
        :rtype: string
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(sam_file)

        bam_name = pu.get_file_basename(sam_file) + out_suffix + '.bam'
        out_bam = os.path.join(out_dir, bam_name)

        # Wrapper options come last so they win over kwargs.
        view_opts = {**kwargs, "--": (sam_file, ), "-o": out_bam, "-b": ""}

        succeeded = self.run_samtools("view",
                                      verbose=verbose,
                                      quiet=quiet,
                                      logs=logs,
                                      objectid=objectid,
                                      **view_opts)
        if not succeeded:
            print("Sam to bam failed for:" + sam_file)
            return ""

        # The conversion only counts if the bam file is on disk.
        if not pu.check_files_exist(out_bam):
            return ""

        # Best-effort cleanup: a failed delete is reported, not fatal.
        if delete_sam and not pe.deleteFileFromDisk(sam_file):
            print("Error deleting sam file:" + sam_file)

        return out_bam
Exemple #29
0
    def merge_bam(self, *args, out_file="merged", out_dir="",
                  delete_bams=False, verbose=False, quiet=False, logs=True,
                  objectid="NA", **kwargs):
        """Merge several bam files via ``self.run_samtools("merge", ...)``.

        Parameters
        ----------
        args: tuple
            Paths to bam files to combine; at least two are required
        out_file: string
            Output file name to save the results. .bam will be added at the end.
        out_dir: string
            Path where to save the merged bam file, created if missing.
            Default path is the same as the first bam_file's
        delete_bams: bool
            Delete the input bam files after a successful merge
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.
        kwargs: dict
            Extra options. The positional file list set by this wrapper
            takes precedence over a "--" key in kwargs.

        :return: Returns the path to the merged bam file, empty string on failure.
        :rtype: string
        """
        # Merging needs two or more inputs.
        if len(args) < 2:
            print("Please supply at least 2 files to merge")
            return ""

        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(args[0])

        merged_path = os.path.join(out_dir, out_file + ".bam")

        # Output path first, then every input file.
        merge_opts = {**kwargs, "--": (merged_path, *args)}

        succeeded = self.run_samtools("merge",
                                      verbose=verbose,
                                      quiet=quiet,
                                      logs=logs,
                                      objectid=objectid,
                                      **merge_opts)
        if not succeeded:
            print("Bam merge failed for:" + merged_path)
            return ""

        # The merge only counts if the output file is on disk.
        if not pu.check_files_exist(merged_path):
            return ""

        # Best-effort cleanup: failed deletes are reported, not fatal.
        if delete_bams:
            for source_bam in args:
                if not pe.deleteFileFromDisk(source_bam):
                    print("Error deleting sam file:" + source_bam)

        return merged_path
Exemple #30
0
    def perform_qc(self,sra_object,out_dir="",out_suffix="_trimgalore",objectid="NA"):
        """Run trimgalore on the fastq file(s) of an SRA object.

        The tool is invoked through ``self.run``; its output files
        (``<name>_val_1.fq``/``<name>_val_2.fq`` for paired data,
        ``<name>_trimmed.fq`` otherwise) are then moved to
        ``<name><out_suffix>.fastq`` inside ``out_dir``.

        Parameters
        ----------

        sra_object: SRA
            An SRA object whose fastq files will be used
        out_dir: str
            Path to output directory, created if missing. Defaults to
            ``sra_object.directory``.
        out_suffix: string
            Suffix for the output fastq files
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

        Notes
        -----
        ``_force`` and ``_dryrun`` are read from outside this method
        (presumably module-level flags -- confirm). Existing targets are
        reused unless ``_force`` is set; files are not moved when ``_dryrun``
        is set. If the moved files are missing afterwards an empty string is
        returned; if the run itself fails ``("",)`` is returned.

        :return: Returns the path of fastq files after QC. tuple has one item for single end files and two for paired.
        :rtype: tuple
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir=sra_object.directory

        if sra_object.layout!='PAIRED':
            # ---- single-end reads ----
            fq=sra_object.fastq_path
            stem=pu.get_file_basename(fq)
            # file produced by trimgalore and the name it is moved to
            trimmed=os.path.join(out_dir,stem+"_trimmed.fq")
            out_file=os.path.join(out_dir,stem+out_suffix+".fastq")

            # reuse an existing result unless forced
            if not _force and pu.check_files_exist(out_file):
                pu.print_green('Target files {} already exist.'.format(out_file))
                return (out_file,)

            status=self.run(fq,objectid=objectid,target=trimmed,**{"-o":out_dir})
            if not status:
                return ("",)

            if not _dryrun:
                pe.move_file(trimmed,out_file)
                if not pu.check_files_exist(out_file):
                    return ""
            return (out_file,)

        # ---- paired-end reads ----
        fq1=sra_object.fastq_path
        fq2=sra_object.fastq2_path
        stem1=pu.get_file_basename(fq1)
        stem2=pu.get_file_basename(fq2)
        # files produced by trimgalore
        produced1=os.path.join(out_dir,stem1+"_val_1.fq")
        produced2=os.path.join(out_dir,stem2+"_val_2.fq")
        # names they are moved to
        out_file1=os.path.join(out_dir,stem1+out_suffix+".fastq")
        out_file2=os.path.join(out_dir,stem2+out_suffix+".fastq")

        # reuse existing results unless forced
        if not _force and pu.check_files_exist(out_file1,out_file2):
            pu.print_green('Target files {}, {} already exist.'.format(out_file1,out_file2))
            return out_file1,out_file2

        paired_opts={"--paired":"","-o":out_dir}
        status=self.run(fq1,fq2,objectid=objectid,target=[produced1,produced2],**paired_opts)
        if not status:
            return ("",)

        if not _dryrun:
            pe.move_file(produced1,out_file1,verbose=False)
            pe.move_file(produced2,out_file2,verbose=False)
            if not pu.check_files_exist(out_file1,out_file2):
                return ""
        return out_file1,out_file2