Beispiel #1
0
    def bamtofq(self, bam, oid, rm_bam=True):
        """Convert a BAM file to FASTQ files by delegating to ``self.run``.

        All output names are derived from the directory that contains ``bam``.
        The temporary file is placed under ``$LOCAL`` when that environment
        variable is set and non-empty, otherwise under the current directory.

        Parameters
        ----------
        bam : str
            Path to the input BAM file.
        oid : str
            Object id; used as the prefix of the ``_1.fastq``/``_2.fastq``
            outputs and forwarded to ``self.run`` as ``objectid``.
        rm_bam : bool
            Delete ``bam`` once ``self.run`` reports success.

        Returns
        -------
        The status value returned by ``self.run``.
        """
        bam_dir = pu.get_file_directory(bam)
        fq_prefix = os.path.join(bam_dir, oid)

        # Scratch file location: $LOCAL if set and non-empty, else "./".
        scratch_dir = os.environ.get('LOCAL') or './'
        scratch_file = os.path.join(
            scratch_dir, pu.get_file_basename(bam) + pu.get_timestamp())

        # Key order is preserved in case self.run emits options in dict order.
        internal_kwargs = {
            'F': fq_prefix + '_1.fastq',
            'F2': fq_prefix + '_2.fastq',
            'S': os.path.join(bam_dir, 's.fq'),
            'O': os.path.join(bam_dir, 'o.fq'),
            'O2': os.path.join(bam_dir, 'o2.fq'),
            'T': scratch_file,
            'filename': bam,
        }

        status = self.run(None, objectid=oid, **internal_kwargs)
        if not (status and rm_bam):
            return status

        # The conversion succeeded and the caller asked for the input to go.
        pe.delete_file(bam)
        return status
Beispiel #2
0
    def create_lock(self, target_list, message):
        """
        Create a temporary .Lock file next to each target file and write a message in it.

        Parameters
        ----------
        target_list : List
            List of target files.
        message : Str
            Message to write in each lock file; it is preceded by a timestamp
            and a tab character.

        Returns
        -------
        templist : List
            A list of .Lock file names corresponding to the target files.

        """
        templist = []
        for f in target_list:
            temp_path = pu.get_file_directory(f)
            if not pu.check_paths_exist(temp_path):
                pu.mkdir(temp_path)
            prefix = pu.get_filename(f) + '_'
            # delete=False keeps the lock on disk after the handle is closed.
            # The with-block guarantees the handle is closed (and the message
            # flushed) instead of leaking one open descriptor per target.
            with tempfile.NamedTemporaryFile(prefix=prefix,
                                             suffix='.Lock',
                                             dir=temp_path,
                                             delete=False) as temp:
                #TODO: dump command in lock
                timestamp = pu.get_timestamp()
                temp.write((timestamp + '\t' + message).encode())
                templist.append(temp.name)
        return templist
Beispiel #3
0
def sortbam(bam, oid):
    """Sort a BAM file with ``sambamba sort``.

    The sorted file is written next to the input as
    ``<basename>_sorted.bam``.

    Parameters
    ----------
    bam : str
        Path to the input BAM file.
    oid : str
        Object id forwarded to ``pe.execute_command`` as ``objectid``.

    Returns
    -------
    str
        Path to the sorted BAM file, or an empty string if the command
        reported failure.
    """
    outfile = pu.get_file_basename(bam) + "_sorted.bam"
    outdir = pu.get_file_directory(bam)
    outpath = os.path.join(outdir, outfile)
    # Build the argument vector directly rather than splitting a command
    # string, so paths that contain whitespace stay single arguments.
    cmd = ['sambamba', 'sort', '-t', '25', '-m', '100G', '-o', outpath, bam]
    st = pe.execute_command(cmd, logs=True, objectid=oid)
    if not st:
        return ""
    return outpath
def checkEnvLog(logFile):
    """Return the path of the ENV log that accompanies ``logFile``.

    The ENV log is expected in the same directory as ``logFile`` and is named
    ``<basename>ENV.log``. If either file is missing, a message is printed
    and the process exits with status 1.
    """
    env_log_path = os.path.join(
        pu.get_file_directory(logFile),
        pu.get_file_basename(logFile) + "ENV.log",
    )

    # Both the log and its ENV companion must be present.
    if pu.check_files_exist(logFile, env_log_path):
        return env_log_path

    print("Please check missing log files. Exiting.")
    sys.exit(1)
Beispiel #5
0
    def perform_assembly(self,
                         bam_file,
                         out_dir=None,
                         out_suffix="_stringtie",
                         objectid="NA"):
        """Run stringtie on a bam file via ``self.run``.

        Parameters
        ----------

        bam_file: string
            Path to the input bam file
        out_dir: string
            Directory for the output GTF. Defaults to the directory of
            bam_file; it is created when it does not exist.
        out_suffix: string
            Suffix appended to the bam base name to form the GTF file name
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        :return: Path to the output GTF file, or an empty string on failure
        :rtype: string
        """

        bam_stem = pu.get_file_basename(bam_file)

        # fall back to the bam file's own directory
        out_dir = out_dir or pu.get_file_directory(bam_file)
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        out_gtf_file = os.path.join(out_dir, bam_stem + out_suffix + ".gtf")

        # "-o" names the output; "--" carries the positional argument (the bam)
        internal_kwargs = {"-o": out_gtf_file, "--": (bam_file, )}

        status = self.run(None,
                          objectid=objectid,
                          target=out_gtf_file,
                          **internal_kwargs)
        if not status:
            return ""

        # In a dry run no file is produced, so a missing GTF is not a failure.
        gtf_missing = not pu.check_files_exist(out_gtf_file)
        if gtf_missing and not _dryrun:
            return ""
        return out_gtf_file
Beispiel #6
0
 def init_object(self,srr_accession,directory,fastq,fastq2,sra):
     """Initialise layout and directory from fastq files, or from an accession.

     When ``fastq`` is given, the layout is "PAIRED" if ``fastq2`` is also
     given and "SINGLE" otherwise, and ``self.directory`` is set to the
     directory of ``fastq``. Without ``fastq`` the call is delegated to
     ``self.init_from_accession(srr_accession, directory)``.

     ``sra`` is accepted but not used by this method.

     :return: True when initialised from fastq files, otherwise the result
         of ``init_from_accession``
     :raises ValueError: if the fastq files are missing and this is not a dry run
     """
     #no fastq supplied: init from srr_accession and directory
     if not fastq:
         return self.init_from_accession(srr_accession,directory)

     is_paired=bool(fastq2)
     self.layout="PAIRED" if is_paired else "SINGLE"
     self.directory=pu.get_file_directory(fastq)

     reads=(fastq,fastq2) if is_paired else (fastq,)
     if not pu.check_files_exist(*reads) and not _dryrun:
         pu.print_boldred("ERROR: File not found")
         if is_paired:
             raise ValueError("Please check fastq files {} {}".format(fastq,fastq2))
         raise ValueError("Please check fastq files {}".format(fastq))
     return True
Beispiel #7
0
    def get_lock_files(self, target):
        """
        Returns .Lock files associated with a target

        Parameters
        ----------
        target : Str
            Target file name.

        Returns
        -------
        lock_files : List
            List of .Lock files present, as returned by ``pu.find_files``.

        """
        import re

        #check if lock exists
        filepath = pu.get_file_directory(target)
        filename = pu.get_filename(target)
        # The file name is escaped so regex metacharacters in it (most
        # commonly the "." before the extension) match literally. The suffix
        # is a raw string so "\." is a regex escape rather than an invalid
        # Python string escape.
        # NOTE(review): assumes pu.find_files treats the pattern as a regular
        # expression, as the original ".*" prefix implies -- confirm.
        pattern = r'.*' + re.escape(filename) + r'.*\.Lock$'
        lock_files = pu.find_files(filepath, pattern)
        return lock_files
Beispiel #8
0
def checkEnvLog(logFile):
    """
    Check that a log and its ENV log exist and return the ENV log path.

    Parameters
    ----------
    logFile : str
        path to log file.

    Returns
    -------
    envLog : str
        Path ``<directory of logFile>/<basename of logFile>ENV.log``.
        If either file is missing, a message is printed and the process
        exits with status 1 instead of returning.

    """
    env_name = pu.get_file_basename(logFile) + "ENV.log"
    candidate = os.path.join(pu.get_file_directory(logFile), env_name)

    found = pu.check_files_exist(logFile, candidate)
    if not found:
        print("Please check missing log files. Exiting.")
        sys.exit(1)
    return candidate
Beispiel #9
0
    def runRibocode(self,
                    gtf,
                    genome,
                    bam,
                    l="no",
                    outsuffix="ribocode_out",
                    verbose=False,
                    quiet=False,
                    logs=True,
                    objectid="NA"):
        """Wrapper to run ribocode in one step (``RiboCode_onestep``).

        Parameters
        ----------
        gtf: str
            Path to the annotation GTF file (passed as ``-g``)
        genome: str
            Path to the genome file (passed as ``-f``)
        bam: str
            Path to the bam file (passed as ``-r``)
        l: str
            Value passed as ``-l``
        outsuffix: str
            Output name; joined to the directory containing ``gtf`` and
            passed as ``-o``
        verbose: bool
            Forwarded to ``pe.execute_command``
        quiet: bool
            Forwarded to ``pe.execute_command``
        logs: bool
            Forwarded to ``pe.execute_command``
        objectid: str
            Id attached to the command, forwarded to ``pe.execute_command``

        :return: The output path, or an empty string if inputs are missing
            or the command failed
        :rtype: string
        """

        #check input
        if not pu.check_files_exist(gtf, genome, bam):
            pu.print_boldred("Please check input files for Ribocode")
            return ""

        out_dir = pu.get_file_directory(gtf)
        outFile = os.path.join(out_dir, outsuffix)

        # The genome option is "-f": every key here must carry its leading
        # dash to be a usable command-line option.
        newOpts = {"-g": gtf, "-f": genome, "-r": bam, "-l": l, "-o": outFile}

        ribocode_Cmd = ['RiboCode_onestep']
        ribocode_Cmd.extend(pu.parse_unix_args(self.valid_args, newOpts))

        status = pe.execute_command(ribocode_Cmd,
                                    verbose=verbose,
                                    quiet=quiet,
                                    logs=logs,
                                    objectid=objectid)
        if not status:
            pu.print_boldred("ribocode failed")
            return ""

        return outFile
Beispiel #10
0
 def perform_assembly(self,bam_file,out_dir="",out_suffix="_cufflinks",reference_gtf=None,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run cufflinks with BAM file as input.
             
     Parameters
     ----------
     bam_file: string
         path to bam file
     out_dir: string
         Output directory. Defaults to the directory of bam_file; a supplied
         directory is created when it does not exist.
     out_suffix: string
         Suffix for the output gtf file
     reference_gtf: str
         Path to reference gtf 
     threads: int
         Number of threads to use. Defaults to self.threads.
     overwrite: bool
         Overwrite if output file already exists.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession.
     kwargs: dict
         Options to pass to cufflinks. These override the options built here.
         
     :return: Returns the path to output GTF file, or an empty string if the
         run failed or the output file is missing
     :rtype: string       
     """
     
     #create path to output file
     fname=pu.get_file_basename(bam_file)
     if not out_dir:
         out_dir=pu.get_file_directory(bam_file)
     elif not pu.check_paths_exist(out_dir):
         pu.mkdir(out_dir)
     out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")
     
     #handle overwrite: reuse an existing output unless overwriting
     if not overwrite and os.path.isfile(out_gtf_file):
         print("The file "+out_gtf_file+" already exists. Exiting..")
         return out_gtf_file
     
     if not threads:
         threads=self.threads
         
     #Add output directory, input bam and thread count
     new_opts={"-o":out_dir,"--":(bam_file,),"-p":str(threads)}
     
     #add ref gtf
     if reference_gtf:
         if not pu.check_files_exist(reference_gtf):
             pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
             return ""
         
         new_opts["-g"]=reference_gtf
     
     merged_opts={**new_opts,**kwargs}
     
     #call cufflinks
     status=self.run_cufflinks(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
     if not status:
         return ""
     
     #move out_dir/transcripts.gtf to the final output name
     pe.move_file(os.path.join(out_dir,"transcripts.gtf"),out_gtf_file)
     
     #always return a string: "" (not None) when the gtf is missing
     if not pu.check_files_exist(out_gtf_file):
         return ""
     return out_gtf_file
Beispiel #11
0
 def stringtie_merge(self,*args,out_dir=None,out_suffix="_stringtieMerge",threads=None,overwrite=False,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run stringtie merge.
     
     Parameters
     ----------
     
     args: tuple
         path to gtf files to merge
     out_dir: string
         Output directory. Defaults to the directory of the first gtf file.
     out_suffix: string
         Suffix for output gtf file name
     threads: int
         Number of threads to use. Defaults to self.threads.
     overwrite: bool
         Overwrite if output file already exists.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Options to pass to stringtie. These override the options built here.
     :return: Returns the path to the merged GTF file, or an empty string if
         there is no input, the run failed or the output file is missing
     :rtype: string
     """
     
     if len(args) < 1:
         print("ERROR: No input gtf for stringtie merge.")
         return ""
     
     #create path to output gtf file, named after the first input
     fname=pu.get_file_basename(args[0])
     
     if not out_dir:
         out_dir=pu.get_file_directory(args[0])
     
     out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")
     
     #reuse an existing output unless overwriting
     if not overwrite and os.path.isfile(out_gtf_file):
         print("The file "+out_gtf_file+" already exists. Exiting..")
         return out_gtf_file
     
     if not threads:
         threads=self.threads
         
     #Add merge flag, output file name and input gtf files
     new_opts={"--merge":"","-o":out_gtf_file,"--":args,"-p":str(threads)}
     
     merged_opts={**new_opts,**kwargs}
     
     #call stringtie
     status=self.run_stringtie(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
     
     #always return a string: "" (not None) when the merged gtf is missing
     if status and pu.check_files_exist(out_gtf_file):
         return out_gtf_file
     return ""
Beispiel #12
0
    def sam_to_bam(self,
                   sam_file,
                   out_dir="",
                   out_suffix="",
                   delete_sam=False,
                   verbose=False,
                   quiet=False,
                   logs=True,
                   objectid="NA",
                   **kwargs):
        """Convert a sam file to a bam file with ``samtools view``.

        The bam file takes the base name of the input sam plus out_suffix.

        sam_file: string
            Path to the input sam file
        out_dir: string
            Output directory. Defaults to the directory of sam_file; a
            supplied directory is created when it does not exist.
        out_suffix: string
            Suffix for the output bam file
        delete_sam: bool
            delete the sam file after conversion
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        kwargs: dict
            Options to pass to samtools. The input, "-o" and "-b" options
            set by this method take precedence over them.

        :return: Returns the path to the bam file. Returns empty string if operation failed.
        :rtype: string
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(sam_file)

        bam_path = os.path.join(
            out_dir, pu.get_file_basename(sam_file) + out_suffix + '.bam')

        # Options built here come last so they win over caller kwargs.
        view_opts = {**kwargs, "--": (sam_file, ), "-o": bam_path, "-b": ""}

        succeeded = self.run_samtools("view",
                                      verbose=verbose,
                                      quiet=quiet,
                                      logs=logs,
                                      objectid=objectid,
                                      **view_opts)
        if not succeeded:
            print("Sam to bam failed for:" + sam_file)
            return ""

        # the command reported success; make sure the bam is really there
        if not pu.check_files_exist(bam_path):
            return ""

        # A failed delete is reported but does not fail the conversion.
        if delete_sam and not pe.deleteFileFromDisk(sam_file):
            print("Error deleting sam file:" + sam_file)

        return bam_path
Beispiel #13
0
    def merge_bam(self,
                  *args,
                  out_file="merged",
                  out_dir="",
                  delete_bams=False,
                  verbose=False,
                  quiet=False,
                  logs=True,
                  objectid="NA",
                  **kwargs):
        """Merge multiple bam files into a single file with ``samtools merge``.

        Parameters
        ----------
        args: tuple
            Paths to the bam files to combine; at least two are required
        out_file: string
            Output file name to save the results. .bam will be added at the end.
        out_dir: string
            Path where to save the merged bam file. Default path is the same as the first bam_file's
        delete_bams: bool
            Delete the input bam files after a successful merge
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        kwargs: dict
            Options to pass to samtools. The positional arguments built by
            this method take precedence over a caller-supplied "--" entry.

        :return: Returns the path to the merged bam file, or an empty string on failure.
        :rtype: string
        """

        if len(args) < 2:
            print("Please supply at least 2 files to merge")
            return ""

        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(args[0])

        merged_path = os.path.join(out_dir, out_file + ".bam")

        # positional arguments: the output file first, then every input
        merge_opts = {**kwargs, "--": (merged_path, ) + args}

        succeeded = self.run_samtools("merge",
                                      verbose=verbose,
                                      quiet=quiet,
                                      logs=logs,
                                      objectid=objectid,
                                      **merge_opts)
        if not succeeded:
            print("Bam merge failed for:" + merged_path)
            return ""

        # the command reported success; make sure the bam is really there
        if not pu.check_files_exist(merged_path):
            return ""

        if not delete_bams:
            return merged_path

        # A failed delete is reported but does not fail the merge.
        for source_bam in args:
            if not pe.deleteFileFromDisk(source_bam):
                print("Error deleting sam file:" + source_bam)

        return merged_path
Beispiel #14
0
    def perform_assembly(self,
                         bam_file,
                         out_dir=None,
                         out_suffix="_cufflinks",
                         objectid="NA"):
        """Run cufflinks on a BAM file via ``self.run``.

        The tool writes ``transcripts.gtf`` into out_dir; on success that
        file is moved to ``<bam base name><out_suffix>.gtf``.

        Parameters
        ----------
        bam_file: string
            path to bam file
        out_dir: string
            Output directory. Defaults to the directory of bam_file; a
            supplied directory is created when it does not exist.
        out_suffix: string
            Suffix for the output gtf file
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.

        :return: Returns the path to output GTF file, or an empty string on failure
        :rtype: string
        """

        bam_stem = pu.get_file_basename(bam_file)
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(bam_file)

        # "-o" is the output directory; "--" carries the positional bam file
        internal_kwargs = {"-o": out_dir, "--": (bam_file, )}

        # raw_gtf is the run's target; out_gtf_file is the renamed result
        raw_gtf = os.path.join(out_dir, "transcripts.gtf")
        out_gtf_file = os.path.join(out_dir, bam_stem + out_suffix + ".gtf")

        # skip the run when the final file is already there and not forced
        if not _force and pu.check_files_exist(out_gtf_file):
            pu.print_green(
                'Target files {} already exist.'.format(out_gtf_file))
            return out_gtf_file

        status = self.run(None,
                          objectid=objectid,
                          target=raw_gtf,
                          **internal_kwargs)
        if not status:
            return ""

        # A dry run produces no file, so there is nothing to move or verify.
        if _dryrun:
            return out_gtf_file

        pe.move_file(raw_gtf, out_gtf_file)
        if not pu.check_files_exist(out_gtf_file):
            return ""
        return out_gtf_file
Beispiel #15
0
    def perform_cleaning(self,sra_object,bbsplit_index,out_dir="",out_suffix="_bbsplit",objectid="NA",**kwargs):
        """
        Remove contaminated reads mapping to given reference using bbsplit
        
        Parameters
        ----------
        
        sra_object: SRA
            an SRA object; its layout, directory, fastq_path and (for paired
            data) fastq2_path attributes are read
        bbsplit_index: string
            Path to bbsplit index or fasta file which will generate index
        out_dir: string
            Path to output dir. Default: sra_object.directory
        out_suffix: string
            Suffix for output file name
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.        
        kwargs: dict
            options passed to bbsplit; the options built by this method take
            precedence over them

            :return: Returns the path of fastq files after QC. tuple has one item for single end files and 2 for paired. On failure the tuple is ("",).
            :rtype: tuple
        """
        
        #locate the index directory, creating the index when only a fasta is given
        if pu.check_paths_exist(bbsplit_index):
            ref_dir=bbsplit_index
        else:
            #not an existing index path; it must then be a fasta file
            if not pu.check_files_exist(bbsplit_index):
                print("Error: Please check bbsplit index")
                return ("",)
            #the index is expected in a "ref" folder next to the fasta
            ref_dir=os.path.join(pu.get_file_directory(bbsplit_index),"ref")
            if pu.check_paths_exist(ref_dir):
                print("Using bbsplit index: "+ref_dir)
            else:
                print("Creating new index"+ref_dir)
                build_opts={**kwargs,"ref_x":bbsplit_index,"path":pu.get_file_directory(bbsplit_index)}
                built=self.run_bbsplit(objectid=objectid,**build_opts)
                #the run must succeed and must have produced the ref folder
                if not built or not pu.check_paths_exist(ref_dir):
                    print("Error creating bbsplit index.")
                    return ("",)
        
        #ref_dir points to the ref directory; bbsplit's path is one level up
        index_root=os.path.dirname(ref_dir)
        
        #make out_dir
        if not out_dir:
            out_dir=sra_object.directory
        elif not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        
        if sra_object.layout!='PAIRED':
            #single end: one input, one output
            reads=sra_object.fastq_path
            clean_name=pu.get_file_basename(reads)+out_suffix+".fastq"
            clean_path=os.path.join(out_dir,clean_name)
            run_opts={**kwargs,"in":reads,"outu":clean_path,"path":index_root}
            
            if self.run_bbsplit(objectid=objectid,target=clean_path,**run_opts) and pu.check_files_exist(clean_path):
                return (clean_path,)
            return ("",)
        
        #paired end: two inputs, two outputs
        reads1=sra_object.fastq_path
        reads2=sra_object.fastq2_path
        clean_name1=pu.get_file_basename(reads1)+out_suffix+".fastq"
        clean_name2=pu.get_file_basename(reads2)+out_suffix+".fastq"
        clean_path1=os.path.join(out_dir,clean_name1)
        clean_path2=os.path.join(out_dir,clean_name2)
        run_opts={**kwargs,"in1":reads1,"in2":reads2,"outu1":clean_path1,"outu2":clean_path2,"path":index_root}
        
        if self.run_bbsplit(objectid=objectid,target=[clean_path1,clean_path2],**run_opts) and pu.check_files_exist(clean_path1,clean_path2):
            return (clean_path1,clean_path2)
        return ("",)
Beispiel #16
0
 def build_index(self,index_path,transcriptome,objectid="NA"):
     """Build a kallisto index and store its path in self.index.
     
     index_path: str
         path to the index
     transcriptome: str
         Path to transcriptome
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
             
     :return: True if the index already exists (and _force is not set) or was
         built and passes self.check_index(); False otherwise
     :rtype: bool
     :raises ValueError: if the transcriptome file does not exist
     :raises OSError: if the index directory cannot be created or the
         kallisto command fails
     """
     #reuse an existing index unless a rebuild is forced
     if not _force and pu.check_files_exist(index_path):
         pu.print_green("Kallisto index {} already exists.".format(index_path))
         self.index=index_path
         return True
         
     #check input
     if not pu.check_files_exist(transcriptome):
         pu.print_boldred("{} does not exist. Exiting".format(transcriptome))
         raise ValueError("Please check input to kallisto index")
     
     #make sure the directory that will hold the index exists
     index_dir=pu.get_file_directory(index_path)
     if not (pu.check_paths_exist(index_dir) or pu.mkdir(index_dir)):
         raise OSError("Error creating kallisto index. Failed to create index directory.")
     
     build_opts={"-i":index_path}
     #parameters from the yaml file are applied first, so "-i" set here wins
     params_file=os.path.join(_params_dir,'kallisto_index.yaml')
     if pu.check_files_exist(params_file):
         loaded=pl.YAML_loader(params_file).get_kwargs()
         build_opts={**loaded,**build_opts}
     
     #the transcriptome is the positional argument
     build_opts['--']=(transcriptome,)
     
     kallisto_cmd=['kallisto','index',*pu.parse_unix_args(valid_args._args_KALLISTO_INDEX,build_opts)]
     
     #call kallisto
     if not pe.execute_command(kallisto_cmd,objectid=objectid):
         raise OSError("Error building kallisto index")
     
     if pu.check_files_exist(index_path) and not _dryrun:
         #update object's index
         self.index=index_path
         if self.check_index():
             return True
     
     return False
Beispiel #17
0
    def build_index(self,index_path,transcriptome,objectid="NA"):
        """
        Build a salmon index and store its path in self.index.

        Parameters
        ----------
        index_path : str
            Path of the index to create.
        transcriptome : str
            Path to the transcriptome file, passed to salmon as ``-t``.
        objectid : str, optional
            Id attached to the command. The default is "NA".

        Raises
        ------
        OSError
            If the index directory cannot be created or the salmon command
            fails.

        Returns
        -------
        bool
            True if the index already exists (and _force is not set) or was
            built and passes self.check_index(); False otherwise, including
            when the transcriptome file does not exist.

        """
        
        #reuse an existing index unless a rebuild is forced
        if not _force and pu.check_salmonindex(index_path):
            pu.print_green("Salmon index {} already exists.".format(index_path))
            self.index=index_path
            return True
            
        #check input
        if not pu.check_files_exist(transcriptome):
            pu.print_boldred("{} does not exist. Exiting".format(transcriptome))
            return False
        
        #make sure the directory that will hold the index exists
        index_dir=pu.get_file_directory(index_path)
        if not (pu.check_paths_exist(index_dir) or pu.mkdir(index_dir)):
            raise OSError("Error creating salmon index. Failed to create index directory.")
        
        allowed_opts=valid_args._args_SALMON_INDEX
        
        build_opts={"--threads":_threads,"-t":transcriptome,"-i":index_path}
        #parameters from the yaml file are applied first, so options set here win
        params_file=os.path.join(_params_dir,'salmon_index.yaml')
        if pu.check_files_exist(params_file):
            loaded=pl.YAML_loader(params_file).get_kwargs()
            build_opts={**loaded,**build_opts}
            
        salmon_cmd=['salmon','index',*pu.parse_unix_args(allowed_opts,build_opts)]
        
        #call salmon
        if not pe.execute_command(salmon_cmd,objectid=objectid):
            raise OSError("Error building salmon index")
        
        if pu.check_salmonindex(index_path) and not _dryrun:
            #update object's index
            self.index=index_path
            if self.check_index():
                return True
        
        return False
Beispiel #18
0
    def sort_bam(self,
                 bam_file,
                 out_dir="",
                 out_suffix="",
                 threads=None,
                 delete_bam=False,
                 verbose=False,
                 quiet=False,
                 logs=True,
                 objectid="NA",
                 **kwargs):
        """Sort a bam file with ``samtools sort``. The output file ends in _sorted.bam

        bam_file: str
            Path to the input bam file
        out_dir: str
            Path to output directory. Defaults to the directory of bam_file;
            a supplied directory is created when it does not exist.
        out_suffix: str
            Suffix inserted between the bam base name and _sorted.bam
        threads: int
            Number of threads. Default: Use self.threads initialized in init().
        delete_bam: bool
            Delete input bam_file
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        kwargs: dict
            Options to pass to samtools. This will override the existing options 

        :return: Returns path to the sorted bam file. Returns empty string if operation failed.
        :rtype: string
        
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(bam_file)

        sorted_name = pu.get_file_basename(bam_file) + out_suffix + '_sorted.bam'
        sorted_path = os.path.join(out_dir, sorted_name)

        # fall back to the object's thread count
        threads = threads or self.threads

        # Caller kwargs come last so they override the options built here.
        sort_opts = {
            "--": (bam_file, ),
            "-o": sorted_path,
            "-@": str(threads),
            **kwargs,
        }

        succeeded = self.run_samtools("sort",
                                      verbose=verbose,
                                      quiet=quiet,
                                      logs=logs,
                                      objectid=objectid,
                                      **sort_opts)
        if not succeeded:
            print("Bam sort failed for:" + bam_file)
            return ""

        # the command reported success; make sure the bam is really there
        if not pu.check_files_exist(sorted_path):
            return ""

        # A failed delete is reported but does not fail the sort.
        if delete_bam and not pe.deleteFileFromDisk(bam_file):
            print("Error deleting sam file:" + bam_file)

        return sorted_path
Beispiel #19
0
    def build_index(self, index_path, genome, objectid="NA"):
        """Build a bowtie2 index with given parameters and saves the new index to self.index.
        
        Parameters
        ----------
        
        index_path: string
            Path where the index will be created
        genome: string
            Path to the reference genome
        objectid : string 
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
            
        :return: True if the index already exists (and _force is not set) or
            bowtie2-build reported success; False if bowtie2-build failed
        :rtype: bool
        :raises ValueError: if the genome file does not exist
        :raises OSError: if the index directory cannot be created
        """

        #reuse an existing index unless a rebuild is forced
        if not _force:
            if pu.check_bowtie2index(index_path):
                pu.print_green(
                    "bowtie index {} already exists.".format(index_path))
                self.index = index_path
                return True

        #check input files
        if not (pu.check_files_exist(genome)):
            pu.print_boldred(
                "Please provide a valid input fasta file to build bowtie2 index"
            )
            raise ValueError("Please check input to bowtie2 build index")

        #options accepted by bowtie2-build
        bowtie2_build_args = [
            '-f', '-c', '--large-index', '--debug', '--sanitized', '--verbose',
            '-a', '--noauto', '-p', '--packed', '--bmax', '--bmaxdivn',
            '--dcv', '--nodc', '-r', '--noref', '-3', '--justref', '-o',
            '--offrate', '-t', '--ftabchars', '--threads', '--seed', '-q',
            '--quiet'
        ]

        #create the out dir
        indexdir = pu.get_file_directory(index_path)
        if not pu.check_paths_exist(indexdir):
            if not pu.mkdir(indexdir):
                raise OSError(
                    "Error creating bowtie2 index. Failed to create index directory."
                )

        args = (genome, index_path)
        internal_kwargs = {"--threads": self._threads}

        #read build parameters; options set above win over the yaml values
        yamlfile = os.path.join(_params_dir, 'bowtie2_index.yaml')
        if pu.check_files_exist(yamlfile):
            yaml_params = pl.YAML_loader(yamlfile)
            yaml_kwargs = yaml_params.get_kwargs()
            internal_kwargs = {**yaml_kwargs, **internal_kwargs}

        #add positional args
        internal_kwargs['--'] = args

        bowtie2Build_Cmd = ['bowtie2-build']
        #add options
        bowtie2Build_Cmd.extend(
            pu.parse_unix_args(bowtie2_build_args, internal_kwargs))

        #start execution
        status = pe.execute_command(bowtie2Build_Cmd, objectid=objectid)
        if not status:
            pu.print_boldred("bowtie2-build failed")
            return False

        if pu.check_bowtie2index(index_path) and not _dryrun:
            #update object's index
            self.index = index_path
            self.check_index()

        # NOTE(review): True is returned whenever bowtie2-build reports
        # success, even if the index files are missing or check_index()
        # fails. That is the original behaviour, kept so dry runs still
        # succeed; confirm whether a missing index should return False.
        return True
Beispiel #20
0
 def perform_assembly(self,sra_object=None,bam_file=None,out_dir="trinity_out_dir",max_memory=None,max_intron=10000,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run trinity with sra object or BAM file as input.

     When both sra_object and bam_file are given, sra_object is used.

     Parameters
     ----------

     sra_object: SRA
         object of SRA class
     bam_file: string
         path to bam file
     out_dir: string
         Name of the out directory; it is created under the location of the
         SRA object or next to the bam file. "_trinity" is appended if the
         name does not already contain "trinity".
     max_memory: int or string
         Max memory in gigabytes; the "G" suffix is appended automatically.
         Defaults to self.max_memory.
     max_intron: int
         specify the "--genome_guided_max_intron" argument
     threads: int
         Number of threads to use. Defaults to self.threads.
     overwrite: bool
         Currently not used by this function.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession.

     kwargs: dict
         Options to pass to Trinity. These override the options built here.

     :return: Return the path to the Trinity output directory, or an empty string on failure
     :rtype: string
     """

     #add trinity to outdir
     if "trinity" not in out_dir:
         out_dir+="_trinity"

     if not threads:
         threads=self.threads

     if not max_memory:
         max_memory=self.max_memory

     #options describing the input; extra_opts are appended after the common ones
     extra_opts={}
     if sra_object is not None:
         parent_dir=sra_object.location
         if sra_object.layout == 'PAIRED':
             input_opts={"--seqType":"fq","--left":sra_object.localfastq1Path,"--right":sra_object.localfastq2Path}
         else:
             input_opts={"--seqType":"fq","--single":sra_object.localfastqPath}
     elif bam_file is not None:
         if not pu.check_files_exist(bam_file):
             pu.print_boldred("Input to trinity does not exist:"+bam_file)
             return ""
         parent_dir=pu.get_file_directory(bam_file)
         input_opts={"--genome_guided_bam":bam_file}
         extra_opts={"--genome_guided_max_intron":max_intron}
     else:
         pu.print_boldred("Please provide valid input to run trinity")
         return ""

     out_dir=os.path.join(parent_dir,out_dir)
     new_opts={**input_opts,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads),**extra_opts}

     #user supplied options take precedence
     merged_opts={**new_opts,**kwargs}

     #call trinity, forwarding the caller's reporting options
     status=self.run_trinity(valid_args_list=None,verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)

     #report success only if the out dir was actually produced
     if status and pu.check_paths_exist(out_dir):
         return out_dir
     return ""
Beispiel #21
0
def test_get_dir():
    """Check that pu.get_file_directory returns the directory part of a relative path."""
    # maps an input path to the directory expected back
    expected_by_path = {
        'aaa/bb': 'aaa',
        'bbb/aaa/bb': 'bbb/aaa',
    }
    for path, parent in expected_by_path.items():
        assert pu.get_file_directory(path) == parent, 'get_dir failed'
Beispiel #22
0
    def perform_assembly(self,
                         bam_file,
                         out_dir="",
                         out_suffix="_stringtie",
                         overwrite=True,
                         verbose=False,
                         quiet=False,
                         logs=True,
                         objectid="NA",
                         **kwargs):
        """Function to run stringtie using a bam file.

        Parameters
        ----------

        bam_file: string
            path to the bam file
        out_dir: string
            Directory for the output gtf file. Defaults to the directory of the bam file.
        out_suffix: string
            Suffix for the output gtf file
        overwrite: bool
            Overwrite if output file already exists. If False and the output
            file exists, stringtie is not run and the existing path is returned.
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        kwargs: dict
            Options to pass to stringtie. The "-o" and "--" options are always set by this function and replace any values given here.

        :return: Returns the path to output GTF file, or an empty string if stringtie failed or the file was not produced
        :rtype: string
        """

        #create path to output file
        fname = pu.get_file_basename(bam_file)

        if not out_dir:
            out_dir = pu.get_file_directory(bam_file)

        out_gtf_file = os.path.join(out_dir, fname + out_suffix + ".gtf")

        #handle overwrite: reuse an existing output file
        if not overwrite and os.path.isfile(out_gtf_file):
            print("The file " + out_gtf_file +
                  " already exists. Exiting..")
            return out_gtf_file

        #Add output file name and input bam; these take precedence over kwargs
        merged_opts = {**kwargs, "-o": out_gtf_file, "--": (bam_file, )}

        #call stringtie
        status = self.run_stringtie(verbose=verbose,
                                    quiet=quiet,
                                    logs=logs,
                                    objectid=objectid,
                                    **merged_opts)

        #report success only if the gtf file was actually written
        if status and pu.check_files_exist(out_gtf_file):
            return out_gtf_file
        return ""
Beispiel #23
0

#####################################################################################

#argv[1]: file containing gtex/tcga ids
#argv[2]: analysis to run; 'quant' and 'align' are the recognised values
idsfile, analysis = sys.argv[1], sys.argv[2]
#at most one flag is True; both stay False for any other analysis value
runquant = analysis == 'quant'
runalign = analysis == 'align'

#read the ids file, one entry per line
with open(idsfile) as f:
    data = f.read().splitlines()
#set infile dir as workdir
basedir = pu.get_file_directory(idsfile)

#pyrpipe objects
#STAR object from the mapping module
star = mapping.Star()
#stringtie object from the assembly module
stieobj = assembly.Stringtie()
#biobambam bamtofastq wrapper
biobb = Bamtofastq()
#salmon for quant
salmon = quant.Salmon()

#delete final sorted bam
delete_bam = True

#outputs go to the directory containing the ids file
out_dir = basedir
Beispiel #24
0
 def perform_assembly(self,bam_file,out_dir=None,out_suffix="_stringtie",reference_gtf=None,threads=None,overwrite=False,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run stringtie using a bam file.

     Parameters
     ----------

     bam_file: string
         path to the bam file
     out_dir: string
         Directory for the output gtf file. Defaults to the directory of the bam file.
     out_suffix: string
         Suffix for the output gtf file
     reference_gtf: str
         Path to the reference gtf used as guide
     threads: int
         Number of threads to use. Defaults to self.threads.
     overwrite: bool
         Overwrite if output file already exists. If False and the output
         file exists, stringtie is not run and the existing path is returned.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Options to pass to stringtie. These override the options built by this function.
     :return: Returns the path to output GTF file, or an empty string if the reference gtf is missing, stringtie failed or the file was not produced
     :rtype: string
     """

     #create path to output file
     fname=pu.get_file_basename(bam_file)

     if not out_dir:
         out_dir=pu.get_file_directory(bam_file)

     out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")

     #handle overwrite: reuse an existing output file
     if not overwrite and os.path.isfile(out_gtf_file):
         print("The file "+out_gtf_file+" already exists. Exiting..")
         return out_gtf_file

     if not threads:
         threads=self.threads

     #Add output file name and input bam
     new_opts={"-o":out_gtf_file,"--":(bam_file,),"-p":str(threads)}

     #use the reference annotation as guide, if one was given
     if reference_gtf:
         if not pu.check_files_exist(reference_gtf):
             pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
             return ""
         new_opts["-G"]=reference_gtf

     #user supplied options take precedence
     merged_opts={**new_opts,**kwargs}

     #call stringtie
     status=self.run_stringtie(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)

     #report success only if the gtf file was actually written
     if status and pu.check_files_exist(out_gtf_file):
         return out_gtf_file
     return ""
Beispiel #25
0
    def build_index(self, index_path, genome, objectid="NA"):
        """Create a hisat2 index from a reference genome and remember it in self.index.

        Nothing is built when an index is already present at index_path and
        the module-level _force flag is not set.

        Parameters
        ----------

        index_path: string
            Location (path prefix) of the index to create
        genome: string
            Path to the reference genome fasta
        objectid : string
            Id attached to the executed command, e.g. the SRR accession; helps with debugging, benchmarking and reports.

        Raises
        ------
        ValueError
            If the genome file is missing.
        OSError
            If the index directory can not be created or hisat2-build fails.

        :return: True when the index already existed or hisat2-build completed
        :rtype: bool
        """

        #short-circuit: an index is already there and no rebuild was requested
        if not _force and pu.check_hisatindex(index_path):
            pu.print_green(
                "Hisat2 index {} already exists.".format(index_path))
            self.index = os.path.join(index_path)
            return True

        #the genome fasta is mandatory
        if not pu.check_files_exist(genome):
            pu.print_boldred(
                "Please provide a valid input fasta file to build Hisat2 index"
            )
            raise ValueError("Please check input to hisat2 build index")

        #make sure the directory holding the index exists
        target_dir = pu.get_file_directory(index_path)
        if not pu.check_paths_exist(target_dir) and not pu.mkdir(target_dir):
            raise OSError(
                "Error creating hisat2 index. Failed to create index directory."
            )

        accepted_options = valid_args._args_HISAT2BUILD

        #thread count set here wins over anything in the yaml parameter file
        build_options = {"-p": self._threads}
        params_file = os.path.join(_params_dir, 'hisat2_index.yaml')
        if pu.check_files_exist(params_file):
            file_options = pl.YAML_loader(params_file).get_kwargs()
            build_options = {**file_options, **build_options}

        #positional arguments: input genome followed by the index path
        build_options['--'] = (genome, index_path)

        command = ['hisat2-build'] + pu.parse_unix_args(accepted_options,
                                                        build_options)

        #run hisat2-build
        if not pe.execute_command(command, objectid=objectid):
            raise OSError("Error building Hisat2 index")

        #store the new index only when index files exist and this is not a dry run
        if pu.check_hisatindex(index_path) and not _dryrun:
            self.index = index_path
            if self.check_index():
                return True

        return True