def submit_slurm_job(outfile, jobname, time, command, email, conda_profile, conda_env):
    """Write a SLURM batch script to ``outfile`` and submit it with sbatch.

    Parameters
    ----------
    outfile: str
        Path of the batch script to write (opened in 'w' mode).
    jobname: str
        Value used for ``#SBATCH -J``.
    time: str
        Value used for ``#SBATCH -t``.
    command: list
        Shell command lines (strings); they are joined with newlines and
        placed after the environment setup.
    email: str
        Value used for ``#SBATCH --mail-user``.
    conda_profile: str
        File that the script sources before ``~/.bashrc``.
    conda_env: str
        Name given to ``conda activate``.
    """
    script_lines = [
        '#!/bin/bash',
        '#SBATCH -J ' + jobname,
        '#SBATCH -N 1',
        '#SBATCH -p RM',
        '#SBATCH --ntasks-per-node 28',
        '#SBATCH -t ' + time,
        '#SBATCH -C EGRESS',
        # the email parameter was previously never written into the script
        '#SBATCH --mail-user=' + email,
        '#SBATCH --mail-type=ALL',
        '',
        '#load required modules',
        'source ' + conda_profile,
        'source ~/.bashrc',
        'conda activate ' + conda_env,
        '',
        '# run commands',
        '\n'.join(command),
    ]
    # the context manager closes the file even if the write fails
    with open(outfile, 'w') as script:
        script.write('\n'.join(script_lines) + '\n')
    # submit
    print('Submitting job {}'.format(outfile))
    pe.execute_command(['sbatch', outfile])
def run_bbduk(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run bbduk.sh with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    objectid: str
        Forwarded to pe.execute_command
    kwargs: dict
        Options for bbduk.sh. A key given here replaces the same key in
        self.passedArgumentDict; other stored keys are kept.

    :return: The status returned by pe.execute_command
    """
    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)
    # bbduk.sh follows java style arguments
    command = ["bbduk.sh", *pu.parse_java_args(self.valid_args, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("bbduk failed")
    return outcome
def run_bbsplit(self,objectid="NA",**kwargs):
    """Run bbsplit.sh with the options given in kwargs.

    Parameters
    ----------
    objectid: str
        Forwarded to pe.execute_command
    kwargs: dict
        Options for bbsplit.sh; passed to pu.parse_java_args together with
        the list of recognised bbsplit argument names.

    :return: Status of bbsplit command
    :rtype: bool
    """
    # argument names handed to the java-style argument parser
    recognised = [
        'ref', 'ref_x', 'build', 'path', 'in', 'in1', 'in2', 'outu',
        'outu2', 'outu1', 'qin', 'interleaved', 'maxindel', 'minratio',
        'minhits', 'ambiguous', 'ambiguous2', 'qtrim', 'untrim', 'out_',
        'basename', 'bs', 'scafstats', 'refstats', 'nzo', '-Xmx', '-eoom',
        '-da',
    ]
    # bbsplit.sh follows java style arguments
    command = ["bbsplit.sh", *pu.parse_java_args(recognised, kwargs)]
    outcome = pe.execute_command(command, objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("bbsplit failed")
    return outcome
def runMikado(self, sub_command, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper to run mikado.

    Parameters
    ----------
    sub_command: str
        mikado sub command; placed right after ``mikado`` on the command line
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    objectid: str
        Forwarded to pe.execute_command
    kwargs: dict
        Options for mikado. A key given here replaces the same key in
        self.passedArgumentDict; other stored keys are kept.

    :return: The status returned by pe.execute_command
    """
    # override existing arguments
    mergedArgsDict = {**self.passedArgumentDict, **kwargs}

    mikado_Cmd = ['mikado', sub_command]
    # add options; the argument parser is taken from pu, matching the
    # other command wrappers (this block previously looked it up on pe)
    mikado_Cmd.extend(pu.parse_unix_args(self.valid_args, mergedArgsDict))

    # start execution
    status = pe.execute_command(mikado_Cmd,
                                verbose=verbose,
                                quiet=quiet,
                                logs=logs,
                                objectid=objectid)
    if not status:
        pu.print_boldred("mikado failed")
    # return status
    return status
def runMikado(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a mikado sub command after checking that it is a known one.

    Parameters
    ----------
    sub_command: str
        One of configure, prepare, serialise, pick or compare
    valid_args: list
        Passed to pu.parse_unix_args together with kwargs
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    objectid: str
        Forwarded to pe.execute_command
    kwargs: dict
        Options for mikado

    :return: False for an unknown sub command, otherwise the status
             returned by pe.execute_command
    """
    known_sub_commands = ('configure', 'prepare', 'serialise', 'pick',
                          'compare')
    # refuse anything that is not a known mikado sub command
    if sub_command not in known_sub_commands:
        pu.print_boldred("Invalid command: " + sub_command + ". Exiting...")
        return False

    command = ['mikado', sub_command, *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("mikado failed")
    return outcome
def run_trinity(self,valid_args_list=None,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Run the Trinity command with the options given in kwargs.

    Parameters
    ----------
    valid_args_list: list
        list of valid arguments; passed to pu.parse_unix_args with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed to trinity

    :return: Return the status of trinity command.
    :rtype: bool
    """
    command = ['Trinity', *pu.parse_unix_args(valid_args_list, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("trinity failed")
    return outcome
def test_multiqc():
    """Run the pyrpipe_diagnostic.py multiqc sub command on a stored log
    and assert that the command reports success."""
    command_line = "pyrpipe_diagnostic.py multiqc -o tests/testout/mqcreport -t tests/testout/mqctmp tests/test_files/pyrpipe_logs/2020-01-22-18_14_47_pyrpipe.log"
    argv = command_line.split()
    # nothing is logged and no object id / command name is attached
    outcome = pe.execute_command(argv,
                                 verbose=True,
                                 quiet=False,
                                 logs=False,
                                 objectid="",
                                 command_name="")
    assert outcome == True, "shell failed"
def test_benchmark():
    """Run the pyrpipe_diagnostic.py benchmark sub command on a stored log
    and assert that the command reports success."""
    command_line = "pyrpipe_diagnostic.py benchmark -t tests/testout/bmeport tests/test_files/pyrpipe_logs/2020-01-22-18_14_47_pyrpipe.log"
    argv = command_line.split()
    # nothing is logged and no object id / command name is attached
    outcome = pe.execute_command(argv,
                                 verbose=True,
                                 quiet=False,
                                 logs=False,
                                 objectid="",
                                 command_name="")
    assert outcome == True, "benchmark failed"
def sortbam(bam, oid):
    """Sort a bam file with sambamba.

    The output is written next to the input as ``<basename>_sorted.bam``,
    where directory and basename come from pu.get_file_directory and
    pu.get_file_basename.

    Parameters
    ----------
    bam: str
        Path to the bam file to sort
    oid: str
        Object id attached to the logged command

    :return: Path to the sorted bam file, or "" if sambamba failed
    :rtype: str
    """
    outfile = pu.get_file_basename(bam) + "_sorted.bam"
    outpath = os.path.join(pu.get_file_directory(bam), outfile)
    # Build the argument list directly instead of splitting a string, so a
    # path containing whitespace stays a single argument.
    cmd = ['sambamba', 'sort', '-t', '25', '-m', '100G', '-o', outpath, bam]
    if not pe.execute_command(cmd, logs=True, objectid=oid):
        return ""
    return outpath
def run_cuff(self, command, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper for running cuff* commands

    Parameters
    ----------
    command: string
        the command name; one of cuffcompare, cuffdiff, cufflinks,
        cuffmerge, cuffnorm or cuffquant
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed to command. A key given here replaces the same key
        in self.passed_args_dict; other stored keys are kept.

    :return: Returns the status of the command. False for an unknown command.
    :rtype: bool
    """
    validCommands = [
        'cuffcompare', 'cuffdiff', 'cufflinks', 'cuffmerge', 'cuffnorm',
        'cuffquant'
    ]
    if command not in validCommands:
        # fill the placeholder instead of appending to the literal "{}"
        pu.print_boldred("Unknown command {}".format(command))
        return False

    # override existing arguments
    merged_args_dict = {**self.passed_args_dict, **kwargs}

    cuff_cmd = [command]
    # add options
    cuff_cmd.extend(pu.parse_unix_args(self.valid_args_list, merged_args_dict))

    # start execution
    status = pe.execute_command(cuff_cmd,
                                verbose=verbose,
                                quiet=quiet,
                                logs=logs,
                                objectid=objectid)
    if not status:
        # name the command that actually ran, not always "cufflinks"
        pu.print_boldred("{} failed".format(command))
    # return status
    return status
def run_star(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run STAR with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to star. A key given here replaces the same key
        in self.passedArgumentDict; other stored keys are kept.

    :return: Returns the status of star. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when self.check_index() is false.
    """
    # a usable index is required before anything is run
    if not self.check_index():
        raise Exception(
            "ERROR: Invalid star index. Please run build index to generate an index."
        )

    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)

    command = ['STAR', *pu.parse_unix_args(self.valid_args, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    print("STAR failed:" + " ".join(command))
    return outcome
def run_salmon(self, subcommand, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a salmon sub command with the options given in kwargs.

    Parameters
    ----------
    subcommand: str
        subcommand for salmon
    valid_args: list
        List of valid arguments; passed to pu.parse_unix_args with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to salmon.

    :return: Returns the status of salmon. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when the subcommand is not "index" and
    self.check_index() is false.
    """
    # every sub command except "index" needs an existing index
    if subcommand != "index" and not self.check_index():
        raise Exception(
            "ERROR: Invalid salmon index. Please run build index to generate an index."
        )

    command = ['salmon', subcommand, *pu.parse_unix_args(valid_args, kwargs)]
    # the command is recorded under "salmon <subcommand>"
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid,
                                 command_name=" ".join(command[:2]))
    if outcome:
        return outcome
    pu.print_boldred("salmon failed")
    return outcome
def run_hisat2(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run hisat2 with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to hisat2. A key given here replaces the same key
        in self.passedArgumentDict; other stored keys are kept.

    :return: Returns the status of hisat2. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when self.check_index() is false.
    """
    # a usable index is required before anything is run
    if not self.check_index():
        raise Exception(
            "ERROR: Invalid HISAT2 index. Please run build index to generate an index."
        )

    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)

    command = ['hisat2', *pu.parse_unix_args(self.valid_args, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    print("hisat2 failed:" + " ".join(command))
    return outcome
def run_portcullis(self, sub_command, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a portcullis sub command with the object's stored options
    overridden by kwargs.

    Parameters
    ----------
    sub_command: string
        sub_command to pass to portcullis e.g. full, prep, junc etc.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to portcullis. A key given here replaces the same
        key in self.passedArgumentDict; other stored keys are kept.

    :return: Returns the status of portcullis. True is passed, False if failed.
    :rtype: bool
    """
    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)

    command = ['portcullis', sub_command,
               *pu.parse_unix_args(self.valid_args, options)]
    # the full command line is always echoed before it runs
    print("Executing:" + " ".join(command))

    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("portcullis failed")
    return outcome
def run_diamond(self, subcommand, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a diamond sub command with the options given in kwargs.

    Parameters
    ----------
    subcommand: str
        diamond sub command; placed right after ``diamond``
    valid_args: list
        Passed to pu.parse_unix_args together with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to diamond.

    :return: Returns the status of diamond. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when the subcommand is blastx or blastp and
    self.check_index() is false.
    """
    # only the two search sub commands need an existing index
    if subcommand in ("blastx", "blastp") and not self.check_index():
        raise Exception(
            "ERROR: Invalid Diamond index. Please run build_index() to generate an index."
        )

    command = ['diamond', subcommand, *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    print("Diamond failed:" + " ".join(command))
    return outcome
def run_hisat2(self, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run hisat2 with the options given in kwargs.

    Parameters
    ----------
    valid_args: list
        list of valid arguments; passed to pu.parse_unix_args with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to hisat2.

    :return: Returns the status of hisat2. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when self.check_index() is false.
    """
    # a usable index is required before anything is run
    if not self.check_index():
        raise Exception(
            "ERROR: Invalid HISAT2 index. Please run build index to generate an index."
        )

    command = ['hisat2', *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    print("hisat2 failed:" + " ".join(command))
    return outcome
def run_bowtie2(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run bowtie2 with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to bowtie2. A key given here replaces the same
        key in self.passedArgumentDict; other stored keys are kept.

    :return: Returns the status of bowtie2. True is passed, False if failed.
    :rtype: bool

    Raises an Exception when self.check_index() is false.
    """
    # a usable index is required before anything is run
    if not self.check_index():
        raise Exception(
            "ERROR: Invalid Bowtie2 index. Please run build index to generate an index."
        )

    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)

    command = ['bowtie2', *pu.parse_unix_args(self.valid_args, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("bowtie2 failed")
    return outcome
def run_samtools(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a samtools sub command with the options given in kwargs.

    Parameters
    ----------
    sub_command: string
        sub_command to pass to samtools e.g. sort, merge etc
    valid_args: list
        A list containing valid parameters; passed to pu.parse_unix_args
        together with kwargs. Default: None
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to samtools.

    :return: Returns the status of samtools. True is passed, False if failed.
    :rtype: bool
    """
    command = ['samtools', sub_command,
               *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("samtools failed")
    return outcome
def run_portcullis(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run a portcullis sub command with the options given in kwargs.

    Parameters
    ----------
    sub_command: string
        sub_command to pass to portcullis e.g. full, prep, junc etc.
    valid_args: list
        A list of valid arguments; passed to pu.parse_unix_args together
        with kwargs.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to portcullis.

    :return: Returns the status of portcullis. True is passed, False if failed.
    :rtype: bool
    """
    command = ['portcullis', sub_command,
               *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("portcullis failed")
    return outcome
def run_stringtie(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run stringtie with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to stringtie. A key given here replaces the same
        key in self.passed_args_dict; other stored keys are kept.

    :return: Returns the status of stringtie command.
    :rtype: bool
    """
    # kwargs win over the options stored on the object
    options = dict(self.passed_args_dict)
    options.update(kwargs)

    command = ['stringtie',
               *pu.parse_unix_args(self.valid_args_list, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("stringtie failed")
    return outcome
def run_trimgalore(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run trim_galore with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to trimgalore. A key given here replaces the same
        key in self.passedArgumentDict; other stored keys are kept.

    :return: Status of trimgalore command
    :rtype: bool
    """
    # kwargs win over the options stored on the object
    options = dict(self.passedArgumentDict)
    options.update(kwargs)

    command = ['trim_galore', *pu.parse_unix_args(self.valid_args, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("trimgalore failed")
    return outcome
def run_cufflinks(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run cufflinks with the object's stored options overridden by kwargs.

    Parameters
    ----------
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed to cufflinks. A key given here replaces the same key
        in self.passed_args_dict; other stored keys are kept.

    :return: Returns the status of cufflinks command.
    :rtype: bool
    """
    # kwargs win over the options stored on the object
    options = dict(self.passed_args_dict)
    options.update(kwargs)

    command = ['cufflinks',
               *pu.parse_unix_args(self.valid_args_list, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("cufflinks failed")
    return outcome
def run_transdecoder(self, command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run the given transdecoder command with the options in kwargs.

    Parameters
    ----------
    command: str
        Name of the executable to run; it is the first item of the command
    valid_args: list
        Passed to pu.parse_unix_args together with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        arguments to pass to the command.

    :return: Returns the status of the command. True is passed, False if failed.
    :rtype: bool
    """
    full_command = [command, *pu.parse_unix_args(valid_args, kwargs)]
    outcome = pe.execute_command(full_command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    print("Transdecoder failed:" + " ".join(full_command))
    return outcome
def run_bbsplit(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run bbsplit.sh with the options given in kwargs.

    Options stored on the object are not used here; only kwargs are passed.

    Parameters
    ----------
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    objectid: str
        Forwarded to pe.execute_command
    kwargs: dict
        Options for bbsplit.sh

    :return: Status of bbsplit command
    :rtype: bool
    """
    # argument names handed to the java-style argument parser
    recognised = [
        'ref', 'ref_x', 'build', 'path', 'in', 'in1', 'in2', 'outu',
        'outu2', 'outu1', 'qin', 'interleaved', 'maxindel', 'minratio',
        'minhits', 'ambiguous', 'ambiguous2', 'qtrim', 'untrim', 'out_',
        'basename', 'bs', 'scafstats', 'refstats', 'nzo', '-Xmx', '-eoom',
        '-da',
    ]
    # work on a copy of kwargs; class level arguments are deliberately ignored
    options = dict(kwargs)

    # bbsplit.sh follows java style arguments
    command = ["bbsplit.sh", *pu.parse_java_args(recognised, options)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("bbsplit failed")
    return outcome
def run_bbduk(self, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run bbduk.sh with the options given in kwargs.

    Parameters
    ----------
    valid_args: list
        A list of valid arguments; passed to pu.parse_java_args with kwargs
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        options passed to bbduk

    :return: The status returned by pe.execute_command
    """
    # bbduk.sh follows java style arguments
    command = ["bbduk.sh", *pu.parse_java_args(valid_args, kwargs)]
    outcome = pe.execute_command(command,
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid)
    if outcome:
        return outcome
    pu.print_boldred("bbduk failed")
    return outcome
def runRibocode(self, gtf, genome, bam, l="no", outsuffix="ribocode_out", verbose=False, quiet=False, logs=True, objectid="NA"):
    """Wrapper to run ribocode in one step (RiboCode_onestep).

    Parameters
    ----------
    gtf: str
        Path to the gtf file; given as -g
    genome: str
        Path to the genome file; given as -f
    bam: str
        Path to the bam file; given as -r
    l: str
        Value given as -l
    outsuffix: str
        Output name; joined to the directory of the gtf file and given as -o
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    objectid: str
        Forwarded to pe.execute_command

    :return: The output path given as -o, or "" if the input files are
             missing or the command failed
    :rtype: str
    """
    # check input
    if not pu.check_files_exist(gtf, genome, bam):
        pu.print_boldred("Please check input files for Ribocode")
        return ""

    out_dir = pu.get_file_directory(gtf)
    outFile = os.path.join(out_dir, outsuffix)

    # the genome key needs its leading dash like the other options; it was
    # previously written as "f"
    newOpts = {"-g": gtf, "-f": genome, "-r": bam, "-l": l, "-o": outFile}

    ribocode_Cmd = ['RiboCode_onestep']
    ribocode_Cmd.extend(pu.parse_unix_args(self.valid_args, newOpts))

    status = pe.execute_command(ribocode_Cmd,
                                verbose=verbose,
                                quiet=quiet,
                                logs=logs,
                                objectid=objectid)
    if not status:
        pu.print_boldred("ribocode failed")
        return ""

    return outFile
def run_fasterqdump(self, delete_sra=False, verbose=False, quiet=False, logs=True, **kwargs):
    """Execute fasterq-dump to convert .sra file to fastq files.
    The fastq files will be stored in the same directory as the sra file.
    All fastq files should be consistently named using the extension .fastq

    Parameters
    ----------
    delete_sra: bool
        delete sra file after completion
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    kwargs: dict
        A dict containing fasterq-dump arguments. '-O' and '-o' are ignored
        because location and file name are taken from the object.

    :return: Return status of the fasterq-dump command. True if the fastq
             files are present afterwards and False if failed.
    :rtype: bool

    Examples
    --------
    >>> object.run_fasterqdump()
    True
    """
    # check if fastq files exists already
    if self.fastqFilesExistsLocally():
        pu.print_green("Fastq files exist already")
        return True

    # the .sra file must have been downloaded first
    if not self.sraFileExistsLocally():
        pu.print_boldred(
            "Error executing fasterq-dump: .sra file not found. Please run download_sra()."
        )
        return False

    fasterqdumpArgsList = [
        '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h',
        '-V', '-L', '-v', '-q', '-b', '-m', '-e', '-x', '-S', '-3', '-P',
        '-M', '-B', '--option-file', '--strict', '--table',
        '--include-technical', '--skip-technical', '--concatenate-reads'
    ]

    # ignore location and file name arguments if given
    if '-O' in kwargs:
        print("Ignoring -O flag." + " location is: " + self.location)
        del kwargs['-O']
    if '-o' in kwargs:
        print("Ignoring -o flag." + " File name is: " + self.srr_accession)
        del kwargs['-o']

    fstrqd_Cmd = ['fasterq-dump']
    fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs))
    # add location
    fstrqd_Cmd.extend(['-O', self.location])
    # add output filename. output will be <srr_accession>.fastq or
    # <srr_accession>_1.fastq and <srr_accession>_2.fastq
    fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"])
    fstrqd_Cmd.append(self.localSRAFilePath)

    # execute command; verbose/quiet/logs were accepted but previously not
    # handed on to the engine
    cmdStatus = pe.execute_command(fstrqd_Cmd,
                                   verbose=verbose,
                                   quiet=quiet,
                                   logs=logs,
                                   objectid=self.srr_accession)
    if not cmdStatus:
        print("fasterqdump failed for:" + self.srr_accession)
        return False

    # check if fastq files are downloaded
    if self.layout == "SINGLE":
        self.localfastqPath = os.path.join(self.location,
                                           self.srr_accession + ".fastq")
        if not pu.check_files_exist(self.localfastqPath):
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastqPath + " does not exist!!!")
            return False
    else:
        self.localfastq1Path = os.path.join(self.location,
                                            self.srr_accession + "_1.fastq")
        self.localfastq2Path = os.path.join(self.location,
                                            self.srr_accession + "_2.fastq")
        if not pu.check_files_exist(self.localfastq1Path,
                                    self.localfastq2Path):
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastq1Path + " does not exist!!!")
            return False

    # delete sra file if specified
    if delete_sra:
        self.delete_sra()

    return True
def download_sra(self, verbose=False, quiet=False, logs=True, **kwargs):
    """This function downloads .sra file from NCBI SRA servers using the prefetch command.

    NCBI sra-toolkit 2.9 or higher must be installed on the system in order
    to use prefetch. The expected path of the downloaded file,
    <location>/<srr_accession>.sra, is saved in the object as
    localSRAFilePath. The size of the file is saved as sraFileSize and the
    layout ("PAIRED" or "SINGLE", from pe.is_paired) as layout.
    If the file already exists nothing is downloaded.

    Parameters
    ----------
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    kwargs: dict
        dict containing additional prefetch arguments. '-O' and '-o' are
        ignored because location and file name are taken from the object.

    :return: Return status of the prefetch command. True if successful download and False if failed.
    :rtype: bool

    Examples
    --------
    >>> object.download_sra()
    True
    """
    # store path to the downloaded sra file
    self.localSRAFilePath = os.path.join(self.location,
                                         self.srr_accession + ".sra")

    if pu.check_files_exist(self.localSRAFilePath):
        pu.print_green("File already exists:" + self.localSRAFilePath)
    else:
        pu.print_info("Downloading " + self.srr_accession + " ...")

        # scan for prefetch arguments
        prefetchArgsList = [
            '-f', '-t', '-l', '-n', '-s', '-R', '-N', '-X', '-o', '-a',
            '--ascp-options', '-p', '--eliminate-quals', '-c', '-O', '-h',
            '-V', '-L', '-v', '-q'
        ]

        # ignore location and file name arguments if given
        if '-O' in kwargs:
            print("Ignoring -O flag." + " location is: " + self.location)
            del kwargs['-O']
        if '-o' in kwargs:
            print("Ignoring -o flag." + " File name is: " +
                  self.srr_accession)
            del kwargs['-o']

        prefetch_Cmd = ['prefetch']
        prefetch_Cmd.extend(pu.parse_unix_args(prefetchArgsList, kwargs))
        prefetch_Cmd.extend(['-O', self.location])
        prefetch_Cmd.append(self.srr_accession)

        # verbose/quiet/logs were accepted but previously not handed on to
        # the engine
        cmdStatus = pe.execute_command(prefetch_Cmd,
                                       verbose=verbose,
                                       quiet=quiet,
                                       logs=logs,
                                       objectid=self.srr_accession)
        if not cmdStatus:
            pu.print_boldred("prefetch failed for:" + self.srr_accession)
            return False

        # validate path exists
        if not pu.check_files_exist(self.localSRAFilePath):
            pu.print_boldred("Error downloading file. File " +
                             self.localSRAFilePath + " does not exist!!!")
            return False

        print("Downloaded file: " + self.localSRAFilePath +
              " {0} ".format(pu.get_file_size(self.localSRAFilePath)))

    # shared by both paths: save .sra file size and detect the layout
    self.sraFileSize = pu.get_file_size(self.localSRAFilePath)
    if pe.is_paired(self.localSRAFilePath):
        self.layout = "PAIRED"
    else:
        self.layout = "SINGLE"

    return True
def download_fastq(self, verbose=False, quiet=False, logs=True, procs=2, **kwargs):
    """Function to download fastq files with fasterq-dump.

    fasterq-dump is run on the local .sra file when
    self.sraFileExistsLocally() is true and on the accession otherwise.
    If the object has no layout attribute yet, it is set from the number
    of files matching <srr_accession>*.fastq in self.location.

    Parameters
    ----------
    verbose: bool
        Forwarded to pe.execute_command
    quiet: bool
        Forwarded to pe.execute_command
    logs: bool
        Forwarded to pe.execute_command
    procs: int
        Value given to fasterq-dump as -e
    kwargs: dict
        A dict containing fasterq-dump arguments

    :return: True if the fastq files are present afterwards, False otherwise
    :rtype: bool
    """
    # check if fastq files exists already
    if self.fastqFilesExistsLocally():
        pu.print_green("Fastq files exist already")
        return True

    # '-e' is left out of this list because it is always set from procs
    # NOTE(review): '-O'/'-o' given in kwargs are not removed before the
    # fixed ones are appended below; confirm how the repeat is handled
    fasterqdumpArgsList = [
        '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h',
        '-V', '-L', '-v', '-q', '-b', '-m', '-x', '-S', '-3', '-P', '-M',
        '-B', '--option-file', '--strict', '--table',
        '--include-technical', '--skip-technical', '--concatenate-reads'
    ]

    fstrqd_Cmd = ['fasterq-dump']
    fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs))
    # add location
    fstrqd_Cmd.extend(['-O', self.location])
    # add output filename. output will be <srr_accession>.fastq or
    # <srr_accession>_1.fastq and <srr_accession>_2.fastq
    fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"])
    fstrqd_Cmd.extend(['-e', str(procs)])

    if self.sraFileExistsLocally():
        fstrqd_Cmd.append(self.localSRAFilePath)
    else:
        fstrqd_Cmd.append(self.srr_accession)

    # execute command; verbose/quiet/logs were accepted but previously not
    # handed on to the engine
    cmdStatus = pe.execute_command(fstrqd_Cmd,
                                   verbose=verbose,
                                   quiet=quiet,
                                   logs=logs,
                                   objectid=self.srr_accession)
    if not cmdStatus:
        print("fasterqdump failed for:" + self.srr_accession)
        return False

    # exactly one matching file means single end, anything else is
    # treated as paired
    if not hasattr(self, 'layout'):
        fq_files = pe.find_files(self.location,
                                 self.srr_accession + "*.fastq")
        if len(fq_files) == 1:
            self.layout = 'SINGLE'
        else:
            self.layout = 'PAIRED'

    # check if fastq files are downloaded
    if self.layout == "SINGLE":
        self.localfastqPath = os.path.join(self.location,
                                           self.srr_accession + ".fastq")
        if not pu.check_files_exist(self.localfastqPath):
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastqPath + " does not exist!!!")
            return False
    else:
        self.localfastq1Path = os.path.join(self.location,
                                            self.srr_accession + "_1.fastq")
        self.localfastq2Path = os.path.join(self.location,
                                            self.srr_accession + "_2.fastq")
        if not pu.check_files_exist(self.localfastq1Path,
                                    self.localfastq2Path):
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastq1Path + " does not exist!!!")
            return False

    return True
# Download the maize reference genome (if missing) and fastq files for a
# fixed list of SRA runs into the working directory.
from pyrpipe import pyrpipe_utils as pu
from pyrpipe import pyrpipe_engine as pe
# sra.SRA is used below but the module was never imported
from pyrpipe import sra

maizeRun = [
    'SRR1573523', 'SRR999058', 'SRR520999', 'SRR1168424', 'SRR1621015',
    'SRR3084882', 'SRR1620828', 'SRR3053545', 'SRR1620949', 'SRR1620947'
]

workingDir = "maize_out"
if not pu.check_paths_exist(workingDir):
    pu.mkdir(workingDir)

GENOME = workingDir + "/Zm-B73-REFERENCE-NAM-5.0.fa"
if not pu.check_files_exist(GENOME):
    print("Downloading genome fasta file")
    wget = [
        'wget',
        'https://download.maizegdb.org/Zm-B73-REFERENCE-NAM-5.0/Zm-B73-REFERENCE-NAM-5.0.fa.gz',
        '-q',
        '-O',
        GENOME + ".gz",
    ]
    # only decompress when the download itself reported success
    if pe.execute_command(wget, verbose=True, logs=False):
        pe.execute_command(['gunzip', GENOME + ".gz"],
                           verbose=True,
                           logs=False)
    else:
        print("Download failed:" + GENOME + ".gz")

# keep only the runs whose fastq download succeeded
sraObjects = []
for x in maizeRun:
    thisSraOb = sra.SRA(x, workingDir)
    if thisSraOb.download_fastq():
        sraObjects.append(thisSraOb)
    else:
        print("Download failed:" + x)

print("Following runs downloaded:")
for ob in sraObjects:
    print(ob.srr_accession)