def main(args=None):
    """Run the ``compressor`` Snakemake rules from the command line.

    The arguments are parsed with ``Options``, the requested conversion is
    validated, the configuration of the ``compressor`` module is saved in a
    temporary ``.yaml`` file (``TempFile`` called with ``dir="."``) and
    ``snakemake`` is executed on the ``compressor.rules`` file found in the
    module's path.

    :param args: full argument vector, program name first. Defaults to
        ``sys.argv``.
    :raises ValueError: if more than 20 jobs are requested while
        ``options.bypass`` is False; if ``misc.on_cluster("tars-")`` is
        true and no cluster option is given (unless unlocking); if the
        source/target pair is not a valid conversion; or if the extra
        snakemake options contain ``-s`` or ``-j``.
    """
    user_options = Options(prog="sequana")

    if args is None:
        args = sys.argv

    # If --help or no options provided, show the help.
    # NOTE(review): this relies on parse_args() exiting once the help has
    # been printed; otherwise ``options`` is unbound below -- confirm.
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    if options.version:
        import sequana

        print(sequana.version)
        sys.exit()

    if options.jobs > 20 and options.bypass is False:
        raise ValueError(
            'The number of jobs is limited to 20. You can '
            + 'force this limit by using --bypass-job-limit')

    if misc.on_cluster("tars-") and options.unlock is False:
        if options.cluster is None:
            raise ValueError(
                "You are on TARS (Institut Pasteur). You "
                + " must use --cluster option to provide the scheduler "
                + " options (typically ' --cluster 'sbatch --qos normal' )")

    # Valid codecs: fastq/fq, optionally followed by a compression suffix.
    # The empty codec yields the bare "fastq"/"fq" once the dot is stripped.
    valid_extensions = [
        (prefix + codec).rstrip(".")
        for prefix in ("fastq.", "fq.")
        for codec in ("", "bz2", "gz", "dsrc")
    ]

    # A conversion needs two *different* valid extensions.
    if (options.source == options.target
            or options.source not in valid_extensions
            or options.target not in valid_extensions):
        raise ValueError(
            "--target and --source combo not valid. "
            "Must be one of fastq, fastq.gz, fastq.bz2 or fastq.dsrc")

    # Create the config file locally
    module = Module("compressor")

    with TempFile(suffix=".yaml", dir=".") as temp:
        cfg = SequanaConfig(module.config)
        cfg.config.compressor.source = options.source
        cfg.config.compressor.target = options.target
        cfg.config.compressor.recursive = options.recursive
        cfg.config.compressor.verbose = options.verbose
        cfg.config.compressor.threads = options.threads
        cfg._update_yaml()
        cfg.save(filename=temp.name)

        # The Snakefile can stay in its original place:
        rule = module.path + os.sep + "compressor.rules"

        # Run the snakemake command itself.
        cmd = 'snakemake -s %s --configfile %s -j %s ' % (
            rule, temp.name, options.jobs)

        if options.dryrun:
            cmd += " --dryrun "

        if options.verbose is False:
            cmd += " --quiet "
        else:
            cmd += " -p "

        # for slurm only: --cores-per-socket
        if options.cluster:
            cmd += ' --cluster "%s" ' % options.cluster

        if options.snakemake:
            # -s and -j are already set above. Compare whole tokens so that
            # the flags are also detected at the very beginning or end of
            # the user-provided string.
            extra_tokens = options.snakemake.split()
            if "-s" in extra_tokens or "-j" in extra_tokens:
                raise ValueError(
                    "-s or -j cannot be used in "
                    + " --snakemake-options (already used internally")
            cmd += options.snakemake

        if options.unlock:
            cmd += " --unlock "

        if options.verbose:
            print(cmd)

        # On travis, snakemake.shell command from snakemake fails.
        # Most probably because travis itself uses a subprocess.
        # execute from easydev uses pexpect.spawn, which seems to work well.
        # The command runs inside the ``with`` block because it reads the
        # temporary configuration file.
        from easydev import execute

        execute(cmd, showcmd=False)
def _apply_config_params(config, config_params):
    """Set entries of *config* from a ``--config-params`` string.

    *config_params* is a comma-separated list of items written as
    ``key:value``, ``section:key:value`` or
    ``section:subsection:key:value``. Values are stored as strings.

    :raises ValueError: if an item does not contain 1, 2 or 3 colons.
    """
    for item in config_params.split(","):
        parts = item.strip().split(":")
        if not 2 <= len(parts) <= 4:
            raise ValueError(
                "incorrect format following --config-params. "
                "Expected at least one : sign and at most 3 of them. "
                "Config file section such as:\n"
                "project: tutorial\n"
                "should be encoded project:tutorial")
        keys, value = parts[:-1], parts[-1]
        # Walk down to the innermost section, then assign the last key.
        node = config
        for key in keys[:-1]:
            node = node[key]
        node[keys[-1]] = value


def _build_runme_script(options, version):
    """Return the text of the ``runme.sh`` script for *options*.

    Every placeholder is formatted before user-provided text (command line,
    cluster options) is added, so a literal ``%`` in that text is written
    unchanged.
    """
    header = [
        "#!/bin/sh",
        "# generated with sequana version %s with this command:" % version,
        "# %s" % " ".join(sys.argv),
    ]
    cmd = "snakemake -s %s.rules --stats stats.txt -p -j %s --nolock" % (
        options.pipeline, options.jobs)

    if options.forceall:
        cmd += " --forceall "

    if options.cluster:
        # Do we want to include the cluster config option ?
        cluster_config = Module(options.pipeline).cluster_config
        if options.ignore_cluster_config is True:
            cluster_config = None

        if cluster_config is None:
            cmd += ' --cluster "%s"' % options.cluster
        else:
            cmd += ' --cluster "%s" --cluster-config %s' % (
                options.cluster, os.path.basename(cluster_config))

    if options.redirection:
        cmd += " 1>run.out 2>run.err"

    return "\n".join(header + [cmd])


def sequana_init(options):
    """Create and populate the working directory of a pipeline.

    The Snakefile of ``options.pipeline`` and its configuration files are
    copied into ``options.target_dir``, the configuration is updated from
    the command-line options, and a ``README`` and an executable
    ``runme.sh`` script are written next to them.

    :param options: parsed command-line options (``pipeline``,
        ``target_dir``, ``force``, ``config``, ``config_params``,
        ``cluster``, ``jobs``, ... are read here).
    :raises IOError: if ``options.config`` is set but the file does not
        exist.
    :raises ValueError: if an item of ``options.config_params`` does not
        contain 1, 2 or 3 colons.
    """
    import sequana
    from sequana.misc import textwrap
    from sequana import SequanaConfig, sequana_data

    sa = Tools(verbose=options.verbose)

    # Check that the pipeline is well defined
    module = Module(options.pipeline)

    if os.path.exists(options.target_dir):
        txt = ("Will override the following files if present: %s.rules "
               "config.yaml, runme.sh, ...")
        sa.blue(txt % options.pipeline)
        if options.force is True:
            choice = "y"
        else:
            choice = input(
                red("Do you want to proceed (to avoid this "
                    + " message, use --force)? [y]/n:"))
        # Anything but an explicit "n" is taken as a yes.
        if choice == "n":
            sys.exit(0)

    # Copying snakefile
    logger.info("Copying snakefile")
    sa.mkdir(options.target_dir)
    shutil.copy(
        module.snakefile,
        os.path.join(options.target_dir, options.pipeline + ".rules"))

    # Creating README to print on the screen and in a file
    txt = "User command::\n\n"
    txt += " %s \n\n" % " ".join(sys.argv)
    txt += "You can now run snakemake yourself or type::"
    txt += purple("""

    snakemake -s %s.rules --stats stats.txt -p -j 4

    """ % options.pipeline)
    txt += """
    # -j 4 means you will use 4 cores
    # -p prints the commands used
    # --stats stats.txt must be used since stats.txt is expected to be found.

    or just run the bash script::

        sh runme.sh

    EDIT THE config.yaml if needed

    Once finished with success, the report/ directory contains a summary.html
    and relevant files (depends on the pipeline).
    """

    logger.info("Creating README")
    with open(os.path.join(options.target_dir, "README"), "w") as fh:
        # Drop the two terminal escape sequences before writing to disk
        # (presumably the colour codes added by purple() -- confirm).
        fh.write(txt.replace("\x1b[35m", "").replace("\x1b[39;49;00m", ""))

    # Creating Config file
    logger.info("Creating the config file")

    # Create (if needed) and update the config file
    config_filename = os.path.join(options.target_dir, "config.yaml")

    if options.config:
        # full existing path
        if os.path.exists(options.config):
            shutil.copy(options.config, config_filename)
        else:
            # or a sequana config file in the module path ?
            raise IOError(
                "Config file %s not found locally" % options.config)
    else:
        copy_config_from_sequana(module, "config.yaml", config_filename)

    # Copy multiqc if it is available
    multiqc_filename = os.path.join(
        options.target_dir, "multiqc_config.yaml")
    copy_config_from_sequana(module, "multiqc_config.yaml", multiqc_filename)
    cluster_cfg_filename = os.path.join(
        options.target_dir, "cluster_config.json")
    copy_config_from_sequana(
        module, "cluster_config.json", cluster_cfg_filename)

    # The input
    cfg = SequanaConfig(config_filename)
    cfg.config.input_directory = options.input_directory
    cfg.config.input_pattern = options.pattern
    cfg.config.input_extension = options.extension
    cfg.config.input_samples.file1 = options.file1
    cfg.config.input_samples.file2 = options.file2
    cfg.config.input_readtag = options.input_readtag

    # Dedicated section for quality control section
    if options.pipeline == "quality_control":
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)

        if options.kraken:
            cfg.config.kraken.database_directory = os.path.abspath(
                options.kraken)
            cfg.config.kraken.do = True
        else:
            cfg.config.kraken.do = False

        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_type = options.adapters

        # For all pipelines using BWA.
        # NOTE(review): kept inside the quality_control branch since
        # variant_calling sets a distinct bwa_mem_ref section below --
        # confirm the intended nesting.
        if options.reference:
            cfg.config.bwa_mem.reference = os.path.abspath(options.reference)

    if options.pipeline == "variant_calling":
        if options.reference:
            cfg.config.bwa_mem_ref.reference = os.path.abspath(
                options.reference)

    if options.pipeline in ["rnaseq", "smallrnaseq"]:
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)
        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_choice = options.adapters

    cfg.copy_requirements(target=options.target_dir)

    # FIXME If invalid, no error raised
    if options.config_params:
        _apply_config_params(cfg.config, options.config_params)

    # important to update yaml with content of config
    cfg._update_yaml()
    cfg.save(config_filename)

    # Creating a unique runme.sh file
    runme_filename = os.path.join(options.target_dir, "runme.sh")
    with open(runme_filename, "w") as fout:
        fout.write(_build_runme_script(options, sequana.version))

    # change permission of runme.sh to 755
    st = os.stat(runme_filename)
    os.chmod(runme_filename, st.st_mode | 0o755)

    sa.green("Initialisation of %s succeeded" % options.target_dir)
    sa.green("Please, go to the project directory ")
    sa.purple("\n cd %s\n" % options.target_dir)
    sa.green("Check out the README and config.yaml files")
    sa.green("A basic script to run the analysis is named runme.sh ")
    sa.purple("\n sh runme.sh\n")
    sa.purple("On a slurm cluster, you may type:")
    sa.purple("\n srun --qos normal runme.sh\n")
    sa.green(
        "In case of trouble, please post an issue on "
        "https://github.com/sequana/sequana/issues ")
    sa.green(
        "or type sequana --issue and fill a post with the error and the "
        "config file (NO DATA PLEASE)")

    # Change permission (best effort: only report a failure)
    try:
        os.chmod(runme_filename, 0o755)
    except OSError:
        logger.info(
            "Please use Python3. Change the mode of %s manually to 755"
            % runme_filename)
def test_pipeline_manager():
    """Exercise ``snaketools.PipelineManager`` on valid and invalid inputs.

    Covers configurations given as ``SequanaConfig`` objects and as saved
    YAML files, checks the ``paired`` attribute for several read tags, and
    checks that unusable configurations make the constructor fail.
    """

    def expect_failure(name, config):
        """Fail unless ``PipelineManager(name, config)`` raises."""
        # The failure is raised outside of the ``try`` block: an
        # AssertionError raised inside it would be caught by the handler
        # and the check could never fail.
        try:
            snaketools.PipelineManager(name, config)
        except Exception:
            return
        raise AssertionError(
            "PipelineManager(%r, ...) was expected to fail" % name)

    def write_project(tmpdir, pattern, readtag, filenames):
        """Save a config in *tmpdir*, create empty input files, return the
        config filename."""
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = pattern
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = readtag
        cfg._update_yaml()
        cfg.save(cfgname)
        for filename in filenames:
            # Empty placeholder files are enough for these checks.
            with open(os.path.join(tmpdir, filename), "a"):
                pass
        return cfgname

    # test missing input_directory
    expect_failure("custom", SequanaConfig({}))

    # normal behaviour but no input provided:
    config = Module("compressor")._get_config()
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    expect_failure("custome", cfg)

    # a single file: single-end
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    pm = snaketools.PipelineManager("custom", cfg)
    assert pm.paired is False

    # a pattern matching several files of that directory: paired-end
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    cfg.config.input_pattern = "Hm*gz"
    pm = snaketools.PipelineManager("custom", cfg)
    pm.plot_stats()
    assert pm.paired is True

    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")

    # Test different configuration of input_directory, input_readtag,
    # input_pattern: (pattern, readtag, files to create, expected paired)
    valid_cases = [
        # the _R[12]_ tag, paired
        ("*fastq.gz", "_R[12]_",
         ["test_R1_.fastq.gz", "test_R2_.fastq.gz"], True),
        # the _[12]. tag, paired
        ("*fastq.gz", "_[12].",
         ["test_1.fastq.gz", "test_2.fastq.gz"], True),
        # the _R[12]_ tag, single end
        ("*fastq.gz", "_R[12]_", ["test_R1_.fastq.gz"], False),
    ]
    for pattern, readtag, filenames, expected in valid_cases:
        with tempfile.TemporaryDirectory() as tmpdir:
            cfgname = write_project(tmpdir, pattern, readtag, filenames)
            pm = snaketools.PipelineManager("test", cfgname)
            assert pm.paired is expected

    # Configurations for which the manager is expected to fail:
    # (pattern, readtag, files to create)
    invalid_cases = [
        # pattern wrong on purpose: it does not match the created file
        ("*fq.gz", "_R[12]_", ["test_R1_.fastq.gz"]),
        # read tag without its leading underscore
        ("*fastq.gz", "R[12]_", ["testR1_.fastq.gz", "testR2_.fastq.gz"]),
    ]
    for pattern, readtag, filenames in invalid_cases:
        with tempfile.TemporaryDirectory() as tmpdir:
            cfgname = write_project(tmpdir, pattern, readtag, filenames)
            expect_failure("test", cfgname)