Beispiel #1
0
def main(args=None):

    user_options = Options(prog="sequana")

    if args is None:
        args = sys.argv

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
       options = user_options.parse_args(args[1:])

    if options.version:
        import sequana
        print(sequana.version)
        sys.exit()

    if options.jobs > 20 and options.bypass is False:
        raise ValueError('The number of jobs is limited to 20. You can ' +
            'force this limit by using --bypass-job-limit')

    if misc.on_cluster("tars-") and options.unlock is False:
        if options.cluster is None:
            raise ValueError("You are on TARS (Institut Pasteur). You " +
                " must use --cluster option to provide the scheduler " +
                " options (typically ' --cluster 'sbatch --qos normal' )")

    # valid codecs:
    valid_extensions = [("fastq." + ext2).rstrip(".")
                        for ext2 in ['', 'bz2', 'gz', 'dsrc']]

    valid_extensions += [("fq." + ext2).rstrip(".")
                        for ext2 in ['', 'bz2', 'gz', 'dsrc']]

    valid_combos = [(x, y) for x in valid_extensions
                           for y in valid_extensions
                           if x!=y]

    if (options.source, options.target) not in valid_combos:
        raise ValueError("""--target and --source combo not valid.
Must be one of fastq, fastq.gz, fastq.bz2 or fastq.dsrc""")

    # Create the config file locally
    module = Module("compressor")

    with TempFile(suffix=".yaml", dir=".") as temp:
        cfg = SequanaConfig(module.config)
        cfg.config.compressor.source = options.source
        cfg.config.compressor.target = options.target
        cfg.config.compressor.recursive = options.recursive
        cfg.config.compressor.verbose = options.verbose
        cfg.config.compressor.threads = options.threads
        cfg._update_yaml()
        cfg.save(filename=temp.name)

        # The Snakefile can stay in its original place:
        rule = module.path + os.sep +  "compressor.rules"

        # Run the snakemake command itself.
        cmd = 'snakemake -s %s  --configfile %s -j %s ' % \
                (rule, temp.name, options.jobs)

        if options.dryrun:
            cmd += " --dryrun "

        if options.verbose is False:
            cmd += " --quiet "
        else:
            cmd += " -p "

        # for slurm only: --cores-per-socket
        if options.cluster:
            cluster = ' --cluster "%s" ' % options.cluster
            cmd += cluster

        if options.snakemake:
            if " -s " in options.snakemake or " -j " in options.snakemake:
                raise ValueError("-s or -j cannot be used in " +
                    " --snakemake-options    (already used internally")
            cmd += options.snakemake

        if options.unlock:
            cmd += " --unlock "

        if options.verbose:
            print(cmd)

        # On travis, snakemake.shell command from snakemake fails.
        # Most probably because travis itself uses a subprocess.
        # excute from easydev uses pexpect.spawn, which seems to work well
        from easydev import execute
        execute(cmd, showcmd=False)
Beispiel #2
0
def sequana_init(options):
    import sequana
    from sequana.misc import textwrap
    from sequana import SequanaConfig, sequana_data
    sa = Tools(verbose=options.verbose)

    # Check that the pipeline is well defined
    module = Module(options.pipeline)

    if os.path.exists(options.target_dir):
        txt = "Will override the following files if present: %s.rules " +\
              "config.yaml, runme.sh, ..."
        sa.blue(txt % options.pipeline)

        if options.force is True:
            choice = "y"
        else:
            choice = input(
                red("Do you want to proceed (to avoid this " +
                    " message, use --force)? [y]/n:"))
        if choice == "n":
            sys.exit(0)

    # Copying snakefile
    logger.info("Copying snakefile")
    sa.mkdir(options.target_dir)
    shutil.copy(module.snakefile,
                options.target_dir + os.sep + options.pipeline + ".rules")

    # Creating README to print on the screen and in a file
    txt = "User command::\n\n"
    txt += "    %s \n\n" % " ".join(sys.argv)
    txt += "You can now run snakemake yourself or type::"
    txt += purple("""

    snakemake -s %s.rules --stats stats.txt -p -j 4

    """ % options.pipeline)
    txt += """
    # -j 4 means you will use 4 cores
    # -p prints the commands used
    # --stats stats.txt must be used since stats.txt is expected to be found.

    or just run the bash script::

        sh runme.sh

    EDIT THE config.yaml if needed

    Once finished with success, the report/ directory contains a summary.html
    and relevant files (depends on the pipeline).
    """
    logger.info("Creating README")
    with open(options.target_dir + os.sep + "README", "w") as fh:
        fh.write(txt.replace("\x1b[35m", "").replace("\x1b[39;49;00m", ""))

    # Creating Config file
    logger.info("Creating the config file")

    # Create (if needed) and update the config file
    config_filename = options.target_dir + os.sep + "config.yaml"

    if options.config:
        # full existing path
        if os.path.exists(options.config):
            shutil.copy(options.config, config_filename)
        else:  # or a sequana config file in the module path ?
            raise (IOError("Config file %s not found locally" %
                           options.config))
    else:
        copy_config_from_sequana(module, "config.yaml", config_filename)

    # Copy multiqc if it is available
    multiqc_filename = options.target_dir + os.sep + "multiqc_config.yaml"
    copy_config_from_sequana(module, "multiqc_config.yaml", multiqc_filename)
    cluster_cfg_filename = options.target_dir + os.sep + "cluster_config.json"
    copy_config_from_sequana(module, "cluster_config.json",
                             cluster_cfg_filename)

    # The input
    cfg = SequanaConfig(config_filename)
    cfg.config.input_directory = options.input_directory
    cfg.config.input_pattern = options.pattern
    cfg.config.input_extension = options.extension
    cfg.config.input_samples.file1 = options.file1
    cfg.config.input_samples.file2 = options.file2
    cfg.config.input_readtag = options.input_readtag

    # Dedicated section for quality control section
    if options.pipeline == "quality_control":
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)

        if options.kraken:
            cfg.config.kraken.database_directory = os.path.abspath(
                options.kraken)
            cfg.config.kraken.do = True
        else:
            cfg.config.kraken.do = False

        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_type = options.adapters
        # Foir all pipeline using BWA
        if options.reference:
            cfg.config.bwa_mem.reference = os.path.abspath(options.reference)
    if options.pipeline == "variant_calling":
        if options.reference:
            cfg.config.bwa_mem_ref.reference = os.path.abspath(
                options.reference)

    if options.pipeline in ["rnaseq", "smallrnaseq"]:
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)
        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_choice = options.adapters

    cfg.copy_requirements(target=options.target_dir)

    # FIXME If invalid, no error raised
    if options.config_params:
        params = [this.strip() for this in options.config_params.split(",")]
        for param in params:
            if param.count(":") not in [1, 2, 3]:
                txt = "incorrect format following --config-params"
                txt += "Expected at least one : sign or at most 2 of them"
                txt += "Config file section such as :\n"
                txt += "project: tutorial\n"
                txt += "should be encoded project:tutorial"
                raise ValueError(txt)
            if param.count(":") == 1:
                k, v = param.split(':')
                cfg.config[k] = v
            elif param.count(":") == 2:
                k1, k2, v = param.split(":")
                cfg.config[k1][k2] = v
            elif param.count(":") == 3:
                k1, k2, k3, v = param.split(":")
                cfg.config[k1][k2][k3] = v

    # important to update yaml with content of config
    cfg._update_yaml()
    cfg.save(config_filename)

    # Creating a unique runme.sh file
    runme_filename = options.target_dir + os.sep + "runme.sh"
    with open(runme_filename, "w") as fout:
        cmd = "#!/bin/sh\n"
        cmd += "# generated with sequana version %s with this command:\n" % sequana.version
        cmd += "# %s\n" % " ".join(sys.argv)
        cmd += "snakemake -s %(project)s.rules --stats stats.txt -p -j %(jobs)s --nolock"
        if options.forceall:
            cmd += " --forceall "

        if options.cluster:
            # Do we want to include the cluster config option ?
            cluster_config = Module(options.pipeline).cluster_config
            if options.ignore_cluster_config is True:
                cluster_config = None

            if cluster_config is None:
                cmd += ' --cluster "%s"' % options.cluster
            else:
                cmd += ' --cluster "%s"  --cluster-config %s' %\
                    (options.cluster, os.path.basename(cluster_config))

        if options.redirection:
            cmd += " 1>run.out 2>run.err"
        fout.write(
            cmd % {
                'project': options.pipeline,
                'jobs': options.jobs,
                "version": sequana.version
            })
    # change permission of runme.sh to 755
    st = os.stat(runme_filename)
    os.chmod(runme_filename, st.st_mode | 0o755)

    sa.green("Initialisation of %s succeeded" % options.target_dir)
    sa.green("Please, go to the project directory ")
    sa.purple("\n   cd %s\n" % options.target_dir)
    sa.green("Check out the README and config.yaml files")
    sa.green("A basic script to run the analysis is named runme.sh ")
    sa.purple("\n    sh runme.sh\n")
    sa.purple("On a slurm cluster, you may type:")
    sa.purple("\n  srun --qos normal runme.sh\n")
    sa.green(
        "In case of trouble, please post an issue on https://github.com/sequana/sequana/issue "
    )
    sa.green(
        "or type sequana --issue and fill a post with the error and the config file (NO DATA PLEASE)"
    )

    # Change permission
    try:  #python 3
        os.chmod(runme_filename, 0o755)
    except:
        logger.info(
            "Please use Python3. Change the mode of %s manually to 755" %
            runme_filename)
Beispiel #3
0
def test_pipeline_manager():

    # test missing input_directory
    cfg = SequanaConfig({})
    try:
        pm = snaketools.PipelineManager("custom", cfg)
        assert False
    except:
        assert True

    # normal behaviour but no input provided:
    config = Module("compressor")._get_config()
    cfg = SequanaConfig(config)
    cfg.cleanup() # remove templates
    try:
        pm = snaketools.PipelineManager("custome", cfg)
        assert False
    except:
        assert True

    cfg = SequanaConfig(config)
    cfg.cleanup() # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    #file2 = sequana_data("Hm2_GTGAAA_L005_R2_001.fastq.gz")
    pm = snaketools.PipelineManager("custom", cfg)
    assert pm.paired == False

    cfg = SequanaConfig(config)
    cfg.cleanup() # remove templates
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    cfg.config.input_pattern = "Hm*gz"
    #file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    pm = snaketools.PipelineManager("custom", cfg)
    pm.plot_stats()
    assert pm.paired == True

    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")

    # Test different configuration of input_directory, input_readtag,
    # input_pattern
    # Test the _R[12]_ paired
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = "*fastq.gz"
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = "_R[12]_"
        cfg._update_yaml()
        cfg.save(cfgname)
        cmd = "touch {}/test_R1_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        cmd = "touch {}/test_R2_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired == True

    # Test the _[12]_ paired 
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = "*fastq.gz"
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = "_[12]."
        cfg._update_yaml()
        cfg.save(cfgname)
        cmd = "touch {}/test_1.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        cmd = "touch {}/test_2.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired is True

    # Test the _R[12]_ single end
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = "*fastq.gz"
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = "_R[12]_"
        cfg._update_yaml()
        cfg.save(cfgname)
        cmd = "touch {}/test_R1_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired is False

    # Test the _R[12]_ single end
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = "*fq.gz" # wrong on purpose
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = "_R[12]_"
        cfg._update_yaml()
        cfg.save(cfgname)
        cmd = "touch {}/test_R1_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        try:
            pm = snaketools.PipelineManager("test", cfgname)
            assert False
        except:
            assert True

    # Test the _R[12]_ single end
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = "*fastq.gz" 
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = "R[12]_"
        cfg._update_yaml()
        cfg.save(cfgname)
        cmd = "touch {}/testR1_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        cmd = "touch {}/testR2_.fastq.gz".format(tmpdir)
        subprocess.call(cmd.split())
        try:
            pm = snaketools.PipelineManager("test", cfgname)
            assert False
        except:
            assert True