Example 1
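Collects raw batch IDs for a run: when a tag barcode and checkpoints are available, batch names are read from the demultiplexing checkpoint output; otherwise the raw .tar and .fast5 read batches are globbed from disk.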
def get_batch_ids_raw(runname, config, tag=None, checkpoints=None):
    tag_barcode = get_tag_barcode(tag, runname, config) if tag else None
    if tag_barcode and checkpoints:
        if hasattr(checkpoints, config['demux_default'] + '_barcode'):
            barcode_batch_dir = getattr(
                checkpoints,
                config['demux_default'] + '_barcode').get(
                    runname=runname).output.barcodes
        else:
            raise NotImplementedError("Demultiplexing with {} is not implemented.".format(config['demux_default']))
        barcode_batch = os.path.join(barcode_batch_dir, tag_barcode, '{id}.txt')
        batches_txt, = glob_wildcards(barcode_batch)
        return batches_txt
    else:
        batches_tar, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.tar".format(
                datadir=config["storage_data_raw"], runname=runname))
        batches_fast5, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.fast5".format(
                datadir=config["storage_data_raw"], runname=runname))
        return batches_tar + batches_fast5
Example 2
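Runs gene prediction on every genome FASTA found in the input directory, enumerating the genomes with glob_wildcards.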
def predict_genes_genomes(in_dir, out_dir, log):

    path = os.path.join(in_dir, '{genome}.fasta')
    os.makedirs(out_dir, exist_ok=True)

    for genome in glob_wildcards(path).genome:
        predict_genes(genome, path.format(genome=genome), out_dir, log)
Example 3
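Renames genome bins to sequential MAG names, rewriting FASTA headers and recording the bin-to-MAG and contig-to-MAG mappings.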
def rename_genomes(input_folder, mapfile_genomes, mapfile_contigs, output_dir):

    file_name = f"{input_folder}/{{binid}}.fasta"
    bin_ids, = glob_wildcards(file_name)

    old2new_name = dict(
        zip(bin_ids, utils.gen_names_for_range(len(bin_ids), prefix='MAG')))
    os.makedirs(output_dir)

    with open(mapfile_contigs,
              'w') as out_contigs, open(mapfile_genomes,
                                        'w') as old2new_mapping_file:
        old2new_mapping_file.write(f"BinID\tMAG\n")
        for binid in bin_ids:

            fasta_in = file_name.format(binid=binid)
            new_name = old2new_name[binid]

            old2new_mapping_file.write(f"{binid}\t{new_name}\n")

            fasta_out = os.path.join(output_dir, f"{new_name}.fasta")

            # write names of contigs in mapping file
            with open(fasta_in) as ffi, open(fasta_out, 'w') as ffo:
                Nseq = 0
                for line in ffi:
                    if line[0] == ">":
                        Nseq += 1
                        new_header = f'{new_name}_{Nseq}'
                        out_contigs.write(f"{new_header}\t{new_name}\n")
                        ffo.write(f">{new_header}\n")
                    else:
                        ffo.write(line)
Example 4
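Extracts wildcard values from a list of (wildcard pattern, filename) tuples by globbing each file's basename against the patterns.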
def get_wildcards(inputmap, wildcard_constraints):
    """Given a list of snakemake IO filenames, extract the wildcards.

    Params:
      inputmap (list): list of input wildcard/filename tuples
      wildcard_constraints: wildcard constraints to apply to each pattern
    """
    d = {}
    try:
        all_wc = []
        all_files = []
        for wc, filename in inputmap:
            try:
                wc = eval(wc)
            except:
                pass
            wc = update_wildcard_constraints(wc, wildcard_constraints, {})
            all_wc.append(wc)
            if filename is None:
                continue
            if isinstance(filename, str):
                filename = [filename]
            all_files = all_files + filename
        for f in all_files:
            for wc in all_wc:
                wildcards = glob_wildcards(wc, [os.path.basename(f)])
                for k, v in wildcards._asdict().items():
                    if len(v) > 0:
                        d[k] = v[0]
    except:
        logger.debug("Failed to get wildcards for inputmap %s", inputmap)
        raise
    return d
Example 5
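A checkpoint-driven input function that expands a target rule over all wildcard values found in the checkpoint's output directory.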
def aggregate_input(wildcards):
    ops = base_checkpoint_obj.get(**wildcards).output
    checkpoint_output = _output_accessor(ops, output_key)
    expand_base_rule = os.path.join(checkpoint_output, base_rule)
    expand_target_rule = target_rule or expand_base_rule
    return expand(expand_target_rule,
                  **glob_wildcards(expand_base_rule)._asdict())
Example 6
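A variant of Example 1 that resolves the demultiplexing checkpoint as checkpoints.demux_split_barcodes, passing the configured demultiplexer as a wildcard.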
def get_batch_ids_raw(runname, config, tag=None, checkpoints=None):
    tag_barcode = get_tag_barcode(tag, runname, config) if tag else None
    if tag_barcode and checkpoints:
        barcode_batch_dir = checkpoints.demux_split_barcodes.get(
            demultiplexer=config['demux_default'],
            runname=runname).output.barcodes
        barcode_batch = os.path.join(barcode_batch_dir, tag_barcode,
                                     '{id}.txt')
        batches_txt, = glob_wildcards(barcode_batch)
        return batches_txt
    else:
        batches_tar, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.tar".format(
                datadir=config["storage_data_raw"], runname=runname))
        batches_fast5, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.fast5".format(
                datadir=config["storage_data_raw"], runname=runname))
        return batches_tar + batches_fast5
Example 7
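Globs source basenames grouped by year, constraining the {year} wildcard according to the requested year(s).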
def expand_basenames(source_folder: str,
                     source_extension: str,
                     years=None):
    opts_digits: str = r'\d*'
    any_digits: str = r'\d+'
    if isinstance(years, (int, str)):
        year_constraint = rf"{{year,{years}\d*}}"
    elif isinstance(years, list):
        year_constraint = f"{{year,{'|'.join(f'{y}{opts_digits}' for y in years)}}}"
    else:
        year_constraint = rf"{{year,{any_digits}}}"
    source_years, target_basenames = glob_wildcards(
        jj(source_folder, year_constraint, f"{{file}}.{source_extension}"))
    return source_years, target_basenames
Example 8
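Tests that expand with zip yields one target filename per (year, file) pair returned by glob_wildcards.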
def test_expand_call_arguments():
    target_folder = nj(
        "/data/riksdagen_corpus_data/riksdagen-corpus-exports/speech_xml")
    source_folder = nj("/data/riksdagen_corpus_data/riksdagen-corpus/corpus/")
    extension = "xml"
    years, basenames = glob_wildcards(
        jj(source_folder, "{year}", f"{{file}}.{extension}"))

    filenames = expand(jj(target_folder, '{year}',
                          f'{{basename}}.{extension}'),
                       zip,
                       year=years,
                       basename=basenames)

    assert len(filenames) == len(years)
Example 9
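Snakemake's auto_report: collects files flagged for the report (expanding directory patterns with glob_wildcards), gathers job metadata, and renders the report as embedded HTML or as a ZIP archive.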
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does " "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )

                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                f = apply_wildcards(pattern, w)
                                register_file(f, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue

            def get_time(rectime, metatime, sel_func):
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version.".format(f)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """
    )
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(result.path, str(folder.joinpath(result.data_uri)))
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)), result.png_content
                            )
                        # write aux files
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            # print(aux_path, parent, str(parent.joinpath(os.path.relpath(aux_path, os.path.dirname(result.path)))))
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
Example 10
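A Snakemake script that globs all genome FASTAs from the input directory and annotates each genome with callgenes.sh, skipping genomes whose output already exists.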
import os
from snakemake.shell import shell
from snakemake.io import glob_wildcards
from multiprocessing.dummy import Pool

pool = Pool(snakemake.threads)

for dir in snakemake.params.dirs:
    if not os.path.exists(dir):
        os.makedirs(dir)

path = os.path.join(snakemake.input[0],
                    "{genome}" + snakemake.params.fasta_extension)
all_genomes = glob_wildcards(path).genome

print(
    f"Call genes of {len(all_genomes)} gneomes in {snakemake.threads} threads."
)


def callgenes(genome):

    fasta = path.format(genome=genome)

    if not os.path.exists(f"annotations/faa/{genome}.faa.gz"):

        shell("callgenes.sh in={fasta} outa=annotations/faa/{genome}.faa.gz"
              " out=annotations/gff/{genome}.gff.gz"
              " out16S=annotations/16S/{genome}.fasta"
              " stats=annotations/stats/{genome}.json json=t ow > /dev/null")
Example 11
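An earlier, HTML-only version of auto_report that groups results by a single category level and expands report directory patterns with glob_wildcards.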
def auto_report(dag, path):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                def register_file(f, wildcards_overwrite=None):
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            f = apply_wildcards(pattern, w)
                            register_file(f, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule":
        rec.rule,
        "starttime":
        datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime":
        datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values() for res in cat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Results: #results
    .. _Rules: #rules
    .. _Statistics: #stats
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: #{{ cat.id }}
    {% for res in files %}
    .. _{{ res.target }}: #{{ res.id }}
    {% endfor %}
    {% endfor %}
    .. _
    """)
    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(
                    style="trac").get_style_defs(".source"),
            ))
    logger.info("Report created.")
Example 12
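Configuration checking for a SNP-calling pipeline: validates the activated tools and uses glob_wildcards to discover paired-end FASTQ samples or BAM files.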
    def __check_config_dic(self):
        """Configuration file checking"""
        # check output mandatory directory
        self._check_dir_or_string(level1="DATA", level2="OUTPUT")
        self.reference = self.get_config_value('DATA', 'REFERENCE_FILE')
        self.bam_path = self.get_config_value(level1="DATA", level2="BAM")
        self.vcf_path = self.get_config_value(level1="DATA", level2="VCF")

        # check cleaning activation
        self.list_cleaning_tool_activated = self.__build_tools_activated(
            "CLEANING", AVAIL_CLEANING)
        if len(self.list_cleaning_tool_activated) > 1:
            raise ValueError(
                f'CONFIG FILE CHECKING FAIL for section "CLEANING": please activate only one of the available cleaning tools\n'
            )
        elif len(self.list_cleaning_tool_activated) == 1:
            self.cleaning_tool = "_" + self.list_cleaning_tool_activated[0]
            self.cleaning_activated = True
            self._check_file_or_string(level1="DATA",
                                       level2="REFERENCE_FILE",
                                       mandatory=["CLEANING"])

        # check mapping activation, if not use folder name to set self.mapping_tool_activated instead of mapping tool
        self.mapping_activated = var_2_bool(tool="MAPPING",
                                            key="ACTIVATE",
                                            to_convert=self.get_config_value(
                                                "MAPPING", "ACTIVATE"))
        self.mapping_stats_activated = var_2_bool(
            tool="MAPPING",
            key="BUILD_STATS",
            to_convert=self.get_config_value("MAPPING", "BUILD_STATS"))
        if self.mapping_activated:
            self.mapping_tool_activated = self.get_config_value(
                "MAPPING", "TOOL")
            self._check_file_or_string(level1="DATA",
                                       level2="REFERENCE_FILE",
                                       mandatory=[self.mapping_tool_activated])
            if self.mapping_tool_activated not in AVAIL_MAPPING:
                raise ValueError(
                    f'CONFIG FILE CHECKING FAIL for section "MAPPING" key "TOOL": {self.mapping_tool_activated} not avail on RattleSNP\n'
                )
        elif self.mapping_stats_activated:
            raise ValueError(
                f'CONFIG FILE CHECKING FAIL for section "MAPPING" key "BUILD_STATS" is "True" but no mapping activate, please change "ACTIVATE" to "True"\n'
            )

        # if cleaning or mapping is activated, check the fastq path and files
        if self.cleaning_activated or self.mapping_activated:
            self._check_dir_or_string(level1="DATA", level2="FASTQ")
            self.__check_fastq_files()
            self.samples, = glob_wildcards(
                f"{self.fastq_path}{{fastq,[^/]+}}_R1{self.fastq_files_ext}",
                followlinks=True)
            for sample in self.samples:
                if not Path(
                        f"{self.fastq_path}{sample}_R2{self.fastq_files_ext}"
                ).exists():
                    raise ValueError(
                        f"DATA CHECKING FAIL : The sample '{sample}' is single-end, please use only paired-end data\n"
                    )
            self._check_file_or_string(level1="DATA",
                                       level2="REFERENCE_FILE",
                                       mandatory=[])

        # check SNP calling activation:
        self.calling_activated = var_2_bool(
            tool="SNPCALLING",
            key="",
            to_convert=self.get_config_value(level1="SNPCALLING"))

        if not self.mapping_activated and self.calling_activated:
            self._check_dir_or_string(level1="DATA", level2="BAM")
            self.samples, = glob_wildcards(f"{self.bam_path}{{bam,[^/]+}}.bam",
                                           followlinks=True)
            self._check_file_or_string(level1="DATA",
                                       level2="REFERENCE_FILE",
                                       mandatory=["SNPCALLING"])

        # check VCF filter activation
        self.vcf_filter_activated = var_2_bool(
            tool="FILTER",
            key="",
            to_convert=self.get_config_value(level1="FILTER"))
        # If only VCF filtration get vcf path
        if not self.mapping_activated and not self.calling_activated and self.vcf_filter_activated:
            self._check_file_or_string(level1="DATA",
                                       level2="VCF",
                                       mandatory=["VCFTOOL FILTER"])

        self.run_RAXML = var_2_bool(
            tool="RAXML",
            key="",
            to_convert=self.get_config_value(level1="RAXML"))
        self.run_RAXML_NG = var_2_bool(
            tool="RAXML_NG",
            key="",
            to_convert=self.get_config_value(level1="RAXML_NG"))

        # check that the mitochondrial name, if not None, is present in the fasta
        if self.cleaning_activated or self.mapping_activated or self.calling_activated:
            self.mito_name = self.get_config_value('PARAMS',
                                                   'MITOCHONDRIAL_NAME')
            self.CHROMOSOMES = get_list_chromosome_names(
                self.get_config_value('DATA', 'REFERENCE_FILE'))
            if self.mito_name and self.mito_name not in self.CHROMOSOMES:
                raise NameError(
                    f'CONFIG FILE CHECKING FAIL : in the "PARAMS" section, "MITOCHONDRIAL_NAME" key: the name "{self.mito_name}" is not in fasta file {self.get_config_value("DATA", "REFERENCE_FILE")}\n'
                )
            self.CHROMOSOMES_WITHOUT_MITO = self.CHROMOSOMES.copy()
            if self.mito_name and self.mito_name in self.CHROMOSOMES:
                self.CHROMOSOMES_WITHOUT_MITO.remove(self.mito_name)

        if self.calling_activated and self.mapping_activated and self.bam_path:
            raise ValueError(
                f"CONFIG FILE CHECKING FAIL : You want to run mapping with {self.mapping_tool_activated} but provided bam path '{self.bam_path}'\n"
            )

        # check VCF filter activation if raxml or raxml_ng
        self.raxml_activated = var_2_bool(
            tool="RAXML",
            key="",
            to_convert=self.get_config_value(level1="RAXML"))
        self.raxml_ng_activated = var_2_bool(
            tool="RAXML_NG",
            key="",
            to_convert=self.get_config_value(level1="RAXML_NG"))
        if (self.raxml_activated
                or self.raxml_ng_activated) and not self.vcf_filter_activated:
            self._check_file_or_string(level1="DATA",
                                       level2="VCF",
                                       mandatory=["FILTER", "RAXML"])
Example 13
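Globs the raw .tar read batch IDs of a run from the configured raw data directory.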
def get_batches(wildcards, config):
    batches, = glob_wildcards(
        "{datadir}/{wildcards.runname}/reads/{{id}}.tar".format(
            datadir=config["storage_data_raw"], wildcards=wildcards))
    return batches