Python examples of snakemake.utils.validate

Example #1

0

Show file

File: config.py Project: parharn/BLR

def change_config(filename: Path, changes_set: List[Tuple[str, str]]) -> None:
    """
    Change config YAML file at filename using the changes_set key-value pairs.

    Keys that are not already present in the config are skipped with a
    warning. The updated config is validated against the package schema
    before anything is written to disk.

    :param filename: Path to YAML config file to change.
    :param changes_set: changes to incorporate; each value is parsed as YAML
        before being stored.
    """
    # Get configs from file.
    configs, yaml = load_yaml(filename)

    # One safe parser serves every value; no need to rebuild it per key.
    value_parser = YAML(typ='safe')

    # Update configs
    for key, raw_value in changes_set:
        if key not in configs:
            logger.warning(
                f"KEY = {key} not in config. Config not updated with set ({key}, {raw_value})"
            )
            continue

        value = value_parser.load(raw_value)
        logger.info(
            f"Changing value of '{key}': {configs[key]} --> {value}.")
        configs[key] = value

    # Confirm that configs is valid.
    schema_path = pkg_resources.resource_filename("blr", SCHEMA_FILE)
    validate(configs, schema_path)

    # Write first to temporary file then overwrite filename.
    tmpfile = Path(str(filename) + ".tmp")
    with open(tmpfile, "w") as file:
        yaml.dump(configs, stream=file)
    # Path.replace overwrites an existing target on every platform, whereas
    # Path.rename raises FileExistsError on Windows when filename exists.
    tmpfile.replace(filename)

Example #2

0

Show file

def test_dataframe(df_schema):
    """Check which columns a DataFrame has after validate() with and without the False flag."""
    schema_path = str(df_schema)
    frame = pd.DataFrame([{'sample': 'foo', 'condition': 'bar'}])

    # With False as third argument the columns are expected to stay untouched.
    validate(frame, schema_path, False)
    columns_before = sorted(frame.columns)
    assert columns_before == sorted(['sample', 'condition'])

    # Without it, the 'case' and 'date' columns are expected to appear.
    validate(frame, schema_path)
    columns_after = sorted(frame.columns)
    assert columns_after == sorted(['sample', 'condition', 'case', 'date'])

    first_case = frame.case.loc[0]
    assert first_case

Example #3

0

Show file

def test_dataframe(df_schema):
    """Check which columns a DataFrame has after validate() with and without the False flag."""
    schema_path = str(df_schema)
    frame = pd.DataFrame([{"sample": "foo", "condition": "bar"}])

    # With False as third argument the columns are expected to stay untouched.
    validate(frame, schema_path, False)
    columns_before = sorted(frame.columns)
    assert columns_before == sorted(["sample", "condition"])

    # Without it, the "case" and "date" columns are expected to appear.
    validate(frame, schema_path)
    columns_after = sorted(frame.columns)
    assert columns_after == sorted(["sample", "condition", "case", "date"])

    first_case = frame.case.loc[0]
    assert first_case

Example #4

0

Show file

def test_config_ref(config_schema_ref):
    """Validate an empty config against the schema fixture, then check that bad values are rejected."""
    schema_path = str(config_schema_ref)
    config = {}
    validate(config, schema_path)

    # Entries expected under 'param' after validating the empty config.
    expected_params = (('foo', 'bar'), ('bar', 'yaml'), ('jsonbar', 'json'))
    for name, expected in expected_params:
        assert config['param'][name] == expected

    # Make sure regular validator works
    for name, bad_value in (('bar', 1), ('jsonbar', 2)):
        config['param'][name] = bad_value
    from snakemake import WorkflowError
    with pytest.raises(WorkflowError):
        validate(config, schema_path, False)

Example #5

0

Show file

def test_config_ref(config_schema_ref):
    """Validate an empty config against the schema fixture, then check that bad values are rejected."""
    schema_path = str(config_schema_ref)
    config = {}
    validate(config, schema_path)

    # Entries expected under "param" after validating the empty config.
    expected_params = (("foo", "bar"), ("bar", "yaml"), ("jsonbar", "json"))
    for name, expected in expected_params:
        assert config["param"][name] == expected

    # Make sure regular validator works
    for name, bad_value in (("bar", 1), ("jsonbar", 2)):
        config["param"][name] = bad_value
    from snakemake import WorkflowError

    with pytest.raises(WorkflowError):
        validate(config, schema_path, False)

Example #6

0

Show file

File: tools.py Project: pythseq/CulebrONT_pipeline

    def __init__(self,
                 config=None,
                 path_config=None,
                 tools_config=None,
                 culebront_path=None):
        """
        Initialise the pipeline state attributes, then check and validate the config.

        :param config: configuration object; stored on the instance, passed to
            the private config check and validated against
            ``schemas/config.schema.yaml``.
        :param path_config: only used in the error message, to tell the user
            which config file to verify.
        :param tools_config: stored as-is on the instance.
        :param culebront_path: object providing ``joinpath`` (presumably a
            ``pathlib.Path`` -- TODO confirm) under which
            ``schemas/config.schema.yaml`` is resolved.
        :raises ValueError: when anything inside the validation step fails,
            including building the schema path (e.g. ``culebront_path`` left
            at ``None``).
        """
        self.config = config
        self.tools_config = tools_config
        self.assembly_tools_activated = []
        self.polishing_tools_activated = []
        self.correction_tools_activated = []
        self.quality_tools_activated = []
        self.quality_step = []
        self.last_steps_list = []
        self.pipeline_stop = None

        self.fastq_files_list = []
        self.fastq_files_ext = []
        self.fastq_gzip = None

        self.illumina_files_list = []
        self.illumina_files_ext = []
        self.illumina_gzip = None

        self.add_circular_name = None
        self.TMP = None
        self.TCM = None
        self.TAG = None

        self.draft_to_correction = None
        self.draft_to_correction_index_fai = None
        self.draft_to_correction_index_mmi = None

        self.nb_racon_rounds = None

        # Both helpers run before schema validation, in this order.
        self.__check_config_dic(config)
        self.__cleaning_for_rerun()
        try:
            validate(
                config,
                culebront_path.joinpath(
                    "schemas/config.schema.yaml").resolve().as_posix())
        except Exception as e:
            # Chain explicitly so the original error stays attached as
            # __cause__ of the user-facing ValueError.
            # NOTE(review): str(e)[30:76] echoes a fixed slice of the original
            # message; presumably the part naming the offending key -- confirm.
            raise ValueError(
                f"{e}\n\nCONFIG FILE CHECKING STRUCTURE FAIL : you need to verify {path_config} KEYS:VALUES: {str(e)[30:76]}\n"
            ) from e

Example #7

0

Show file

def test_config(config_schema):
    """Check the content of an empty config after validate() with and without the False flag."""
    schema_path = str(config_schema)
    config = {}

    # With False as third argument the config is expected to stay empty.
    validate(config, schema_path, False)
    assert config == {}

    # Without it, the nested 'param' entry is expected to be filled in.
    validate(config, schema_path)
    expected = {'param': {'foo': 'bar'}}
    assert dict(config) == expected

Example #8

0

Show file

def parse_samplelist(f, config, PREPROCESS):
    """Parse a tab-separated sample list into nested sample and assembly-group dicts.

    The table is read with pandas, validated against
    ``config/samples.schema.yaml`` and then walked row by row.

    :param f: path or buffer handed to ``pandas.read_csv``.
    :param config: mapping providing ``intermediate_path``, the root of all
        generated file paths.
    :param PREPROCESS: string inserted into preprocessed file names right
        before ``.fastq.gz``.
    :return: tuple ``(samples, assemblyGroups)`` of nested ``defaultdict``s.
        ``samples[sample][runID]`` maps ``R1``/``R2``/``se``/``interleaved``
        to input file paths; ``assemblyGroups[group][sample][runID]`` maps
        ``R1``/``R2``/``se`` to one-element lists of preprocessed file paths.
    """
    def dict_shell():
        # Dictionary with arbitrary number of levels
        return defaultdict(dict_shell)

    def preprocessed_path(sample, runID, read):
        # Path of the preprocessed fastq for one read type ("R1", "R2", "se").
        return os.path.join(config["intermediate_path"], "preprocess",
                            "{}_{}_{}{}.fastq.gz".format(sample, runID, read, PREPROCESS))

    def deinterleaved_path(sample, runID, read):
        # Path where the split half of an interleaved fastq is expected.
        return os.path.join(config["intermediate_path"], "deinterleaved",
                            "{}_{}_{}.fastq.gz".format(sample, runID, read))

    df = pandas.read_csv(f, sep="\t", dtype={"sampleID": str, "runID": int, "assemblyGroup": str,
                                         "fileName": str, "pair": str, "interleaved": str})
    validate(df, "config/samples.schema.yaml")
    assemblyGroups = dict_shell()
    samples = dict_shell()
    df.fillna("", inplace=True)

    # Column presence is the same for every row, so check it once.
    has_groups = "assemblyGroup" in df.columns
    has_interleaved = "interleaved" in df.columns
    has_pair = "pair" in df.columns

    # Rows are accessed positionally via iloc, so iterate over positions
    # rather than index labels.
    for i in range(len(df.index)):
        row = df.iloc[i]
        sample = row["sampleID"]
        runID = str(row["runID"])
        R1 = row["fileName"]
        groups = []

        # Initiate keys for all assembly group values
        if has_groups:
            # Remove empty assembly groups
            groups = [g for g in row["assemblyGroup"].split(",") if g != ""]
            for g in groups:
                if g not in assemblyGroups:
                    assemblyGroups[g] = dict_shell()

        if has_interleaved and row["interleaved"]:
            # If interleaved fastq is provided, add filepaths to split fastq files and later produce these using the
            # deinterleave_fastq rule in preprocessing.rules.
            inter = R1
            R1 = deinterleaved_path(sample, runID, "R1")
            R2 = deinterleaved_path(sample, runID, "R2")
            samples[sample][runID]["interleaved"] = inter
            samples[sample][runID]["R1"] = R1
            samples[sample][runID]["R2"] = R2
            for g in groups:
                assemblyGroups[g][sample][runID]["R1"] = [preprocessed_path(sample, runID, "R1")]
                assemblyGroups[g][sample][runID]["R2"] = [preprocessed_path(sample, runID, "R2")]
            continue

        # Handling of paired and/or single end sequence files
        # If the sample annotation file has a 'pair' column, add the read files as 'R1' and 'R2'
        if has_pair:
            # NOTE(review): every row is treated as paired once the 'pair'
            # column exists, even when its value is empty for this row. The
            # previous per-row single-end fallback here was unreachable and
            # has been dropped; confirm whether empty 'pair' values should
            # be handled as single-end instead.
            R2 = row["pair"]
            samples[sample][runID]["R1"] = R1
            samples[sample][runID]["R2"] = R2
            # Add filepaths to preprocessed output files for each of the read files in each of the assembly groups
            # This will be the initial input to the assembly rule
            for g in groups:
                assemblyGroups[g][sample][runID]["R1"] = [preprocessed_path(sample, runID, "R1")]
                assemblyGroups[g][sample][runID]["R2"] = [preprocessed_path(sample, runID, "R2")]

        # If there is no 'pair' column, add the single file path as 'se'
        else:
            samples[sample][runID]["se"] = R1
            for g in groups:
                assemblyGroups[g][sample][runID]["se"] = [preprocessed_path(sample, runID, "se")]
    return samples, assemblyGroups

Example #9

0

Show file

import sys
from argparse import ArgumentParser
import pandas as pd
from snakemake.utils import validate

# Command line interface: a single mandatory --config option.
parser = ArgumentParser()
parser.add_argument('--config', dest='config', required=True,
                    help='Path to DS config file')

# Parsed arguments; the config path is available as o.config.
o = parser.parse_args()

# The config file is read as CSV and indexed by its "sample" column;
# drop=False keeps "sample" available as a regular column too.
samples = pd.read_csv(o.config).set_index("sample", drop=False)
# Schema is looked up one directory above sys.path[0]
# (presumably the directory of the invoked script -- confirm).
validate(samples, f"{sys.path[0]}/../DS_baseSchema.yaml")

def get_sample(sample):
    """Look up the "sample" column of the sample sheet for index label *sample*."""
    column = "sample"
    return samples.loc[sample, column]

def get_rglb(sample):
    """Look up the "rglb" column of the sample sheet for index label *sample*."""
    column = "rglb"
    return samples.loc[sample, column]

def get_rgpl(sample):
    """Look up the "rgpl" column of the sample sheet for index label *sample*."""
    column = "rgpl"
    return samples.loc[sample, column]

def get_rgpu(sample):
    """Look up the "rgpu" column of the sample sheet for index label *sample*."""
    column = "rgpu"
    return samples.loc[sample, column]

def get_rgsm(sample):
    """Look up the "rgsm" column of the sample sheet for index label *sample*."""
    column = "rgsm"
    return samples.loc[sample, column]

def get_reference(sample):
    """Look up the "reference" column of the sample sheet for index label *sample*."""
    column = "reference"
    return samples.loc[sample, column]

Example #10

0

Show file

def test_config(config_schema):
    """Check the content of an empty config after validate() with and without the False flag."""
    schema_path = str(config_schema)
    config = {}

    # With False as third argument the config is expected to stay empty.
    validate(config, schema_path, False)
    assert config == {}

    # Without it, the nested "param" entry is expected to be filled in.
    validate(config, schema_path)
    expected = {"param": {"foo": "bar"}}
    assert dict(config) == expected

Example #11

0

Show file

import pandas as pd
from snakemake.utils import validate, min_version
##### set minimum snakemake version #####
# Require at least this Snakemake release before anything else is evaluated.
min_version("5.1.2")


##### load config and sample sheets #####

# The `config` object used below comes from this configfile directive.
configfile: "config.yaml"
validate(config, schema="schemas/config.schema.yaml")

# Sample sheet: path taken from config["samples"], indexed by "sample"
# (drop=False keeps "sample" as a regular column as well).
samples = pd.read_table(config["samples"]).set_index("sample", drop=False)
validate(samples, schema="schemas/samples.schema.yaml")

# Unit sheet: path taken from config["units"], every column read as str,
# indexed by the ("sample", "unit") pair.
units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
validate(units, schema="schemas/units.schema.yaml")


##### target rules #####

# Aggregate target rule (conventionally the workflow's default target --
# confirm it is the first rule in the file). It requests one differential
# expression table and one MA plot per entry of
# config["diffexp"]["contrasts"], plus the PCA plot.
rule all:
    input:
        expand(["results/diffexp/{contrast}.diffexp.tsv",
                "results/diffexp/{contrast}.ma-plot.svg"],
               contrast=config["diffexp"]["contrasts"]),
        "results/pca.svg"


##### setup singularity #####