def change_config(filename: Path, changes_set: List[Tuple[str, str]]):
    """
    Change config YAML file at filename using the changes_set key-value pairs.

    Only keys that already exist in the loaded config are updated; any other
    key is reported with a warning and skipped. The updated config is passed
    to ``validate`` together with the packaged schema before anything is
    written (presumably ``validate`` raises on an invalid config -- confirm
    against its definition), so the original file is only replaced after
    that call returns.

    :param filename: Path to YAML config file to change.
    :param changes_set: changes to incorporate, as (key, value) pairs where
        value is a string that is parsed as YAML.
    """
    # Get configs from file.
    configs, yaml = load_yaml(filename)

    # Update configs
    for key, value in changes_set:
        if key in configs:
            # The raw string is parsed as YAML so that e.g. numbers and
            # booleans get their native type instead of staying strings.
            value = YAML(typ='safe').load(value)
            logger.info(f"Changing value of '{key}': {configs[key]} --> {value}.")
            configs[key] = value
        else:
            logger.warning(f"KEY = {key} not in config. Config not updated with set ({key}, {value})")

    # Confirm that configs is valid.
    schema_path = pkg_resources.resource_filename("blr", SCHEMA_FILE)
    validate(configs, schema_path)

    # Write first to temporary file then overwrite filename.
    tmpfile = Path(str(filename) + ".tmp")
    with open(tmpfile, "w") as file:
        yaml.dump(configs, stream=file)
    # Path.replace overwrites an existing target on every platform, whereas
    # Path.rename raises FileExistsError on Windows when filename exists.
    tmpfile.replace(filename)
def test_dataframe(df_schema):
    """Check how validate() treats a DataFrame with and without its third argument set to False."""
    schema_file = str(df_schema)
    given_columns = ['sample', 'condition']
    df = pd.DataFrame([dict(sample='foo', condition='bar')])

    # With False as third argument the frame must keep exactly its columns.
    validate(df, schema_file, False)
    assert sorted(df.columns) == sorted(given_columns)

    # With the default call the 'case' and 'date' columns must appear.
    validate(df, schema_file)
    assert sorted(df.columns) == sorted(given_columns + ['case', 'date'])
    assert df.case.loc[0]
def test_dataframe(df_schema):
    """Check how validate() treats a DataFrame with and without its third argument set to False."""
    schema_file = str(df_schema)
    given_columns = ["sample", "condition"]
    df = pd.DataFrame([dict(sample="foo", condition="bar")])

    # With False as third argument the frame must keep exactly its columns.
    validate(df, schema_file, False)
    assert sorted(df.columns) == sorted(given_columns)

    # With the default call the "case" and "date" columns must appear.
    validate(df, schema_file)
    assert sorted(df.columns) == sorted(given_columns + ["case", "date"])
    assert df.case.loc[0]
def test_config_ref(config_schema_ref):
    """Check that validate() populates an empty config from the referenced schema and rejects bad values."""
    schema_file = str(config_schema_ref)
    config = {}
    validate(config, schema_file)

    # Values expected under 'param' after validating an empty config.
    expected = {'foo': 'bar', 'bar': 'yaml', 'jsonbar': 'json'}
    for key, value in expected.items():
        assert config['param'][key] == value

    # Make sure regular validator works
    config['param'].update(bar=1, jsonbar=2)
    from snakemake import WorkflowError
    with pytest.raises(WorkflowError):
        validate(config, schema_file, False)
def test_config_ref(config_schema_ref):
    """Check that validate() populates an empty config from the referenced schema and rejects bad values."""
    schema_file = str(config_schema_ref)
    config = {}
    validate(config, schema_file)

    # Values expected under "param" after validating an empty config.
    expected = {"foo": "bar", "bar": "yaml", "jsonbar": "json"}
    for key, value in expected.items():
        assert config["param"][key] == value

    # Make sure regular validator works
    config["param"].update(bar=1, jsonbar=2)
    from snakemake import WorkflowError
    with pytest.raises(WorkflowError):
        validate(config, schema_file, False)
def __init__(self, config=None, path_config=None, tools_config=None, culebront_path=None):
    """Initialise all instance state, run the config checks and validate config against the schema.

    :param config: configuration object; stored, checked and validated.
    :param path_config: only used in the error message raised when validation fails.
    :param tools_config: stored as-is on the instance.
    :param culebront_path: path-like object with ``joinpath``; the schema is
        looked up at ``schemas/config.schema.yaml`` below it.
    :raises ValueError: when building the schema path or validating raises any Exception.
    """
    self.config = config
    self.tools_config = tools_config

    # Activated tools, per category.
    self.assembly_tools_activated = []
    self.polishing_tools_activated = []
    self.correction_tools_activated = []
    self.quality_tools_activated = []

    # Step bookkeeping.
    self.quality_step = []
    self.last_steps_list = []
    self.pipeline_stop = None

    # fastq_* attributes.
    self.fastq_files_list = []
    self.fastq_files_ext = []
    self.fastq_gzip = None

    # illumina_* attributes.
    self.illumina_files_list = []
    self.illumina_files_ext = []
    self.illumina_gzip = None

    # Remaining placeholders, all unset at this point.
    self.add_circular_name = None
    self.TMP = None
    self.TCM = None
    self.TAG = None
    self.draft_to_correction = None
    self.draft_to_correction_index_fai = None
    self.draft_to_correction_index_mmi = None
    self.nb_racon_rounds = None

    self.__check_config_dic(config)
    self.__cleaning_for_rerun()

    try:
        # Kept inside the try block: a failure while building the schema
        # path is reported through the same ValueError as a schema failure.
        schema_file = culebront_path.joinpath("schemas/config.schema.yaml").resolve().as_posix()
        validate(config, schema_file)
    except Exception as e:
        raise ValueError(
            f"{e}\n\nCONFIG FILE CHECKING STRUCTURE FAIL : you need to verify {path_config} KEYS:VALUES: {str(e)[30:76]}\n"
        )
def test_config(config_schema):
    """Check that validate() leaves an empty config alone with False and fills it otherwise."""
    schema_file = str(config_schema)
    config = {}

    # With False as third argument nothing may be added.
    validate(config, schema_file, False)
    untouched = {}
    assert config == untouched

    # The default call must add the 'param' section.
    validate(config, schema_file)
    expected = {'param': {'foo': 'bar'}}
    assert dict(config) == expected
def parse_samplelist(f, config, PREPROCESS):
    """Parse a tab-separated sample list into nested sample / assembly-group dictionaries.

    :param f: file (path or handle) handed to ``pandas.read_csv``.
    :param config: mapping; ``config["intermediate_path"]`` is read whenever
        a deinterleaved or preprocessed file path has to be built.
    :param PREPROCESS: string inserted before ``.fastq.gz`` in the names of
        preprocessed files.
    :return: tuple ``(samples, assemblyGroups)``. ``samples`` is keyed
        ``sample -> runID`` and holds the keys ``R1``/``R2`` (plus
        ``interleaved``) or ``se``. ``assemblyGroups`` is keyed
        ``group -> sample -> runID`` and holds one-element lists of
        preprocessed file paths under ``R1``/``R2`` or ``se``.
    """
    column_types = {
        "sampleID": str,
        "runID": int,
        "assemblyGroup": str,
        "fileName": str,
        "pair": str,
        "interleaved": str,
    }
    df = pandas.read_csv(f, sep="\t", dtype=column_types)
    validate(df, "config/samples.schema.yaml")

    def dict_shell():
        # Dictionary with arbitrary number of levels
        return defaultdict(dict_shell)

    assemblyGroups = dict_shell()
    samples = dict_shell()

    def preprocessed(template, sample, runID):
        # One-element list holding the preprocessed file path; this is the
        # initial input to the assembly rule.
        return [os.path.join(config["intermediate_path"], "preprocess",
                             template.format(sample, runID, PREPROCESS))]

    def add_paired_to_groups(groups, sample, runID):
        # Register the preprocessed R1/R2 files in every assembly group.
        for g in groups:
            assemblyGroups[g][sample][runID]["R1"] = preprocessed("{}_{}_R1{}.fastq.gz", sample, runID)
            assemblyGroups[g][sample][runID]["R2"] = preprocessed("{}_{}_R2{}.fastq.gz", sample, runID)

    df.fillna("", inplace=True)

    # The set of columns does not change while iterating over the rows.
    has_groups = "assemblyGroup" in df.columns
    has_interleaved = "interleaved" in df.columns
    has_pair = "pair" in df.columns

    for i in list(df.index):
        row = df.iloc[i]
        sample = row["sampleID"]
        runID = str(row["runID"])
        R1 = row["fileName"]

        # Initiate keys for all assembly group values
        groups = []
        if has_groups:
            # Remove empty assembly groups
            groups = [g for g in row["assemblyGroup"].split(",") if g != ""]
            for g in groups:
                if g not in assemblyGroups:
                    assemblyGroups[g] = dict_shell()

        if has_interleaved and row["interleaved"]:
            # If interleaved fastq is provided, add filepaths to split fastq files and later produce these using the
            # deinterleave_fastq rule in preprocessing.rules.
            inter = R1
            R1 = os.path.join(config["intermediate_path"], "deinterleaved",
                              "{}_{}_R1.fastq.gz".format(sample, runID))
            R2 = os.path.join(config["intermediate_path"], "deinterleaved",
                              "{}_{}_R2.fastq.gz".format(sample, runID))
            samples[sample][runID]["interleaved"] = inter
            samples[sample][runID]["R1"] = R1
            samples[sample][runID]["R2"] = R2
            add_paired_to_groups(groups, sample, runID)
            continue

        # Handling of paired and/or single end sequence files
        if has_pair:
            # If the sample annotation file has a 'pair' column, add the read files as 'R1' and 'R2'.
            # Every row is treated as paired here, whatever the 'pair' value is.
            R2 = row["pair"]
            samples[sample][runID]["R1"] = R1
            samples[sample][runID]["R2"] = R2
            add_paired_to_groups(groups, sample, runID)
        else:
            # If there is no 'pair' column, add the single file path as 'se'
            samples[sample][runID]["se"] = R1
            for g in groups:
                assemblyGroups[g][sample][runID]["se"] = preprocessed("{}_{}_se{}.fastq.gz", sample, runID)

    return samples, assemblyGroups
import sys
from argparse import ArgumentParser

import pandas as pd
from snakemake.utils import validate

# Command line: a single mandatory --config option.
parser = ArgumentParser()
parser.add_argument(
    '--config',
    dest='config',
    required=True,
    help='Path to DS config file',
)
o = parser.parse_args()

# Load the config as a table indexed by its "sample" column (the column is
# kept as well), then validate it against the schema located relative to
# sys.path[0].
samples = pd.read_csv(o.config)
samples = samples.set_index("sample", drop=False)
validate(samples, f"{sys.path[0]}/../DS_baseSchema.yaml")


def _sample_field(sample, column):
    """Return the value stored in ``column`` for the row(s) indexed by ``sample``."""
    return samples.loc[sample, column]


def get_sample(sample):
    """Return the "sample" value for ``sample``."""
    return _sample_field(sample, "sample")


def get_rglb(sample):
    """Return the "rglb" value for ``sample``."""
    return _sample_field(sample, "rglb")


def get_rgpl(sample):
    """Return the "rgpl" value for ``sample``."""
    return _sample_field(sample, "rgpl")


def get_rgpu(sample):
    """Return the "rgpu" value for ``sample``."""
    return _sample_field(sample, "rgpu")


def get_rgsm(sample):
    """Return the "rgsm" value for ``sample``."""
    return _sample_field(sample, "rgsm")


def get_reference(sample):
    """Return the "reference" value for ``sample``."""
    return _sample_field(sample, "reference")
def test_config(config_schema):
    """Check that validate() leaves an empty config alone with False and fills it otherwise."""
    schema_file = str(config_schema)
    config = {}

    # With False as third argument nothing may be added.
    validate(config, schema_file, False)
    untouched = {}
    assert config == untouched

    # The default call must add the "param" section.
    validate(config, schema_file)
    expected = {"param": {"foo": "bar"}}
    assert dict(config) == expected
# Workflow header: loads and validates the configuration and the sample/unit
# sheets, then declares the default target rule.
import pandas as pd
from snakemake.utils import validate, min_version

##### set minimum snakemake version #####
min_version("5.1.2")

##### load config and sample sheets #####

# The config is validated before any of its keys are read below.
# NOTE(review): validate() presumably also fills in schema defaults -- confirm
# against the snakemake.utils documentation.
configfile: "config.yaml"
validate(config, schema="schemas/config.schema.yaml")

# Sample sheet, indexed by its "sample" column (the column itself is kept).
samples = pd.read_table(config["samples"]).set_index("sample", drop=False)
validate(samples, schema="schemas/samples.schema.yaml")

# Unit sheet, read entirely as strings and indexed by (sample, unit).
units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
units.index = units.index.set_levels([i.astype(str) for i in units.index.levels]) # enforce str in index
validate(units, schema="schemas/units.schema.yaml")

##### target rules #####

# Default target: two diffexp outputs per configured contrast plus the PCA plot.
rule all:
    input:
        expand(["results/diffexp/{contrast}.diffexp.tsv", "results/diffexp/{contrast}.ma-plot.svg"], contrast=config["diffexp"]["contrasts"]),
        "results/pca.svg"

##### setup singularity #####