Example #1
0
def extract_contigs_bin_cov(file_path, key, data_dict):
    """Copy binned-depth lengths from a YAML file into ``data_dict``.

    The document loaded from ``file_path`` is stored unchanged under
    ``data_dict["results"][key]``. For every value ``N`` in
    ``GLOBAL_BIN_VALUES`` the element at index ``N - 1`` of the document's
    ``binned_depth`` entry is written to
    ``data_dict["summary"]["raw_length_at_{N}x"]``.

    Returns the same ``data_dict`` object that was passed in.
    """
    depth_report = datahandling.load_yaml(file_path)
    data_dict["results"][key] = depth_report
    for bin_value in GLOBAL_BIN_VALUES:
        # binned_depth is indexed from zero, bin values are used as 1-based.
        length_at_bin = depth_report["binned_depth"][bin_value - 1]
        summary_key = "raw_length_at_{}x".format(bin_value)
        data_dict["summary"][summary_key] = length_at_bin
    return data_dict
Example #2
0
def extract_contig_variants(file_path, key, data_dict):
    """Copy variant-calling figures from a YAML file into ``data_dict``.

    The document loaded from ``file_path`` is stored unchanged under
    ``data_dict["results"][key]``. Three summary entries are then filled:
    ``snp_filter_10x_10%`` from ``variant_table[9][9]``, and
    ``snp_filter_indels`` / ``snp_filter_deletions`` from the document's
    ``indels`` / ``deletions`` entries.

    Returns the same ``data_dict`` object that was passed in.
    """
    variant_report = datahandling.load_yaml(file_path)
    data_dict["results"][key] = variant_report

    data_dict["summary"]["snp_filter_10x_10%"] = (
        variant_report["variant_table"][9][9])

    # (summary key, key in the loaded document), copied in this order.
    copied_fields = (
        ("snp_filter_indels", "indels"),
        ("snp_filter_deletions", "deletions"),
    )
    for summary_key, source_key in copied_fields:
        data_dict["summary"][summary_key] = variant_report[source_key]
    return data_dict
Example #3
0
def script__datadump(folder, sample, sample_file_name, component_file_name):
    """Gather MLST results for a sample and save them on the sample records.

    For every MLST scheme that the component maps to the sample's species,
    ``cge_mlst/<scheme>/data.json`` is loaded, stored in the sample-component
    ``results`` and condensed into three parallel ``summary`` lists (``db``,
    ``strain``, ``alleles``). The summary is also written to
    ``properties.mlst`` of the sample before both documents are saved.

    ``folder`` is converted to ``str`` but not used any further here.

    Returns 0.
    """
    db_sample = datahandling.load_sample(sample_file_name)
    db_component = datahandling.load_component(component_file_name)

    folder = str(folder)
    sample = str(sample)

    datadump_dict = datahandling.load_sample_component(sample)
    # Make sure both sections exist without discarding previous content.
    summary = datadump_dict.get("summary", {})
    datadump_dict["summary"] = summary
    results = datadump_dict.get("results", {})
    datadump_dict["results"] = results

    species = db_sample["properties"]["species"]

    summary["db"] = []
    summary["strain"] = []
    summary["alleles"] = []
    summary["component"] = {
        "id": db_component["_id"],
        "date": datetime.datetime.utcnow(),
    }

    for mlst_entry in db_component["mlst_species_mapping"][species]:
        entry_db = datahandling.load_yaml(
            "cge_mlst/" + mlst_entry + "/data.json")
        results[mlst_entry] = entry_db
        summary["db"].append(mlst_entry)

        mlst_results = entry_db["mlst"]["results"]
        summary["strain"].append(mlst_results.get("sequence_type", "NA"))

        # One comma-separated string of allele names per scheme, in the
        # iteration order of the allele profile.
        profile = mlst_results["allele_profile"]
        summary["alleles"].append(
            ",".join(profile[locus]["allele_name"] for locus in profile))

    db_sample["properties"]["mlst"] = datadump_dict["summary"]
    datahandling.save_sample_component(datadump_dict, sample)
    datahandling.save_sample(db_sample, sample_file_name)

    return 0
Example #4
0
def passes_check_reads_pipeline(sample, requirements_file, log_err):
    """
    Tell whether the component may run on the sample with respect to reads.

    ``sample`` is the path of the sample YAML file and ``requirements_file``
    the already loaded component configuration. Only components whose
    ``type`` is ``"pipeline"`` need a ``reads`` entry in the sample; when it
    is missing a message is written to ``log_err`` and False is returned.
    Every other case returns True.
    """
    sample_db = datahandling.load_yaml(sample)

    needs_reads = requirements_file["type"] == "pipeline"
    if not needs_reads or "reads" in sample_db:
        return True

    message = (
        "Pipeline component can't run on a sample with no reads. db:{}"
        .format(sample_db))
    datahandling.log(log_err, message)
    return False
Example #5
0
def extract_contigs_sum_cov(file_path, key, data_dict):
    """Summarise contig depth per coverage bin into ``data_dict``.

    The document loaded from ``file_path`` is stored unchanged under
    ``data_dict["results"][key]``. For every value ``N`` in
    ``GLOBAL_BIN_VALUES`` the contigs whose ``coverage`` is at least ``N``
    are aggregated into three summary entries:

    * ``bin_contigs_at_{N}x``  - number of such contigs
    * ``bin_length_at_{N}x``   - sum of their ``total_length``
    * ``bin_coverage_at_{N}x`` - summed ``total_depth`` divided by summed
      ``total_length``; 0.0 when no contig (or no length) qualifies

    Returns the same ``data_dict`` object that was passed in.
    """
    contig_report = datahandling.load_yaml(file_path)
    data_dict["results"][key] = contig_report
    for bin_value in GLOBAL_BIN_VALUES:
        threshold = float(bin_value)
        total_length = 0
        total_depth = 0
        total_contigs = 0
        for contig in contig_report["contig_depth"].values():
            if contig["coverage"] >= threshold:
                total_length += contig["total_length"]
                total_depth += contig["total_depth"]
                total_contigs += 1

        # A bin can be empty (e.g. a low-coverage sample at a high
        # threshold); dividing then would raise ZeroDivisionError and abort
        # the whole summary, so report a coverage of 0.0 instead.
        if total_length:
            bin_coverage = float(total_depth / total_length)
        else:
            bin_coverage = 0.0

        data_dict["summary"]["bin_contigs_at_{}x".format(
            bin_value)] = total_contigs
        data_dict["summary"]["bin_length_at_{}x".format(
            bin_value)] = total_length
        data_dict["summary"]["bin_coverage_at_{}x".format(
            bin_value)] = bin_coverage
    return data_dict
Example #6
0
def _get_nested(db, keys):
    """Follow ``keys`` through nested dicts and return the value reached.

    Returns None as soon as a level is missing or is not a dict, instead of
    raising. With an empty ``keys`` the starting object itself is returned.
    """
    current = db
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def requirements_met(requirements_file, sample, log_out, log_err):
    """Check a component's requirements against the sample/run/component files.

    ``requirements_file`` and ``sample`` are paths to YAML files. The
    ``requirements`` section of the requirements file is flattened into
    dotted keys whose first part selects the file to inspect:

    * ``components.<name>.<keys...>`` -> ``<sample name>__<name>.yaml``
    * ``sample.<keys...>``            -> ``sample.yaml``
    * ``run.<keys...>``               -> ``run.yaml``

    A requirement is met when the key path exists (is not None) in that file
    and, if an expected value (or list of accepted values) is given, the
    actual value is one of them. Every outcome is written to ``log_err``;
    ``log_out`` is accepted but not used here.

    Returns True when there are no requirements or all of them are met,
    False otherwise (including when the reads check for pipelines fails).
    """
    requirements_file = datahandling.load_yaml(requirements_file)
    sample_name = datahandling.load_yaml(sample)["name"]
    if not passes_check_reads_pipeline(sample, requirements_file, log_err):
        return False

    raw_requirements = requirements_file.get('requirements', None)
    if raw_requirements is None:
        return True

    # Flatten the nested requirements into {"a.b.c": value}
    # (https://stackoverflow.com/a/41801708). pandas.io.json.json_normalize
    # is deprecated since pandas 1.0, so prefer the top-level function and
    # only fall back to the old location on older pandas versions.
    json_normalize = getattr(pandas, "json_normalize", None)
    if json_normalize is None:
        json_normalize = pandas.io.json.json_normalize
    requirements_dict = json_normalize(raw_requirements).to_dict(
        orient='records')[0]

    # Each requirement is (file_location, [key, key2, ...], expected_value),
    # where expected_value is None when only presence has to be checked.
    requirements = []
    for dotted_key, expected_value in requirements_dict.items():
        parts = dotted_key.split('.')
        scope = parts[0]
        if scope == 'components':
            file_location = sample_name + "__" + parts[1] + ".yaml"
            key_path = parts[2:]  # TODO: add check for no requirements
        elif scope == 'sample':
            file_location = "sample.yaml"
            key_path = parts[1:]  # TODO: add check for no requirements
        elif scope == 'run':
            file_location = "run.yaml"
            key_path = parts[1:]  # TODO: add check for no requirements
        else:
            datahandling.log(log_err,
                             "Improper requirement {}".format(dotted_key))
            continue
        requirements.append((file_location, key_path, expected_value))

    no_failures = True
    for file_location, keys, desired_value in requirements:
        db = datahandling.load_yaml(file_location)
        # Walk the document key by key; a missing or non-dict level yields
        # None (reducing with dict.get raised TypeError in that situation).
        actual_value = _get_nested(db, keys)
        entry = ":".join(keys)

        # Normalise to a list so several accepted values can be given.
        if not isinstance(desired_value, list):
            desired_value = [desired_value]

        if actual_value is None:
            datahandling.log(
                log_err,
                "Requirements not met for\ndb: {}\nentry: {}\n".format(
                    file_location, entry))
            no_failures = False
        elif desired_value == [None]:
            # No expected value was given: presence alone is enough.
            datahandling.log(
                log_err,
                "Found required entry (value not checked) for\ndb: {}\nentry: {}\n"
                .format(file_location, entry))
        elif actual_value in desired_value:
            datahandling.log(
                log_err,
                "Found required entry (value checked) for\ndb: {}\nentry: {}\n"
                .format(file_location, entry))
        else:
            datahandling.log(
                log_err,
                "Requirements not met for\ndb: {}\nentry: {}\ndesired_entry: {}\n"
                .format(file_location, entry, desired_value))
            no_failures = False

    return no_failures
Example #7
0
#!/usr/bin/env python3
"""
Launcher file for accessing dockerfile commands
"""
import argparse
import json
import subprocess
import os
import sys
import traceback
from bifrostlib import datahandling

# Component configuration, read at import time from the config.yaml located
# in the same directory as this launcher file.
COMPONENT: dict = datahandling.load_yaml(
    os.path.join(
        os.path.dirname(__file__),
        'config.yaml',
    )
)


def parse_args():
    """
    Arg parsing via argparse
    """
    description: str = (
        f"-Description------------------------------------\n"
        f"{COMPONENT['details']['description']}"
        f"------------------------------------------------\n\n"
        f"*Run command************************************\n"
        f"docker run \ \n"
        f" -e BIFROST_DB_KEY=mongodb://<user>:<password>@<server>:<port>/<db_name> \ \n"
        f" -v <input_path>:/input \ \n"
        f" -v <output_path>:/output \ \n"
        f" {COMPONENT['dockerfile']} \ \n"
        f"    -id <sample_id>\n"