Example no. 1
0
def script__test_ssi_stamper(sample, sample_yaml, sample_component, log_err):
    """Run the SSI stamp check on a sample and persist its results.

    Loads the whats_my_species and assemblatron component results for the
    sample, runs ``stamps.ssi_stamp.test`` against them, stores the summary
    and results on the sample component, and appends the resulting stamp to
    the sample's stamp list before saving the sample back to ``sample_yaml``.

    Returns 0 on success. On any exception the traceback is logged to
    ``log_err`` and the process exits with status 1.
    """
    # Generic error handling: redirect any failure's traceback to the
    # stderr log file and abort with a non-zero exit code.
    try:
        comps = get_components(sample)
        # Use the detected species when an earlier step recorded one;
        # otherwise fall back to the default (None) species entry.
        if "detected_species" in sample["properties"]:
            species = datahandling.load_species(
                sample["properties"]["detected_species"])
        else:
            species = datahandling.load_species(None)

        results, summary, stamp = stamps.ssi_stamp.test(
            comps['whats_my_species'], comps['assemblatron'], species, sample)

        datadump_dict = datahandling.load_sample_component(sample_component)
        datadump_dict["summary"] = summary
        datadump_dict["results"] = results
        datahandling.save_sample_component(datadump_dict, sample_component)

        # Reload the component after saving so we pick up the _id assigned
        # by the datastore; the stamp references its sample component by it.
        datadump_dict = datahandling.load_sample_component(sample_component)
        stamp["_sample_component"] = datadump_dict["_id"]

        # Append the stamp to the sample's stamp list and also index it by
        # name for direct lookup.
        stamp_dict = sample.get("stamps", {})
        stamp_list = stamp_dict.get("stamp_list", [])
        stamp_list.append(stamp)
        stamp_dict["stamp_list"] = stamp_list
        stamp_dict[stamp["name"]] = stamp
        sample["stamps"] = stamp_dict

        datahandling.save_sample(sample, sample_yaml)
        return 0
    except Exception:
        datahandling.log(log_err, str(traceback.format_exc()))
        exit(1)
Example no. 2
0
def passes_check_reads_pipeline(sample, requirements_file, log_err):
    """Return False when a pipeline component is asked to run without reads.

    Pipeline-type components require reads on the sample; non-pipeline
    components always pass this check.
    """
    loaded_sample = datahandling.load_yaml(sample)
    is_pipeline = requirements_file["type"] == "pipeline"
    if is_pipeline and "reads" not in loaded_sample:
        datahandling.log(
            log_err,
            "Pipeline component can't run on a sample with no reads. db:{}"
            .format(loaded_sample))
        return False
    return True
Example no. 3
0
def script__initialization(requirements_file, sample, sample_component,
                           output_file, log_out, log_err):
    """Initialize a component run: mark it running, then check requirements.

    When requirements are met, writes a marker to ``output_file`` so
    downstream rules can proceed; otherwise records the
    "Requirements not met" status on the sample component.

    Always returns 0 (the requirement outcome is communicated via the
    output file / component status, not the return value).
    """
    set_status_to_running(sample_component)
    # requirements_met returns a bool; test truthiness instead of "== True".
    if requirements_met(requirements_file, sample, log_out, log_err):
        datahandling.log(log_out, "{}\n{}\n".format(os.getcwd(), output_file))
        with open(str(output_file), "w") as handle:
            handle.write("Requirements met")
    else:
        datahandling.log(log_err, "Requirements not met")
        sample_component_entry = datahandling.load_sample_component(
            sample_component)
        sample_component_entry["status"] = "Requirements not met"
        datahandling.save_sample_component(sample_component_entry,
                                           sample_component)
    return 0
Example no. 4
0
def requirements_met(requirements_file, sample, log_out, log_err):
    """Check whether a component's declared requirements hold for a sample.

    Requirements are declared as a nested mapping in the requirements file;
    dotted keys address entries inside ``<sample>__<component>.yaml``,
    ``sample.yaml`` or ``run.yaml``. A requirement value of None means "the
    entry must merely exist"; a list of values means "any of these matches".

    Returns True when every requirement is satisfied, False otherwise.
    Each outcome (found / not found / mismatch) is logged.
    """
    requirements_file = datahandling.load_yaml(requirements_file)
    sample_name = datahandling.load_yaml(sample)["name"]
    if not passes_check_reads_pipeline(sample, requirements_file, log_err):
        return False
    no_failures = True
    if requirements_file.get('requirements', None) is not None:
        # Flatten the nested requirement mapping into dotted keys while
        # keeping leaf values (https://stackoverflow.com/a/41801708).
        # pandas.io.json.json_normalize was removed in pandas 2.0; the
        # top-level alias has existed since pandas 0.25.
        df = pandas.json_normalize(requirements_file.get('requirements'))
        requirements_dict = df.to_dict(orient='records')[0]

        # Each entry becomes [yaml_file, key_path, expected_value].
        requirements = []
        for key in requirements_dict:
            values = key.split('.')
            expected_value = requirements_dict[key]
            if values[0] == 'components':
                # components.<name>.<path...> -> <sample>__<name>.yaml
                file_location = sample_name + "__" + values[1] + ".yaml"
                requirement = values[2:]  # TODO: add check for no requirements
                requirements.append(
                    [file_location, requirement, expected_value])
            elif values[0] == 'sample':
                requirement = values[1:]  # TODO: add check for no requirements
                requirements.append(
                    ["sample.yaml", requirement, expected_value])
            elif values[0] == 'run':
                requirement = values[1:]  # TODO: add check for no requirements
                requirements.append(
                    ["run.yaml", requirement, expected_value])
            else:
                datahandling.log(log_err,
                                 "Improper requirement {}".format(key))

        for file_location, keys, desired_value in requirements:
            db = datahandling.load_yaml(file_location)
            # Walk the key path one level at a time. Unlike the previous
            # functools.reduce(dict.get, keys, db), this really does yield
            # None when an intermediate level is missing instead of raising
            # TypeError (dict.get cannot be applied to None).
            actual_value = db
            for k in keys:
                if not isinstance(actual_value, dict):
                    actual_value = None
                    break
                actual_value = actual_value.get(k)

            # Normalize to a list so multiple acceptable values can match.
            if not isinstance(desired_value, list):
                desired_value = [desired_value]

            if actual_value is None:
                datahandling.log(
                    log_err,
                    "Requirements not met for\ndb: {}\nentry: {}\n".format(
                        file_location, ":".join(keys)))
                no_failures = False
            elif desired_value == [None]:
                # Entry only has to exist; its value is not checked.
                # NOTE(review): "found" messages go to log_err like the
                # failure cases — possibly intended for log_out; kept as-is.
                datahandling.log(
                    log_err,
                    "Found required entry (value not checked) for\ndb: {}\nentry: {}\n"
                    .format(file_location, ":".join(keys)))
            elif actual_value in desired_value:
                datahandling.log(
                    log_err,
                    "Found required entry (value checked) for\ndb: {}\nentry: {}\n"
                    .format(file_location, ":".join(keys)))
            else:
                datahandling.log(
                    log_err,
                    "Requirements not met for\ndb: {}\nentry: {}\ndesired_entry: {}\n"
                    .format(file_location, ":".join(keys), desired_value))
                no_failures = False
    return no_failures