コード例 #1
0
    def prepare_specs(self):
        """Load the YAML workflow specification and validate its dataset section.

        Sets instruction_name, export_format and dataset_name on the tool and
        rewrites result paths inside the loaded specification in place.
        """
        with self.yaml_path.open("r") as yaml_file:
            workflow_specs = yaml.safe_load(yaml_file)

        tool_name = DataSimulationTool.__name__

        self.instruction_name = Util.check_instruction_type(workflow_specs, tool_name, self.expected_instruction)
        self.export_format = Util.check_export_format(workflow_specs, tool_name, self.instruction_name)

        ParameterValidator.assert_keys_present(workflow_specs["definitions"], ["datasets"], tool_name, "definitions/datasets")
        ParameterValidator.assert_type_and_value(workflow_specs['definitions']['datasets'], dict, tool_name, "definitions/datasets")

        defined_datasets = list(workflow_specs['definitions']['datasets'].keys())
        # this tool operates on exactly one dataset definition
        assert len(defined_datasets) == 1, f"{tool_name}: one dataset has to be defined under definitions/datasets, got " \
                                           f"{defined_datasets} instead."

        self.dataset_name = defined_datasets[0]

        Util.check_paths(workflow_specs, tool_name)
        Util.update_result_paths(workflow_specs, self.result_path, self.yaml_path)
コード例 #2
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        """Build the list of HPSetting objects from the instruction's `settings` entries.

        Each setting names an encoding, an ml_method and optionally a preprocessing
        sequence; names are resolved through the symbol table.

        Raises:
            ValueError: if a referenced preprocessing sequence changes the number of examples.
            KeyError: if a referenced preprocessing (or any required key) is not defined.
        """
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                # preprocessing is optional; an explicit null is treated like an absent key
                if "preprocessing" in setting and setting[
                        "preprocessing"] is not None:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                        # preprocessings that add/remove examples would invalidate
                        # the example counts this instruction relies on
                        if not all(preproc.keeps_example_count()
                                   for preproc in preprocessing_sequence):
                            raise ValueError(
                                f"{TrainMLModelParser.__name__}: preprocessing sequence {preproc_name} includes preprocessing that "
                                f"change the number of examples at runtime and as such cannot be used with this instruction. See the "
                                f"documentation for the preprocessing or alternatively use them with other instructions."
                            )
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                # build the encoder for the instruction's dataset and hand it the
                # dataset through its context as well
                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                ml_method = symbol_table.get(setting["ml_method"])
                ml_method.check_encoder_compatibility(encoder)

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=ml_method,
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            # any missing key above surfaces as one user-oriented error message
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            )
コード例 #3
0
    def _prepare_parameters(reference: dict,
                            max_edit_distances: dict,
                            name: str = None):
        """Validate MatchedReceptorsEncoder parameters and normalize max_edit_distances.

        max_edit_distances may be a single int (then applied to every legal chain)
        or a dict keyed by chain name; any other type is rejected by the validator.

        Returns a dict with the prepared reference receptors, the per-chain
        max_edit_distances mapping and the encoder name.
        """
        location = "MatchedReceptorsEncoder"

        # all chains supported by the paired receptor types
        legal_chains = [
            chain
            for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor())
            for chain in receptor.get_chains()
        ]

        # isinstance instead of `type(...) is ...` so int/dict subclasses are handled;
        # bool is excluded explicitly since isinstance(True, int) is True in Python
        if isinstance(max_edit_distances, int) and not isinstance(max_edit_distances, bool):
            max_edit_distances = {
                chain: max_edit_distances
                for chain in legal_chains
            }
        elif isinstance(max_edit_distances, dict):
            ParameterValidator.assert_keys(max_edit_distances.keys(),
                                           legal_chains,
                                           location,
                                           "max_edit_distances",
                                           exclusive=False)
        else:
            # neither int nor dict: let the validator raise a descriptive type error
            ParameterValidator.assert_type_and_value(max_edit_distances, dict,
                                                     location,
                                                     'max_edit_distances')

        reference_receptors = MatchedReferenceUtil.prepare_reference(
            reference, location=location, paired=True)

        return {
            "reference_receptors": reference_receptors,
            "max_edit_distances": max_edit_distances,
            "name": name
        }
コード例 #4
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> ExploratoryAnalysisInstruction:
        """Parse an ExploratoryAnalysis instruction from the specification.

        Validates the instruction keys and number_of_processes, then builds one
        ExploratoryAnalysisUnit per entry under `analyses` and wraps them in an
        ExploratoryAnalysisInstruction named after `key`.
        """
        exp_analysis_units = {}

        # use __name__ for the location argument consistently (previously a
        # hard-coded "ExploratoryAnalysisParser" literal that could drift on rename)
        ParameterValidator.assert_keys(
            instruction, ["analyses", "type", "number_of_processes"],
            ExploratoryAnalysisParser.__name__, "ExploratoryAnalysis")
        ParameterValidator.assert_type_and_value(
            instruction["number_of_processes"], int,
            ExploratoryAnalysisParser.__name__, "number_of_processes")

        for analysis_key, analysis in instruction["analyses"].items():
            params = self._prepare_params(analysis, symbol_table,
                                          f"{key}/{analysis_key}")
            # every unit shares the instruction-level process count
            params["number_of_processes"] = instruction["number_of_processes"]
            exp_analysis_units[analysis_key] = ExploratoryAnalysisUnit(
                **params)

        process = ExploratoryAnalysisInstruction(
            exploratory_analysis_units=exp_analysis_units, name=key)
        return process
コード例 #5
0
    def build_object(cls, **kwargs):
        """Validate FeatureComparison report parameters and construct the report.

        `comparison_label` is required and must differ from every grouping label;
        `keep_fraction` must be a float in [0, 1]; `log_scale` must be a bool.
        """
        comparison_label = kwargs.get("comparison_label")
        color_grouping_label = kwargs.get("color_grouping_label")
        row_grouping_label = kwargs.get("row_grouping_label")
        column_grouping_label = kwargs.get("column_grouping_label")
        # default to False rather than None: the bool type check below otherwise
        # always fails when log_scale is simply omitted from the specification
        log_scale = kwargs.get("log_scale", False)
        keep_fraction = float(
            kwargs["keep_fraction"]) if "keep_fraction" in kwargs else 1.0
        ParameterValidator.assert_type_and_value(keep_fraction,
                                                 float,
                                                 "FeatureComparison",
                                                 "keep_fraction",
                                                 min_inclusive=0,
                                                 max_inclusive=1)
        ParameterValidator.assert_type_and_value(log_scale, bool,
                                                 "FeatureComparison",
                                                 "log_scale")

        assert comparison_label is not None, "FeatureComparison: the parameter 'comparison_label' must be set in order to be able to compare across this label"

        assert comparison_label != color_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as color_grouping_label"
        assert comparison_label != row_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as row_grouping_label"
        assert comparison_label != column_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as column_grouping_label"

        return FeatureComparison(**kwargs)
コード例 #6
0
    def _prepare_specs(self):
        """Read the YAML specification and check it defines exactly one TrainMLModel
        instruction with a single label, then rewrite result paths in place."""
        with self.yaml_path.open("r") as yaml_file:
            workflow_specs = yaml.safe_load(yaml_file)

        tool = GalaxyTrainMLModel.__name__

        ParameterValidator.assert_keys_present(workflow_specs.keys(), ["definitions", "instructions"], tool, "YAML specification")
        ParameterValidator.assert_all_in_valid_list(workflow_specs.keys(), ["definitions", "instructions", "output"], tool,
                                                    "YAML specification")

        ParameterValidator.assert_type_and_value(workflow_specs["instructions"], dict, tool, "instructions")

        instruction_keys = list(workflow_specs["instructions"].keys())
        assert len(instruction_keys) == 1, f"{tool}: one instruction has to be specified under " \
                                           f"`instructions`, got the following instead: {instruction_keys}."

        self.instruction_name = instruction_keys[0]

        instruction_specs = workflow_specs['instructions'][self.instruction_name]
        ParameterValidator.assert_type_and_value(instruction_specs, dict, tool, self.instruction_name)
        ParameterValidator.assert_keys_present(instruction_specs.keys(), ['type'], tool, self.instruction_name)

        # the instruction class name minus the "Instruction" suffix is the expected type string
        expected_type = TrainMLModelInstruction.__name__[:-11]
        assert instruction_specs['type'] == expected_type, \
            f"{tool}: instruction `type` under {self.instruction_name} has to be {expected_type} " \
            f"for this tool."

        assert len(instruction_specs['labels']) == 1, f"{tool}: one label has to be specified under " \
                                                      f"`labels`, got the following instead: {instruction_specs['labels']}."
        Util.check_paths(workflow_specs, tool)
        Util.update_result_paths(workflow_specs, self.result_path, self.yaml_path)
コード例 #7
0
    def _prepare_parameters(motif_filepath: str, match_v_genes: bool, sum_counts: bool, name: str = None):
        """Validate the regex-motif file and flags for MatchedRegexEncoder and return them as a dict."""
        location = "MatchedRegexEncoder"

        ParameterValidator.assert_type_and_value(match_v_genes, bool, location, "match_v_genes")
        ParameterValidator.assert_type_and_value(sum_counts, bool, location, "sum_counts")

        motif_filepath = Path(motif_filepath)
        assert motif_filepath.is_file(), f"MatchedRegexEncoder: the file {motif_filepath} does not exist. " \
                                               f"Specify the correct path under motif_filepath."

        # nrows=0 reads only the header row to inspect the column names
        file_columns = list(pd.read_csv(motif_filepath, sep="\t", iterator=False, dtype=str, nrows=0).columns)

        valid_columns = ["id"] + [f"{c.value}V" for c in Chain] + [f"{c.value}_regex" for c in Chain]
        ParameterValidator.assert_all_in_valid_list(file_columns, valid_columns, location, "motif_filepath (column names)")

        # chains are derived from the *_regex column names present in the file
        chains = [colname.split("_")[0] for colname in file_columns if colname.endswith("_regex")]
        if match_v_genes:
            for chain in chains:
                assert f"{chain}V" in file_columns, f"MatchedRegexEncoder: expected column {chain}V to be present in the columns of motif_filepath. " \
                                                    f"Remove {chain}_regex from columns, or set match_v_genes to False."

        return {
            "motif_filepath": motif_filepath,
            "match_v_genes": match_v_genes,
            "sum_counts": sum_counts,
            "chains": chains,
            "name": name
        }
コード例 #8
0
    def _prepare_report_config(self, instruction_key, instruction, split_key,
                               symbol_table):
        """Build the ReportConfig keyword arguments for one split section.

        Returns a dict mapping report type -> {report id -> report object}, or an
        empty dict when no reports are configured under the split.
        """
        reports = instruction[split_key].get("reports")
        if not reports:
            return {}

        location = f"{instruction_key}/{split_key}/reports"
        # the legal report types are exactly ReportConfig's constructor parameters
        valid_types = list(signature(ReportConfig).parameters.keys())
        ParameterValidator.assert_all_in_valid_list(reports.keys(), valid_types, location, "reports")

        for report_type in reports:
            ParameterValidator.assert_type_and_value(reports[report_type], list,
                                                     f"{location}/{report_type}", report_type)

        # resolve each listed report id through the symbol table
        return {
            report_type: {report_id: symbol_table.get(report_id) for report_id in reports[report_type]}
            for report_type in reports
        }
コード例 #9
0
 def _prepare_reports(self, reports: list, symbol_table: SymbolTable) -> dict:
     """Resolve report ids into report objects, verifying they are TrainMLModelReports."""
     if reports is None:
         return {}

     ParameterValidator.assert_type_and_value(reports, list, TrainMLModelParser.__name__, "reports")
     resolved_reports = {report_id: symbol_table.get(report_id) for report_id in reports}
     ParameterValidator.assert_all_type_and_value(resolved_reports.values(), TrainMLModelReport, TrainMLModelParser.__name__, 'reports')
     return resolved_reports
コード例 #10
0
    def _check_label_format(self, labels: list, instruction_key: str):
        """Check each label is a plain name or a one-key mapping whose only parameter is 'positive_class'."""
        ParameterValidator.assert_type_and_value(labels, list, TrainMLModelParser.__name__, f'{instruction_key}/labels')

        assert all(isinstance(label, (str, dict)) for label in labels), \
            f"{TrainMLModelParser.__name__}: labels under {instruction_key} were not defined properly. The list of labels has to either be a list of " \
            f"label names, or there can be a parameter 'positive_class' defined under the label name."

        # dict-form labels must map one name to exactly {'positive_class': ...}
        dict_labels = [label for label in labels if isinstance(label, dict)]
        assert all(len(list(label.keys())) == 1 and isinstance(list(label.values())[0], dict)
                   and 'positive_class' in list(label.values())[0]
                   and len(list(list(label.values())[0].keys())) == 1 for label in dict_labels), \
            f"{TrainMLModelParser.__name__}: labels that are specified by more than label name, can include only one parameter called 'positive_class'."
コード例 #11
0
    def __init__(self, percentage: float, show_warnings: bool = True):
        """Store the neighbour percentage (a float in [0, 1]) and warning flag for the classifier."""
        super().__init__()

        ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage", min_inclusive=0., max_inclusive=1.)

        self.show_warnings = show_warnings
        self.percentage = percentage
        # k and label start unset; presumably assigned later during fitting — TODO confirm
        self.k = None
        self.label = None
コード例 #12
0
 def build_object(cls, **kwargs):
     """Validate the keyword arguments and construct a DuplicateSequenceFilter."""
     location = cls.__name__
     expected_keys = ["filter_sequence_type", "batch_size", "count_agg"]
     ParameterValidator.assert_keys(kwargs.keys(), expected_keys, location, "DuplicateSequenceFilter")
     # string options are matched case-insensitively against the enum member names
     ParameterValidator.assert_in_valid_list(kwargs["filter_sequence_type"].upper(), [st.name for st in SequenceType],
                                             location, "filter_sequence_type")
     ParameterValidator.assert_in_valid_list(kwargs["count_agg"].upper(), [agg.name for agg in CountAggregationFunction],
                                             location, "count_agg")
     ParameterValidator.assert_type_and_value(kwargs["batch_size"], int, location, "batch_size", 1)

     return DuplicateSequenceFilter(filter_sequence_type=SequenceType[kwargs["filter_sequence_type"].upper()],
                                    batch_size=kwargs["batch_size"],
                                    count_agg=CountAggregationFunction[kwargs["count_agg"].upper()])
コード例 #13
0
    def build_object(cls, **kwargs):
        """Normalize optional attribute lists to empty lists, validate them, and build the exporter."""
        attr_keys = ("additional_node_attributes", "additional_edge_attributes")

        # treat an explicit null the same as an empty list
        for attr_key in attr_keys:
            if kwargs[attr_key] is None:
                kwargs[attr_key] = []

        for attr_key in attr_keys:
            ParameterValidator.assert_type_and_value(kwargs[attr_key], list, "CytoscapeNetworkExporter", attr_key)

        return CytoscapeNetworkExporter(**kwargs)
コード例 #14
0
    def _prepare_parameters(normalization_type: str, reads: str, sequence_encoding: str, k: int = 0, k_left: int = 0,
                            k_right: int = 0, min_gap: int = 0, max_gap: int = 0, metadata_fields_to_include: list = None, name: str = None,
                            scale_to_unit_variance: bool = False, scale_to_zero_mean: bool = False, sequence_type: str = None):
        """Validate KmerFrequencyEncoder parameters and convert string options to enum values.

        Returns a dict with enum-valued normalization_type, reads, sequence_encoding and
        sequence_type, the scaling flags, the name, and the k/gap integer parameters.
        NOTE(review): metadata_fields_to_include is accepted but neither validated nor
        returned here — confirm it is handled by the caller.
        """

        location = KmerFrequencyEncoder.__name__

        ParameterValidator.assert_in_valid_list(normalization_type.upper(), [item.name for item in NormalizationType], location, "normalization_type")
        ParameterValidator.assert_in_valid_list(reads.upper(), [item.name for item in ReadsType], location, "reads")
        ParameterValidator.assert_in_valid_list(sequence_encoding.upper(), [item.name for item in SequenceEncodingType], location, "sequence_encoding")
        ParameterValidator.assert_type_and_value(scale_to_zero_mean, bool, location, "scale_to_zero_mean")
        ParameterValidator.assert_type_and_value(scale_to_unit_variance, bool, location, "scale_to_unit_variance")
        ParameterValidator.assert_type_and_value(sequence_type, str, location, 'sequence_type')
        ParameterValidator.assert_in_valid_list(sequence_type.upper(), [st.name for st in SequenceType], location, 'sequence_type')

        # IMGT-based encodings are only defined for amino acid sequences
        if "IMGT" in sequence_encoding.upper():
            assert sequence_type.upper() == SequenceType.AMINO_ACID.name, f"{location}: for IMGT-based k-mer frequency encoding (here: " \
                                                                     f"{sequence_encoding.upper()}), sequence type has to be 'amino_acid'."

        # every k-mer size / gap parameter must be a non-negative integer
        vars_to_check = {"k": k, "k_left": k_left, "k_right": k_right, "min_gap": min_gap, "max_gap": max_gap}
        for param in vars_to_check.keys():
            ParameterValidator.assert_type_and_value(vars_to_check[param], int, location, param, min_inclusive=0)

        # gapped encodings need both flanking k-mer sizes to be set
        if "gap" in sequence_encoding.lower():
            assert k_left != 0 and k_right != 0, f"KmerFrequencyEncoder: sequence encoding {sequence_encoding} was chosen, but k_left " \
                                                 f"({k_left}) or k_right ({k_right}) have to be set and larger than 0."

        return {
            "normalization_type": NormalizationType[normalization_type.upper()],
            "reads": ReadsType[reads.upper()],
            "sequence_encoding": SequenceEncodingType[sequence_encoding.upper()],
            "name": name,
            "scale_to_zero_mean": scale_to_zero_mean, "scale_to_unit_variance": scale_to_unit_variance,
            'sequence_type': SequenceType[sequence_type.upper()],
            **vars_to_check
        }
コード例 #15
0
 def __init__(self, k: int, skip_first_n_aa: int, skip_last_n_aa: int,
              abundance: str, normalize_all_features: bool, name: str = None):
     """Validate the parameters and initialize the Atchley k-mer encoder state."""
     location = "AtchleyKmerEncoder"

     ParameterValidator.assert_type_and_value(k, int, location, "k", 1)
     ParameterValidator.assert_type_and_value(skip_first_n_aa, int, location, "skip_first_n_aa", 0)
     ParameterValidator.assert_type_and_value(skip_last_n_aa, int, location, "skip_last_n_aa", 0)
     ParameterValidator.assert_in_valid_list(abundance.upper(), [ab.name for ab in RelativeAbundanceType],
                                             location, "abundance")
     ParameterValidator.assert_type_and_value(normalize_all_features, bool, location, "normalize_all_features")

     self.k = k
     self.skip_first_n_aa = skip_first_n_aa
     self.skip_last_n_aa = skip_last_n_aa
     # abundance string is stored as its enum member
     self.abundance = RelativeAbundanceType[abundance.upper()]
     self.normalize_all_features = normalize_all_features
     self.name = name
     # scaler/vectorizer paths start unset; presumably filled during encoding — TODO confirm
     self.scaler_path = None
     self.vectorizer_path = None
コード例 #16
0
ファイル: OneHotEncoder.py プロジェクト: uio-bmi/immuneML
    def _prepare_parameters(use_positional_info: bool,
                            distance_to_seq_middle: int,
                            flatten: bool,
                            sequence_type: str,
                            name: str = None):
        """Validate OneHotEncoder parameters and convert sequence_type to its enum value."""
        location = OneHotEncoder.__name__

        ParameterValidator.assert_type_and_value(use_positional_info, bool, location, "use_positional_info")
        if use_positional_info:
            ParameterValidator.assert_type_and_value(distance_to_seq_middle, int, location,
                                                     "distance_to_seq_middle", min_inclusive=1)
        else:
            # positional info is off, so the distance parameter is irrelevant
            distance_to_seq_middle = None

        ParameterValidator.assert_type_and_value(flatten, bool, location, "flatten")
        ParameterValidator.assert_type_and_value(sequence_type, str, location, 'sequence_type')
        ParameterValidator.assert_in_valid_list(sequence_type.upper(), [item.name for item in SequenceType],
                                                location, 'sequence_type')

        return {
            "use_positional_info": use_positional_info,
            "distance_to_seq_middle": distance_to_seq_middle,
            "flatten": flatten,
            "sequence_type": SequenceType[sequence_type.upper()],
            "name": name
        }
コード例 #17
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        """Build the list of HPSetting objects from the instruction's `settings` entries.

        Each setting names an encoding, an ml_method and optionally a preprocessing
        sequence; names are resolved through the symbol table.

        Raises:
            KeyError: if a referenced preprocessing (or any required key) is not defined.
        """
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                # NOTE(review): an explicit 'preprocessing: null' entry passes this
                # membership check and then fails the str type check below — confirm
                # whether null should instead be treated like an absent key
                if "preprocessing" in setting:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                # build the encoder for the instruction's dataset and hand it the
                # dataset through its context as well
                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=symbol_table.get(setting["ml_method"]),
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            # any missing key above surfaces as one user-oriented error message
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            )
コード例 #18
0
ファイル: Util.py プロジェクト: sailfish009/immuneML
    def check_export_format(specs: dict, tool_name: str,
                            instruction_name: str):
        """Return the single export format configured under the instruction, validating there is exactly one."""
        instruction_specs = specs['instructions'][instruction_name]

        ParameterValidator.assert_keys_present(list(instruction_specs.keys()), ["export_formats"], tool_name,
                                               f"{instruction_name}/export_formats")
        ParameterValidator.assert_type_and_value(instruction_specs["export_formats"], list, tool_name,
                                                 f"{instruction_name}/export_formats")

        export_formats = instruction_specs["export_formats"]
        assert len(export_formats) == 1, \
            f"{tool_name}: only one format can be specified under export_formats parameter under " \
            f"{instruction_name}/export_formats, got {export_formats} instead."

        return export_formats[0]
コード例 #19
0
    def _check_instruction(self, specs):
        """Check that the dataset-export instruction lists exactly one dataset and one export format."""
        tool = DatasetGenerationTool.__name__
        instruction_name = Util.check_instruction_type(specs, tool, DatasetExportInstruction.__name__[:-11])
        instruction = specs['instructions'][instruction_name]

        for key in ['datasets', 'export_formats']:
            ParameterValidator.assert_keys_present(list(instruction.keys()), [key], tool, instruction_name)
            ParameterValidator.assert_type_and_value(instruction[key], list, tool, f"{instruction_name}/{key}")

            # the tool is restricted to a single item per list
            assert len(instruction[key]) == 1, \
                f"{tool}: this tool accepts only one item under {key}, got {instruction[key]} " \
                f"instead."
コード例 #20
0
    def _check_dataset_specs(self, workflow_specification, location):
        """Validate the definitions/datasets section and require more than one dataset."""
        definitions = workflow_specification['definitions']

        ParameterValidator.assert_type_and_value(definitions, dict, location, 'definitions')
        ParameterValidator.assert_keys_present(definitions.keys(), ['datasets'], location, 'definitions')
        ParameterValidator.assert_type_and_value(definitions['datasets'], dict, location, 'datasets')

        dataset_names = list(definitions['datasets'].keys())

        # this tool benchmarks across datasets, so a single dataset is an error
        assert len(dataset_names) > 1, \
            f"MultiDatasetBenchmarkTool: there is only one dataset specified ({dataset_names[0]}), while this tool operates on multiple datasets. " \
            f"If only one dataset is needed, consider using the training instruction directly."
コード例 #21
0
ファイル: LabelHelper.py プロジェクト: uio-bmi/immuneML
    def check_label_format(labels: list, instruction_name: str,
                           yaml_location: str):
        """Validate that labels is a list of label names (str) or one-key mappings
        whose only parameter is 'positive_class'.

        Raises AssertionError with a YAML usage example when the format is wrong.
        """
        ParameterValidator.assert_type_and_value(labels, list,
                                                 instruction_name,
                                                 f'{yaml_location}/labels')

        # removed a stray trailing line-continuation backslash after the last message
        # fragment: it silently pulled the following blank line into the assert
        # statement and would turn into a SyntaxError if a comment were added there
        assert all(isinstance(label, str) or isinstance(label, dict) for label in labels), \
            f"{instruction_name}: labels under {yaml_location} were not defined properly. The list of labels has to either be a list of " \
            f"label names, or there can be a parameter 'positive_class' defined under the label name, for example:\n" \
            f"labels: # one label with no positive class (T1D) and one with a positive class (CMV)\n" \
            f"- T1D\n" \
            f"- CMV: # when defining a positive class, make sure to use the correct indentation\n" \
            f"    positive_class: True\n"

        # dict-form labels must map one name to exactly {'positive_class': ...}
        assert all(len(list(label.keys())) == 1 and isinstance(list(label.values())[0], dict) and 'positive_class' in list(label.values())[0]
                   and len(list(list(label.values())[0].keys())) == 1 for label in [l for l in labels if isinstance(l, dict)]), \
            f"{instruction_name}: The only legal parameter under a label name is 'positive_class'. If 'positive_class' is not specified, please remove the colon after the label name. "
コード例 #22
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> SubsamplingInstruction:
        """Validate a subsampling instruction and build the corresponding SubsamplingInstruction."""
        ParameterValidator.assert_keys(instruction.keys(),
                                       ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"],
                                       SubsamplingParser.__name__, key)

        # the referenced dataset must exist in the symbol table
        dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
        ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys,
                                                SubsamplingParser.__name__, f'{key}/dataset')

        dataset = symbol_table.get(instruction['dataset'])
        subsample_sizes = instruction['subsampled_dataset_sizes']
        ParameterValidator.assert_type_and_value(subsample_sizes, list, SubsamplingParser.__name__,
                                                 f'{key}/subsampled_dataset_sizes')
        # each size must be a positive int no larger than the dataset itself
        ParameterValidator.assert_all_type_and_value(subsample_sizes, int, SubsamplingParser.__name__,
                                                     f'{key}/subsampled_dataset_sizes', 1,
                                                     dataset.get_example_count())

        valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter',
                                                                                      "dataset_export/")
        export_formats = instruction['dataset_export_formats']
        ParameterValidator.assert_type_and_value(export_formats, list, SubsamplingParser.__name__,
                                                 f"{key}/dataset_export_formats")
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats,
                                                    SubsamplingParser.__name__, f"{key}/dataset_export_formats")

        # resolve each format name to its exporter class
        exporter_classes = [ReflectionHandler.get_class_by_name(export_format + "Exporter", "dataset_export/")
                            for export_format in export_formats]

        return SubsamplingInstruction(dataset=dataset,
                                      subsampled_dataset_sizes=subsample_sizes,
                                      dataset_export_formats=exporter_classes,
                                      name=key)
コード例 #23
0
    def build_object(cls, **kwargs):
        """Validate parameters and construct a DeepRCMotifDiscovery report.

        Fix: previously a spec missing 'n_steps' or 'threshold' raised a bare
        KeyError with no context; now a descriptive validation error is raised
        first, matching the ParameterValidator style used elsewhere in the file.
        """
        location = "DeepRCMotifDiscovery"

        # fail early with a clear message when required parameters are absent
        ParameterValidator.assert_keys_present(kwargs, ["n_steps", "threshold"],
                                               location, location)

        # 'name' is optional; defaults to None when not given
        name = kwargs["name"] if "name" in kwargs else None

        # n_steps must be a positive integer
        ParameterValidator.assert_type_and_value(kwargs["n_steps"],
                                                 int,
                                                 location,
                                                 "n_steps",
                                                 min_inclusive=1)
        # threshold must be a float in [0, 1]
        ParameterValidator.assert_type_and_value(kwargs["threshold"],
                                                 float,
                                                 location,
                                                 "threshold",
                                                 min_inclusive=0,
                                                 max_inclusive=1)

        return DeepRCMotifDiscovery(n_steps=kwargs["n_steps"],
                                    threshold=kwargs["threshold"],
                                    name=name)
コード例 #24
0
    def build_object(cls, **kwargs):
        """Validate the given keyword arguments and construct a ConfounderAnalysis report."""
        location = ConfounderAnalysis.__name__

        # exactly 'metadata_labels' and 'name' must be present in the spec
        ParameterValidator.assert_keys(kwargs.keys(),
                                       ['metadata_labels', 'name'],
                                       location,
                                       location)

        labels = kwargs['metadata_labels']
        # metadata_labels must be a list of strings
        ParameterValidator.assert_type_and_value(labels, list, location, 'metadata_labels')
        ParameterValidator.assert_all_type_and_value(labels, str, location, 'metadata_labels')
        # name must be a string
        ParameterValidator.assert_type_and_value(kwargs['name'], str, location, 'name')

        return ConfounderAnalysis(metadata_labels=labels, name=kwargs['name'])
コード例 #25
0
ファイル: Coefficients.py プロジェクト: knutdrand/immuneML
    def build_object(cls, **kwargs):
        """Validate plotting settings and construct a Coefficients report.

        'cutoff' and 'n_largest' are only read from the spec when the
        corresponding plotting setting was requested; otherwise they default
        to empty lists.
        """
        location = "Coefficients"
        name = kwargs.get("name")

        # normalize requested settings to upper case to match the enum member names
        coefs_to_plot = [coef.upper() for coef in kwargs["coefs_to_plot"]]
        valid_settings = [item.name.upper() for item in CoefficientPlottingSetting]
        ParameterValidator.assert_all_in_valid_list(coefs_to_plot, valid_settings,
                                                    location, "coefs_to_plot")

        cutoff = []
        if CoefficientPlottingSetting.CUTOFF.name in coefs_to_plot:
            # cutoff: list of strictly positive numbers
            cutoff = kwargs["cutoff"]
            ParameterValidator.assert_type_and_value(cutoff, list, location, "cutoff")
            ParameterValidator.assert_all_type_and_value(cutoff, Number, location,
                                                         "cutoff", min_inclusive=1e-15)

        n_largest = []
        if CoefficientPlottingSetting.N_LARGEST.name in coefs_to_plot:
            # n_largest: list of positive integers
            n_largest = kwargs["n_largest"]
            ParameterValidator.assert_type_and_value(n_largest, list, location, "n_largest")
            ParameterValidator.assert_all_type_and_value(n_largest, int, location,
                                                         "n_largest", min_inclusive=1)

        # entries are already upper-cased, so they index the enum directly
        coefs = CoefficientPlottingSettingList()
        for setting_name in coefs_to_plot:
            coefs.append(CoefficientPlottingSetting[setting_name])

        return Coefficients(coefs_to_plot=coefs,
                            cutoff=cutoff,
                            n_largest=n_largest,
                            name=name)
コード例 #26
0
    def _prepare_parameters(max_edit_distance: int,
                            reference: dict,
                            name: str = None):
        """Validate MatchedSequencesEncoder parameters and resolve the reference spec.

        Returns a dict of the prepared parameters ready for the encoder constructor.
        """
        location = "MatchedSequencesEncoder"

        # the edit distance must be a non-negative integer
        ParameterValidator.assert_type_and_value(max_edit_distance, int, location,
                                                 "max_edit_distance", min_inclusive=0)

        # resolve the reference specification into sequence objects (paired=False)
        prepared_reference = MatchedReferenceUtil.prepare_reference(
            reference_params=reference, location=location, paired=False)

        return {"max_edit_distance": max_edit_distance,
                "reference_sequences": prepared_reference,
                "name": name}
コード例 #27
0
ファイル: MLParser.py プロジェクト: dn070017/immuneML
    def create_method_instance(ml_specification: dict, ml_method_class,
                               key: str) -> tuple:
        """Instantiate an ML method from its specification entry.

        Returns a tuple (method instance, raw parameter dict from the spec).
        """
        ml_params = {}
        spec_params = ml_specification[ml_method_class.__name__]

        if spec_params is None or len(spec_params.keys()) == 0:
            # no parameters given: rely entirely on the method's own defaults
            ml_method = ml_method_class()
        else:
            ml_params = spec_params
            init_method_keys = inspect.signature(
                ml_method_class.__init__).parameters.keys()
            has_list_param = any(isinstance(value, list)
                                 for value in ml_params.values())

            if has_list_param and "parameter_grid" in init_method_keys:
                # list-valued parameters imply a grid search, which needs CV settings
                ParameterValidator.assert_type_and_value(
                    ml_specification['model_selection_cv'],
                    bool,
                    MLParser.__name__,
                    f'{key}: model_selection_cv',
                    exact_value=True)
                ParameterValidator.assert_type_and_value(
                    ml_specification['model_selection_n_folds'], int,
                    MLParser.__name__, f'{key}: model_selection_n_folds', 2)

                # wrap scalar values in lists so every entry forms one grid dimension
                parameter_grid = {param: value if isinstance(value, list) else [value]
                                  for param, value in ml_params.items()}
                ml_method = ml_method_class(parameter_grid=parameter_grid)
            elif len(init_method_keys) == 3 and all(
                    arg in init_method_keys
                    for arg in ["parameters", "parameter_grid"]):
                # wrapper-style constructor: pass the whole dict as 'parameters'
                ml_method = ml_method_class(parameters=ml_params)
            else:
                ml_method = ml_method_class(**ml_params)

        return ml_method, ml_params
コード例 #28
0
    def create_method_instance(ml_specification: dict, ml_method_class,
                               key: str) -> tuple:
        """Instantiate an ML method from its specification entry.

        Returns a tuple (method instance, raw parameter dict from the spec).
        """
        ml_params = {}
        spec_params = ml_specification[ml_method_class.__name__]

        if spec_params is None or len(spec_params.keys()) == 0:
            # no parameters given: rely entirely on the method's own defaults
            ml_method = ml_method_class()
        else:
            ml_params = spec_params
            init_method_keys = inspect.signature(
                ml_method_class.__init__).parameters.keys()
            has_list_param = any(isinstance(value, list)
                                 for value in ml_params.values())

            if has_list_param and "parameter_grid" in init_method_keys:
                ParameterValidator.assert_type_and_value(
                    ml_specification['model_selection_cv'], bool,
                    MLParser.__name__, f'{key}: model_selection_cv')
                # grid search over parameter lists requires cross-validation to be on
                assert ml_specification['model_selection_cv'] == True, f"MLParser: when running ML method {key} with a list of inputs, model_selection_cv must be True! " \
                                                                       f"Set the parameters for {key} to single values (not lists) or set model_selection_cv to True and model_selection_n_folds to >= 2"

                ParameterValidator.assert_type_and_value(
                    ml_specification['model_selection_n_folds'], int,
                    MLParser.__name__, f'{key}: model_selection_n_folds', 2)

                # wrap scalar values in lists so every entry forms one grid dimension
                parameter_grid = {param: value if isinstance(value, list) else [value]
                                  for param, value in ml_params.items()}
                ml_method = ml_method_class(parameter_grid=parameter_grid)
            elif len(init_method_keys) == 3 and all(
                    arg in init_method_keys
                    for arg in ["parameters", "parameter_grid"]):
                # wrapper-style constructor: pass the whole dict as 'parameters'
                ml_method = ml_method_class(parameters=ml_params)
            else:
                ml_method = ml_method_class(**ml_params)

        return ml_method, ml_params
コード例 #29
0
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable,
              path: Path) -> MLApplicationInstruction:
        """Validate the MLApplication instruction spec and build the instruction object."""
        location = MLApplicationParser.__name__

        # only these keys may appear under the instruction
        ParameterValidator.assert_keys(
            instruction.keys(),
            ['type', 'dataset', 'number_of_processes', 'config_path', 'store_encoded_data'],
            location, key)

        # the dataset must have been defined earlier in the specification
        ParameterValidator.assert_in_valid_list(
            instruction['dataset'],
            symbol_table.get_keys_by_type(SymbolType.DATASET),
            location, f"{key}: dataset")
        # number_of_processes: positive integer
        ParameterValidator.assert_type_and_value(
            instruction['number_of_processes'], int, location,
            f"{key}: number_of_processes", min_inclusive=1)
        ParameterValidator.assert_type_and_value(
            instruction['config_path'], str, location, f'{key}: config_path')
        ParameterValidator.assert_type_and_value(
            instruction['store_encoded_data'], bool, location,
            f'{key}: store_encoded_data')

        # delegate resolution of the HP setting and label to the sibling helper
        hp_setting, label = self._parse_hp_setting(instruction, path, key)

        return MLApplicationInstruction(
            dataset=symbol_table.get(instruction['dataset']),
            name=key,
            number_of_processes=instruction['number_of_processes'],
            label_configuration=LabelConfiguration([label]),
            hp_setting=hp_setting,
            store_encoded_data=instruction['store_encoded_data'])
コード例 #30
0
ファイル: OneHotEncoder.py プロジェクト: FBernal-oPs/immuneML
    def _prepare_parameters(use_positional_info,
                            distance_to_seq_middle,
                            flatten,
                            name: str = None):
        """Validate OneHotEncoder parameters and return them as a dict.

        When positional info is disabled, distance_to_seq_middle is forced to None.
        """
        location = OneHotEncoder.__name__

        ParameterValidator.assert_type_and_value(use_positional_info, bool, location,
                                                 "use_positional_info")

        if not use_positional_info:
            # the distance parameter is irrelevant without positional info
            distance_to_seq_middle = None
        else:
            # distance must be a positive integer when positional info is used
            ParameterValidator.assert_type_and_value(distance_to_seq_middle, int,
                                                     location, "distance_to_seq_middle",
                                                     min_inclusive=1)

        ParameterValidator.assert_type_and_value(flatten, bool, location, "flatten")

        return {"use_positional_info": use_positional_info,
                "distance_to_seq_middle": distance_to_seq_middle,
                "flatten": flatten,
                "name": name}