def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
    if "reports" in instruction[split_key]:
        location = f"{instruction_key}/{split_key}/reports"
        report_types = list(signature(ReportConfig).parameters.keys())
        ParameterValidator.assert_all_in_valid_list(instruction[split_key]["reports"].keys(), report_types, location, "reports")

        for report_type in instruction[split_key]["reports"]:
            ParameterValidator.assert_type_and_value(instruction[split_key]["reports"][report_type], list,
                                                     f"{location}/{report_type}", report_type)

        report_config_input = {report_type: {report_id: symbol_table.get(report_id)
                                             for report_id in instruction[split_key]["reports"][report_type]}
                               for report_type in instruction[split_key]["reports"]}
    else:
        report_config_input = {}

    return report_config_input
def build_object(cls, **kwargs): location = "Coefficients" coefs_to_plot = [coef.upper() for coef in kwargs["coefs_to_plot"]] name = kwargs["name"] if "name" in kwargs else None ParameterValidator.assert_all_in_valid_list(coefs_to_plot, [item.name.upper() for item in CoefficientPlottingSetting], location, "coefs_to_plot") if CoefficientPlottingSetting.CUTOFF.name in coefs_to_plot: cutoff = kwargs["cutoff"] ParameterValidator.assert_type_and_value(cutoff, list, location, "cutoff") ParameterValidator.assert_all_type_and_value(cutoff, Number, location, "cutoff", min_inclusive=1e-15) else: cutoff = [] if CoefficientPlottingSetting.N_LARGEST.name in coefs_to_plot: n_largest = kwargs["n_largest"] ParameterValidator.assert_type_and_value(n_largest, list, location, "n_largest") ParameterValidator.assert_all_type_and_value(n_largest, int, location, "n_largest", min_inclusive=1) else: n_largest = [] coefs = CoefficientPlottingSettingList() for keyword in coefs_to_plot: coefs.append(CoefficientPlottingSetting[keyword.upper()]) return Coefficients(coefs, cutoff, n_largest, name)
def _prepare_specs(self):
    with open(self.yaml_path, "r") as file:
        specs = yaml.safe_load(file)

    ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"], GalaxyTrainMLModel.__name__, "YAML specification")
    ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"], GalaxyTrainMLModel.__name__,
                                                "YAML specification")

    ParameterValidator.assert_type_and_value(specs["instructions"], dict, GalaxyTrainMLModel.__name__, "instructions")

    assert len(list(specs["instructions"].keys())) == 1, \
        f"{GalaxyTrainMLModel.__name__}: one instruction has to be specified under " \
        f"`instructions`, got the following instead: {list(specs['instructions'].keys())}."

    self.instruction_name = list(specs["instructions"].keys())[0]

    ParameterValidator.assert_type_and_value(specs['instructions'][self.instruction_name], dict, GalaxyTrainMLModel.__name__,
                                             self.instruction_name)
    ParameterValidator.assert_keys_present(specs['instructions'][self.instruction_name].keys(), ['type'], GalaxyTrainMLModel.__name__,
                                           self.instruction_name)

    assert specs['instructions'][self.instruction_name]['type'] == TrainMLModelInstruction.__name__[:-11], \
        f"{GalaxyTrainMLModel.__name__}: instruction `type` under {self.instruction_name} has to be {TrainMLModelInstruction.__name__[:-11]} " \
        f"for this tool."

    Util.check_paths(specs, GalaxyTrainMLModel.__name__)
    Util.update_result_paths(specs, self.result_path, self.yaml_path)
def _prepare_parameters(reference: dict, max_edit_distances: dict, name: str = None):
    location = "MatchedReceptorsEncoder"

    legal_chains = [chain for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor()) for chain in receptor.get_chains()]

    if type(max_edit_distances) is int:
        max_edit_distances = {chain: max_edit_distances for chain in legal_chains}
    elif type(max_edit_distances) is dict:
        ParameterValidator.assert_keys(max_edit_distances.keys(), legal_chains, location, "max_edit_distances", exclusive=False)
    else:
        ParameterValidator.assert_type_and_value(max_edit_distances, dict, location, 'max_edit_distances')

    reference_receptors = MatchedReferenceUtil.prepare_reference(reference, location=location, paired=True)

    return {
        "reference_receptors": reference_receptors,
        "max_edit_distances": max_edit_distances,
        "name": name
    }
def _check_label_format(self, labels: list, instruction_key: str):
    ParameterValidator.assert_type_and_value(labels, list, TrainMLModelParser.__name__, f'{instruction_key}/labels')

    assert all(isinstance(label, str) or isinstance(label, dict) for label in labels), \
        f"{TrainMLModelParser.__name__}: labels under {instruction_key} were not defined properly. The list of labels has to either be a list of " \
        f"label names, or there can be a parameter 'positive_class' defined under the label name."

    assert all(len(list(label.keys())) == 1 and isinstance(list(label.values())[0], dict) and 'positive_class' in list(label.values())[0]
               and len(list(list(label.values())[0].keys())) == 1 for label in [l for l in labels if isinstance(l, dict)]), \
        f"{TrainMLModelParser.__name__}: labels that are specified by more than a label name can include only one parameter, called 'positive_class'."
def _prepare_parameters(max_edit_distance: int, reference: dict, name: str = None):
    location = "MatchedSequencesEncoder"

    ParameterValidator.assert_type_and_value(max_edit_distance, int, location, "max_edit_distance", min_inclusive=0)

    reference_sequences = MatchedReferenceUtil.prepare_reference(reference_params=reference, location=location, paired=False)

    return {
        "max_edit_distance": max_edit_distance,
        "reference_sequences": reference_sequences,
        "name": name
    }
def _check_instruction(self, specs):
    instruction_name = Util.check_instruction_type(specs, DatasetGenerationTool.__name__, DatasetExportInstruction.__name__[:-11])

    for key in ['datasets', 'export_formats']:
        ParameterValidator.assert_keys_present(list(specs['instructions'][instruction_name].keys()), [key], DatasetGenerationTool.__name__,
                                               instruction_name)
        ParameterValidator.assert_type_and_value(specs["instructions"][instruction_name][key], list, DatasetGenerationTool.__name__,
                                                 f"{instruction_name}/{key}")

        assert len(specs['instructions'][instruction_name][key]) == 1, \
            f"{DatasetGenerationTool.__name__}: this tool accepts only one item under {key}, got {specs['instructions'][instruction_name][key]} " \
            f"instead."
def __init__(self, percentage: float, show_warnings: bool = True):
    super().__init__()

    ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage", min_inclusive=0., max_inclusive=1.)

    self.percentage = percentage
    self.k = None
    self.label = None
    self.show_warnings = show_warnings
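# A minimal usage sketch for the TCRdistClassifier constructor above: percentage
# must be a float in [0, 1], as enforced by the validation call; the value 0.1
# is an illustrative assumption.
classifier = TCRdistClassifier(percentage=0.1, show_warnings=False)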
def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
    try:
        settings = []
        for index, setting in enumerate(instruction["settings"]):
            if "preprocessing" in setting:
                ParameterValidator.assert_type_and_value(setting["preprocessing"], str, TrainMLModelParser.__name__,
                                                         f'settings: {index + 1}. element: preprocessing')
                if symbol_table.contains(setting["preprocessing"]):
                    preprocessing_sequence = symbol_table.get(setting["preprocessing"])
                    preproc_name = setting["preprocessing"]
                else:
                    raise KeyError(f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                                   f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                                   f"definitions: {PreprocessingParser.keyword}.")
            else:
                setting["preprocessing"] = None
                preprocessing_sequence = []
                preproc_name = None

            ParameterValidator.assert_keys(setting.keys(), ["preprocessing", "ml_method", "encoding"], TrainMLModelParser.__name__,
                                           f"settings, {index + 1}. entry")

            encoder = symbol_table.get(setting["encoding"]) \
                .build_object(symbol_table.get(instruction["dataset"]), **symbol_table.get_config(setting["encoding"])["encoder_params"]) \
                .set_context({"dataset": symbol_table.get(instruction['dataset'])})

            s = HPSetting(encoder=encoder, encoder_name=setting["encoding"],
                          encoder_params=symbol_table.get_config(setting["encoding"])["encoder_params"],
                          ml_method=symbol_table.get(setting["ml_method"]), ml_method_name=setting["ml_method"],
                          ml_params=symbol_table.get_config(setting["ml_method"]),
                          preproc_sequence=preprocessing_sequence, preproc_sequence_name=preproc_name)

            settings.append(s)

        return settings
    except KeyError as key_error:
        raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")
def check_export_format(specs: dict, tool_name: str, instruction_name: str):
    ParameterValidator.assert_keys_present(list(specs['instructions'][instruction_name].keys()), ["export_formats"], tool_name,
                                           f"{instruction_name}/export_formats")
    ParameterValidator.assert_type_and_value(specs['instructions'][instruction_name]["export_formats"], list, tool_name,
                                             f"{instruction_name}/export_formats")

    assert len(specs['instructions'][instruction_name]["export_formats"]) == 1, \
        f"{tool_name}: only one format can be specified under export_formats parameter under " \
        f"{instruction_name}/export_formats, got {specs['instructions'][instruction_name]['export_formats']} instead."

    return specs['instructions'][instruction_name]["export_formats"][0]
def build_object(cls, **kwargs):
    if kwargs["additional_node_attributes"] is None:
        kwargs["additional_node_attributes"] = []
    if kwargs["additional_edge_attributes"] is None:
        kwargs["additional_edge_attributes"] = []

    ParameterValidator.assert_type_and_value(kwargs["additional_node_attributes"], list, "CytoscapeNetworkExporter",
                                             "additional_node_attributes")
    ParameterValidator.assert_type_and_value(kwargs["additional_edge_attributes"], list, "CytoscapeNetworkExporter",
                                             "additional_edge_attributes")

    return CytoscapeNetworkExporter(**kwargs)
def _check_dataset_specs(self, workflow_specification, location):
    ParameterValidator.assert_type_and_value(workflow_specification['definitions'], dict, location, 'definitions')
    ParameterValidator.assert_keys_present(workflow_specification['definitions'].keys(), ['datasets'], location, 'definitions')
    ParameterValidator.assert_type_and_value(workflow_specification['definitions']['datasets'], dict, location, 'datasets')

    dataset_names = list(workflow_specification['definitions']['datasets'].keys())
    assert len(dataset_names) > 1, \
        f"MultiDatasetBenchmarkTool: only {len(dataset_names)} dataset(s) specified ({dataset_names}), while this tool operates on multiple " \
        f"datasets. If only one dataset is needed, consider using the training instruction directly."
def build_object(cls, **kwargs): location = "DeepRCMotifDiscovery" name = kwargs["name"] if "name" in kwargs else None ParameterValidator.assert_type_and_value(kwargs["n_steps"], int, location, "n_steps", min_inclusive=1) ParameterValidator.assert_type_and_value(kwargs["threshold"], float, location, "threshold", min_inclusive=0, max_inclusive=1) return DeepRCMotifDiscovery(n_steps=kwargs["n_steps"], threshold=kwargs["threshold"], name=name)
def build_object(cls, **kwargs):
    location = cls.__name__

    ParameterValidator.assert_keys(kwargs.keys(), ["filter_sequence_type", "batch_size", "count_agg"], location, "DuplicateSequenceFilter")

    ParameterValidator.assert_in_valid_list(kwargs["filter_sequence_type"].upper(), [item.name for item in SequenceType], location,
                                            "filter_sequence_type")
    ParameterValidator.assert_in_valid_list(kwargs["count_agg"].upper(), [item.name for item in CountAggregationFunction], location, "count_agg")
    ParameterValidator.assert_type_and_value(kwargs["batch_size"], int, location, "batch_size", 1)

    return DuplicateSequenceFilter(filter_sequence_type=SequenceType[kwargs["filter_sequence_type"].upper()],
                                   batch_size=kwargs["batch_size"],
                                   count_agg=CountAggregationFunction[kwargs["count_agg"].upper()])
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> SubsamplingInstruction:
    valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
    ParameterValidator.assert_keys(instruction.keys(), valid_keys, SubsamplingParser.__name__, key)

    dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
    ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys, SubsamplingParser.__name__, f'{key}/dataset')

    dataset = symbol_table.get(instruction['dataset'])
    ParameterValidator.assert_type_and_value(instruction['subsampled_dataset_sizes'], list, SubsamplingParser.__name__,
                                             f'{key}/subsampled_dataset_sizes')
    ParameterValidator.assert_all_type_and_value(instruction['subsampled_dataset_sizes'], int, SubsamplingParser.__name__,
                                                 f'{key}/subsampled_dataset_sizes', 1, dataset.get_example_count())

    valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
    ParameterValidator.assert_type_and_value(instruction['dataset_export_formats'], list, SubsamplingParser.__name__,
                                             f"{key}/dataset_export_formats")
    ParameterValidator.assert_all_in_valid_list(instruction['dataset_export_formats'], valid_export_formats, SubsamplingParser.__name__,
                                                f"{key}/dataset_export_formats")

    return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=instruction['subsampled_dataset_sizes'],
                                  dataset_export_formats=[ReflectionHandler.get_class_by_name(export_format + "Exporter", "dataset_export/")
                                                          for export_format in instruction['dataset_export_formats']],
                                  name=key)
def __init__(self, k: int, skip_first_n_aa: int, skip_last_n_aa: int, abundance: str, normalize_all_features: bool, name: str = None):
    location = "AtchleyKmerEncoder"
    ParameterValidator.assert_type_and_value(k, int, location, "k", 1)
    ParameterValidator.assert_type_and_value(skip_first_n_aa, int, location, "skip_first_n_aa", 0)
    ParameterValidator.assert_type_and_value(skip_last_n_aa, int, location, "skip_last_n_aa", 0)
    ParameterValidator.assert_in_valid_list(abundance.upper(), [ab.name for ab in RelativeAbundanceType], location, "abundance")
    ParameterValidator.assert_type_and_value(normalize_all_features, bool, location, "normalize_all_features")

    self.k = k
    self.skip_first_n_aa = skip_first_n_aa
    self.skip_last_n_aa = skip_last_n_aa
    self.abundance = RelativeAbundanceType[abundance.upper()]
    self.normalize_all_features = normalize_all_features
    self.name = name
    self.scaler_path = None
    self.vectorizer_path = None
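# A minimal usage sketch for the AtchleyKmerEncoder constructor above. The
# abundance value "RELATIVE_ABUNDANCE" is an assumption: it only has to match a
# RelativeAbundanceType member name (case-insensitively), and the valid names
# are not listed in this snippet.
encoder = AtchleyKmerEncoder(k=4, skip_first_n_aa=3, skip_last_n_aa=3,
                             abundance="RELATIVE_ABUNDANCE", normalize_all_features=False)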
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str) -> MLApplicationInstruction:
    location = MLApplicationParser.__name__

    ParameterValidator.assert_keys(instruction.keys(), ['type', 'dataset', 'label', 'pool_size', 'config_path', 'store_encoded_data'],
                                   location, key)
    ParameterValidator.assert_in_valid_list(instruction['dataset'], symbol_table.get_keys_by_type(SymbolType.DATASET), location, f"{key}: dataset")
    ParameterValidator.assert_type_and_value(instruction['pool_size'], int, location, f"{key}: pool_size", min_inclusive=1)
    ParameterValidator.assert_type_and_value(instruction['label'], str, location, f'{key}: label')
    ParameterValidator.assert_type_and_value(instruction['config_path'], str, location, f'{key}: config_path')
    ParameterValidator.assert_type_and_value(instruction['store_encoded_data'], bool, location, f'{key}: store_encoded_data')

    hp_setting, label = self._parse_hp_setting(instruction, path, key)

    instruction = MLApplicationInstruction(dataset=symbol_table.get(instruction['dataset']), name=key, pool_size=instruction['pool_size'],
                                           label_configuration=LabelConfiguration([label]), hp_setting=hp_setting,
                                           store_encoded_data=instruction['store_encoded_data'])

    return instruction
def create_method_instance(ml_specification: dict, ml_method_class, key: str) -> tuple:
    ml_params = {}

    if ml_specification[ml_method_class.__name__] is None or len(ml_specification[ml_method_class.__name__].keys()) == 0:
        ml_method = ml_method_class()
    else:
        ml_params = ml_specification[ml_method_class.__name__]
        init_method_keys = inspect.signature(ml_method_class.__init__).parameters.keys()

        if any([isinstance(ml_params[key], list) for key in ml_params.keys()]) and "parameter_grid" in init_method_keys:
            ParameterValidator.assert_type_and_value(ml_specification['model_selection_cv'], bool, MLParser.__name__,
                                                     f'{key}: model_selection_cv', exact_value=True)
            ParameterValidator.assert_type_and_value(ml_specification['model_selection_n_folds'], int, MLParser.__name__,
                                                     f'{key}: model_selection_n_folds', 2)
            ml_method = ml_method_class(parameter_grid={key: [ml_params[key]] if not isinstance(ml_params[key], list) else ml_params[key]
                                                        for key in ml_params.keys()})
        elif len(init_method_keys) == 3 and all(arg in init_method_keys for arg in ["parameters", "parameter_grid"]):
            ml_method = ml_method_class(parameters=ml_params)
        else:
            ml_method = ml_method_class(**ml_params)

    return ml_method, ml_params
def _prepare_parameters(vector_size: int, k: int, model_type: str, name: str = None):
    location = "Word2VecEncoder"
    ParameterValidator.assert_type_and_value(vector_size, int, location, "vector_size", min_inclusive=1)
    ParameterValidator.assert_type_and_value(k, int, location, "k", min_inclusive=1)
    ParameterValidator.assert_in_valid_list(model_type.upper(), [item.name for item in ModelType], location, "model_type")

    return {
        "vector_size": vector_size,
        "k": k,
        "model_type": ModelType[model_type.upper()],
        "name": name
    }
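# A minimal usage sketch for _prepare_parameters above, assuming it is a static
# method of Word2VecEncoder (suggested by the location string). The model_type
# value "sequence" is an assumption: it only has to match a ModelType member
# name (case-insensitively), and the valid names are not shown in this snippet.
params = Word2VecEncoder._prepare_parameters(vector_size=16, k=3, model_type="sequence", name="w2v_encoding")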
def _check_instruction_specs(self, workflow_specification, location):
    ParameterValidator.assert_type_and_value(workflow_specification['instructions'], dict, location, 'instructions')

    instruction_names = list(workflow_specification['instructions'].keys())
    assert len(instruction_names) == 1, \
        f"MultiDatasetBenchmarkTool: there can be only one instruction specified for this tool. " \
        f"Currently the following instructions are specified: {instruction_names}."

    ParameterValidator.assert_keys_present(workflow_specification['instructions'][instruction_names[0]].keys(), ['type', 'datasets'], location,
                                           instruction_names[0])

    instruction_type = workflow_specification['instructions'][instruction_names[0]]['type']
    assert instruction_type == 'TrainMLModel', \
        f"MultiDatasetBenchmarkTool: this tool works only with instruction of type 'TrainMLModel', got {instruction_type} instead."

    datasets_in_instruction = workflow_specification['instructions'][instruction_names[0]]['datasets']
    assert len(datasets_in_instruction) > 1, \
        f'{location}: this tool takes multiple dataset names as input, but only {len(datasets_in_instruction)} were provided: ' \
        f'{datasets_in_instruction}.'
def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: str) -> SymbolTable:
    location = "ImportParser"

    ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False)

    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/")
    ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format")

    import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"]))
    params = ImportParser._prepare_params(dataset_specs, result_path, key)

    if "is_repertoire" in params:
        ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire")

        if params["is_repertoire"]:
            if import_cls != IReceptorImport:
                assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                ParameterValidator.assert_type_and_value(params["metadata_file"], str, location, "metadata_file")
        else:
            assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
            ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired")

            if params["paired"]:
                assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair], location,
                                                        "receptor_chains")

    try:
        dataset = import_cls.import_dataset(params, key)
        dataset.name = key
        symbol_table.add(key, SymbolType.DATASET, dataset)
    except KeyError as key_error:
        raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                       f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                       f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                       f"an input data file.")
    except Exception as ex:
        raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")

    return symbol_table
def build_object(cls, **kwargs): location = "MLSettingsPerformance" single_axis_labels = kwargs["single_axis_labels"] ParameterValidator.assert_type_and_value(single_axis_labels, bool, location, "single_axis_labels") if single_axis_labels: x_label_position = kwargs["x_label_position"] ParameterValidator.assert_type_and_value(x_label_position, float, location, "x_label_position") y_label_position = kwargs["y_label_position"] ParameterValidator.assert_type_and_value(y_label_position, float, location, "y_label_position") else: x_label_position = None y_label_position = None name = kwargs["name"] if "name" in kwargs else None return MLSettingsPerformance(single_axis_labels, x_label_position, y_label_position, name)
def check(self, v):
    ParameterValidator.assert_type_and_value(v, Cell, "CellList", "new item")
def check(self, v):
    ParameterValidator.assert_type_and_value(v, CoefficientPlottingSetting, "CoefficientPlottingSettingList", "new item")
def check(self, v):
    ParameterValidator.assert_type_and_value(v, ReceptorSequence, "ReceptorSequenceList", "new item")
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> TrainMLModelInstruction:
    valid_keys = ["assessment", "selection", "dataset", "strategy", "labels", "metrics", "settings", "number_of_processes", "type", "reports",
                  "optimization_metric", 'refit_optimal_model', 'store_encoded_data']

    ParameterValidator.assert_type_and_value(instruction['settings'], list, TrainMLModelParser.__name__, 'settings')
    ParameterValidator.assert_keys(list(instruction.keys()), valid_keys, TrainMLModelParser.__name__, "TrainMLModel")
    ParameterValidator.assert_type_and_value(instruction['refit_optimal_model'], bool, TrainMLModelParser.__name__, 'refit_optimal_model')
    ParameterValidator.assert_type_and_value(instruction['metrics'], list, TrainMLModelParser.__name__, 'metrics')
    ParameterValidator.assert_type_and_value(instruction['optimization_metric'], str, TrainMLModelParser.__name__, 'optimization_metric')
    ParameterValidator.assert_type_and_value(instruction['number_of_processes'], int, TrainMLModelParser.__name__, 'number_of_processes')
    ParameterValidator.assert_type_and_value(instruction['strategy'], str, TrainMLModelParser.__name__, 'strategy')
    ParameterValidator.assert_type_and_value(instruction['store_encoded_data'], bool, TrainMLModelParser.__name__, 'store_encoded_data')

    settings = self._parse_settings(instruction, symbol_table)
    dataset = symbol_table.get(instruction["dataset"])
    assessment = self._parse_split_config(key, instruction, "assessment", symbol_table, len(settings))
    selection = self._parse_split_config(key, instruction, "selection", symbol_table, len(settings))
    assessment, selection = self._update_split_configs(assessment, selection, dataset)
    label_config = self._create_label_config(instruction, dataset, key)
    strategy = ReflectionHandler.get_class_by_name(instruction["strategy"], "hyperparameter_optimization/")
    metrics = {Metric[metric.upper()] for metric in instruction["metrics"]}
    optimization_metric = Metric[instruction["optimization_metric"].upper()]
    metric_search_criterion = Metric.get_search_criterion(optimization_metric)
    path = self._prepare_path(instruction)
    context = self._prepare_context(instruction, symbol_table)
    reports = self._prepare_reports(instruction["reports"], symbol_table)

    hp_instruction = TrainMLModelInstruction(dataset=dataset, hp_strategy=strategy(settings, metric_search_criterion),
                                             hp_settings=settings, assessment=assessment, selection=selection, metrics=metrics,
                                             optimization_metric=optimization_metric,
                                             refit_optimal_model=instruction['refit_optimal_model'],
                                             label_configuration=label_config, path=path, context=context,
                                             store_encoded_data=instruction['store_encoded_data'],
                                             number_of_processes=instruction["number_of_processes"], reports=reports, name=key)

    return hp_instruction
def _prepare_parameters(normalization_type: str, reads: str, sequence_encoding: str, k: int = 0, k_left: int = 0, k_right: int = 0,
                        min_gap: int = 0, max_gap: int = 0, metadata_fields_to_include: list = None, name: str = None,
                        scale_to_unit_variance: bool = False, scale_to_zero_mean: bool = False):
    location = KmerFrequencyEncoder.__name__

    ParameterValidator.assert_in_valid_list(normalization_type.upper(), [item.name for item in NormalizationType], location,
                                            "normalization_type")
    ParameterValidator.assert_in_valid_list(reads.upper(), [item.name for item in ReadsType], location, "reads")
    ParameterValidator.assert_in_valid_list(sequence_encoding.upper(), [item.name for item in SequenceEncodingType], location,
                                            "sequence_encoding")
    ParameterValidator.assert_type_and_value(scale_to_zero_mean, bool, location, "scale_to_zero_mean")
    ParameterValidator.assert_type_and_value(scale_to_unit_variance, bool, location, "scale_to_unit_variance")

    vars_to_check = {"k": k, "k_left": k_left, "k_right": k_right, "min_gap": min_gap, "max_gap": max_gap}
    for param in vars_to_check.keys():
        ParameterValidator.assert_type_and_value(vars_to_check[param], int, location, param, min_inclusive=0)

    if "gap" in sequence_encoding.lower():
        assert k_left != 0 and k_right != 0, \
            f"KmerFrequencyEncoder: sequence encoding {sequence_encoding} was chosen, but both k_left " \
            f"({k_left}) and k_right ({k_right}) have to be set and larger than 0."

    return {
        "normalization_type": NormalizationType[normalization_type.upper()],
        "reads": ReadsType[reads.upper()],
        "sequence_encoding": SequenceEncodingType[sequence_encoding.upper()],
        "name": name,
        "scale_to_zero_mean": scale_to_zero_mean,
        "scale_to_unit_variance": scale_to_unit_variance,
        **vars_to_check
    }
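# A minimal usage sketch for _prepare_parameters above, assuming it is a static
# method of KmerFrequencyEncoder (suggested by the location string). The values
# "l2", "unique", and "continuous_kmer" are assumptions: each only has to match
# a member name of NormalizationType, ReadsType, or SequenceEncodingType
# (case-insensitively), and the valid names are not listed in this snippet.
params = KmerFrequencyEncoder._prepare_parameters(normalization_type="l2", reads="unique",
                                                  sequence_encoding="continuous_kmer", k=3)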