def import_dataset(params, name: str) -> SequenceDataset:
    """
    Returns a randomly generated sequence dataset according to the parameters.

    The only accepted keys in `params` are `sequence_count`, `length_probabilities`, `labels` and
    `result_path`. The `name` argument is not used by this function.

    YAML specification:

        result_path: path/where/to/store/results/
        sequence_count: 100 # number of random sequences to generate
        length_probabilities:
            14: 0.8 # 80% of all generated sequences will have length 14
            15: 0.2 # 20% of all generated sequences will have length 15
        labels:
            epitope1: # label name
                True: 0.5 # 50% of the sequences will have class True
                False: 0.5 # 50% of the sequences will have class False
            epitope2: # next label with classes that will be assigned to sequences independently of the previous label or other parameters
                1: 0.3 # 30% of the generated sequences will have class 1
                0: 0.7 # 70% of the generated sequences will have class 0

    """
    valid_keys = ["sequence_count", "length_probabilities", "labels", "result_path"]
    ParameterValidator.assert_all_in_valid_list(list(params.keys()), valid_keys,
                                                "RandomSequenceDatasetImport", "params")

    return RandomDatasetGenerator.generate_sequence_dataset(
        sequence_count=params["sequence_count"],
        length_probabilities=params["length_probabilities"],
        labels=params["labels"],
        path=params["result_path"])
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path) -> MLApplicationInstruction:
    """
    Validates the specification of an ML application instruction and builds the instruction object.

    Args:
        key: name of the instruction; used in validation messages and as the instruction name
        instruction: the instruction specification (dict)
        symbol_table: used to check that the dataset is defined and to retrieve it
        path: forwarded to `_parse_hp_setting`

    Returns:
        the MLApplicationInstruction built from the validated specification
    """
    location = MLApplicationParser.__name__
    expected_keys = ['type', 'dataset', 'number_of_processes', 'config_path', 'store_encoded_data']

    ParameterValidator.assert_keys(instruction.keys(), expected_keys, location, key)
    ParameterValidator.assert_in_valid_list(instruction['dataset'],
                                            symbol_table.get_keys_by_type(SymbolType.DATASET),
                                            location, f"{key}: dataset")
    ParameterValidator.assert_type_and_value(instruction['number_of_processes'], int, location,
                                             f"{key}: number_of_processes", min_inclusive=1)
    ParameterValidator.assert_type_and_value(instruction['config_path'], str, location,
                                             f'{key}: config_path')
    ParameterValidator.assert_type_and_value(instruction['store_encoded_data'], bool, location,
                                             f'{key}: store_encoded_data')

    hp_setting, label = self._parse_hp_setting(instruction, path, key)

    # the instruction is configured for the single label returned with the hp setting
    return MLApplicationInstruction(dataset=symbol_table.get(instruction['dataset']),
                                    name=key,
                                    number_of_processes=instruction['number_of_processes'],
                                    label_configuration=LabelConfiguration([label]),
                                    hp_setting=hp_setting,
                                    store_encoded_data=instruction['store_encoded_data'])
def compute_overlap_matrix(hp_items: List[HPItem]):
    """
    Computes the pairwise overlap (in percent) between the relevant sequences of the given items.

    For every item, the CSV file at `encoder.relevant_sequence_csv_path` is read once and each row is
    converted to a frozenset of its values; the item is then represented by the set of these rows.
    The overlap of two items is the size of the intersection of their sets, as a percentage of the
    smaller set, rounded to two decimals. The diagonal is set to 100.

    Args:
        hp_items: items whose encoders are validated to be SequenceAbundanceEncoder

    Returns:
        a symmetric numpy array of shape (len(hp_items), len(hp_items)), or None if any of the
        items has no relevant sequences
    """
    ParameterValidator.assert_all_type_and_value([hp_item.encoder for hp_item in hp_items],
                                                 SequenceAbundanceEncoder,
                                                 'Overlap matrix computation', 'encoders')

    # read each file exactly once (in item order) instead of re-reading it for every pair;
    # stop at the first empty set, since the overlap is undefined then
    sequence_sets = []
    for hp_item in hp_items:
        rows = pd.read_csv(hp_item.encoder.relevant_sequence_csv_path).apply(frozenset, axis=1)
        sequences = set(rows.values.tolist())
        if len(sequences) == 0:
            return None
        sequence_sets.append(sequences)

    item_count = len(sequence_sets)
    overlap_matrix = np.zeros((item_count, item_count))

    for index1, sequences1 in enumerate(sequence_sets):
        overlap_matrix[index1, index1] = 100
        for index2 in range(index1 + 1, item_count):
            sequences2 = sequence_sets[index2]
            intersection = sequences1.intersection(sequences2)
            overlap = round(len(intersection) * 100 / min(len(sequences1), len(sequences2)), 2)
            overlap_matrix[index1, index2] = overlap
            overlap_matrix[index2, index1] = overlap

    return overlap_matrix
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> SimulationInstruction:
    """
    Builds a SimulationInstruction from its specification.

    Args:
        key: name of the instruction; used in the validation message and as the instruction name
        instruction: the instruction specification with keys dataset, simulation, type, export_formats
        symbol_table: source of the signals, the simulation and the dataset
        path: not used in this method

    Returns:
        the SimulationInstruction with all signals registered in the symbol table
    """
    ParameterValidator.assert_keys(instruction.keys(),
                                   ["dataset", "simulation", "type", "export_formats"],
                                   "SimulationParser", key)

    signals = []
    for signal in symbol_table.get_by_type(SymbolType.SIGNAL):
        signals.append(signal.item)

    return SimulationInstruction(signals=signals,
                                 simulation=symbol_table.get(instruction["simulation"]),
                                 dataset=symbol_table.get(instruction["dataset"]),
                                 name=key,
                                 exporters=self.parse_exporters(instruction))
def _prepare_specs(self):
    """
    Loads the YAML specification from `self.yaml_path` and checks that it fits this tool.

    The specification has to contain `definitions` and `instructions` (optionally `output`), exactly one
    instruction whose `type` matches TrainMLModelInstruction (class name without its last 11 characters),
    and exactly one entry under `labels`. The name of the instruction is stored in `self.instruction_name`.
    Finally, the paths are checked and the result paths updated through Util.
    """
    tool_name = GalaxyTrainMLModel.__name__

    with self.yaml_path.open("r") as file:
        specs = yaml.safe_load(file)

    ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"],
                                           tool_name, "YAML specification")
    ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"],
                                                tool_name, "YAML specification")
    ParameterValidator.assert_type_and_value(specs["instructions"], dict, tool_name, "instructions")

    instruction_names = list(specs["instructions"].keys())
    assert len(instruction_names) == 1, (
        f"{GalaxyTrainMLModel.__name__}: one instruction has to be specified under "
        f"`instructions`, got the following instead: {list(specs['instructions'].keys())}.")

    self.instruction_name = instruction_names[0]
    instruction = specs['instructions'][self.instruction_name]

    ParameterValidator.assert_type_and_value(instruction, dict, tool_name, self.instruction_name)
    ParameterValidator.assert_keys_present(instruction.keys(), ['type'], tool_name, self.instruction_name)

    assert instruction['type'] == TrainMLModelInstruction.__name__[:-11], (
        f"{GalaxyTrainMLModel.__name__}: instruction `type` under {self.instruction_name} has to be {TrainMLModelInstruction.__name__[:-11]} "
        f"for this tool.")

    assert len(instruction['labels']) == 1, (
        f"{GalaxyTrainMLModel.__name__}: one label has to be specified under "
        f"`labels`, got the following instead: {specs['instructions'][self.instruction_name]['labels']}.")

    Util.check_paths(specs, tool_name)
    Util.update_result_paths(specs, self.result_path, self.yaml_path)
def build_object(cls, **kwargs):
    """
    Validates the parameters for FeatureComparison and creates the object from the unchanged kwargs.

    `keep_fraction` (default 1.0) is converted to float for validation only and has to lie between 0 and 1;
    `log_scale` has to be a bool; `comparison_label` has to be set and must differ from each of the
    grouping labels (color, row, column).
    """
    comparison_label = kwargs.get("comparison_label")
    color_grouping_label = kwargs.get("color_grouping_label")
    row_grouping_label = kwargs.get("row_grouping_label")
    column_grouping_label = kwargs.get("column_grouping_label")
    log_scale = kwargs.get("log_scale")
    keep_fraction = float(kwargs.get("keep_fraction", 1.0))

    ParameterValidator.assert_type_and_value(keep_fraction, float, "FeatureComparison", "keep_fraction",
                                             min_inclusive=0, max_inclusive=1)
    ParameterValidator.assert_type_and_value(log_scale, bool, "FeatureComparison", "log_scale")

    assert comparison_label is not None, \
        "FeatureComparison: the parameter 'comparison_label' must be set in order to be able to compare across this label"

    # the comparison label cannot double as any of the grouping labels
    assert comparison_label != color_grouping_label, \
        f"FeatureComparison: comparison label {comparison_label} can not be used as color_grouping_label"
    assert comparison_label != row_grouping_label, \
        f"FeatureComparison: comparison label {comparison_label} can not be used as row_grouping_label"
    assert comparison_label != column_grouping_label, \
        f"FeatureComparison: comparison label {comparison_label} can not be used as column_grouping_label"

    return FeatureComparison(**kwargs)
def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
    """
    Collects the report objects referenced under `reports` for one split of the instruction.

    The report types have to be among the parameter names of ReportConfig, and each report type has to
    map to a list of report ids.

    Returns:
        a dict mapping each report type to a dict of {report id: object retrieved from the symbol table};
        an empty dict if `reports` is missing or empty for the given split
    """
    split_specs = instruction[split_key]
    if "reports" not in split_specs or len(split_specs["reports"]) == 0:
        return {}

    reports = split_specs["reports"]
    location = f"{instruction_key}/{split_key}/reports"
    report_types = list(signature(ReportConfig).parameters.keys())

    ParameterValidator.assert_all_in_valid_list(reports.keys(), report_types, location, "reports")

    # validate all report types before any report is looked up in the symbol table
    for report_type in reports:
        ParameterValidator.assert_type_and_value(reports[report_type], list,
                                                 f"{location}/{report_type}", report_type)

    report_config_input = {}
    for report_type in reports:
        report_config_input[report_type] = {report_id: symbol_table.get(report_id)
                                            for report_id in reports[report_type]}

    return report_config_input
def prepare_specs(self):
    """
    Loads the YAML specification from `self.yaml_path` and checks it for the data simulation tool.

    Sets `self.instruction_name` and `self.export_format` from the Util checks and `self.dataset_name`
    from the single dataset that has to be defined under definitions/datasets. Afterwards the paths are
    checked and the result paths updated through Util.
    """
    tool_name = DataSimulationTool.__name__

    with self.yaml_path.open("r") as file:
        specs = yaml.safe_load(file)

    self.instruction_name = Util.check_instruction_type(specs, tool_name, self.expected_instruction)
    self.export_format = Util.check_export_format(specs, tool_name, self.instruction_name)

    ParameterValidator.assert_keys_present(specs["definitions"], ["datasets"], tool_name,
                                           "definitions/datasets")
    datasets = specs['definitions']['datasets']
    ParameterValidator.assert_type_and_value(datasets, dict, tool_name, "definitions/datasets")

    dataset_names = list(datasets.keys())
    assert len(dataset_names) == 1, (
        f"{DataSimulationTool.__name__}: one dataset has to be defined under definitions/datasets, got "
        f"{dataset_names} instead.")

    self.dataset_name = dataset_names[0]

    Util.check_paths(specs, tool_name)
    Util.update_result_paths(specs, self.result_path, self.yaml_path)
def prepare_reference(reference_params: dict, location: str, paired: bool):
    """
    Imports the reference items described by `reference_params`.

    Args:
        reference_params: dict with keys `format` and `params`; `params` has to include `path` pointing
            to an existing file. Note that `params` is updated in place: `is_repertoire` and `paired`
            are filled in when missing.
        location: name used as the prefix of the error messages
        paired: the value `params["paired"]` must have (or is set to, when missing)

    Returns:
        the items returned by ImportHelper.import_items for the import class `<format>Import`
    """
    ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

    seq_import_params = reference_params.get("params", {})

    assert os.path.isfile(seq_import_params["path"]), (
        f"{location}: the file {seq_import_params['path']} does not exist. "
        f"Specify the correct path under reference.")

    # setdefault fills in the expected value when the key is missing, so the assertions below
    # can only fail for explicitly specified values
    is_repertoire = seq_import_params.setdefault("is_repertoire", False)
    assert is_repertoire == False, f"{location}: is_repertoire must be False for SequenceImport"

    specified_paired = seq_import_params.setdefault("paired", paired)
    assert specified_paired == paired, f"{location}: paired must be {paired} for SequenceImport"

    format_str = reference_params["format"]
    import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")

    default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                              DefaultParamsLoader.convert_to_snake_case(format_str))
    # explicitly specified parameters take precedence over the defaults of the format
    processed_params = DatasetImportParams.build_object(**{**default_params, **seq_import_params})

    return ImportHelper.import_items(import_class, reference_params["params"]["path"], processed_params)
def _prepare_parameters(use_positional_info: bool, distance_to_seq_middle: int, flatten: bool, sequence_type: str, name: str = None):
    """
    Validates the OneHotEncoder parameters and returns them as a dict.

    `distance_to_seq_middle` is only validated (int, at least 1) when `use_positional_info` is True;
    otherwise it is replaced by None. `sequence_type` is matched case-insensitively against the names of
    SequenceType and returned as the corresponding enum member.
    """
    location = OneHotEncoder.__name__

    ParameterValidator.assert_type_and_value(use_positional_info, bool, location, "use_positional_info")
    if not use_positional_info:
        distance_to_seq_middle = None
    else:
        ParameterValidator.assert_type_and_value(distance_to_seq_middle, int, location,
                                                 "distance_to_seq_middle", min_inclusive=1)

    ParameterValidator.assert_type_and_value(flatten, bool, location, "flatten")
    ParameterValidator.assert_type_and_value(sequence_type, str, location, 'sequence_type')

    sequence_type_name = sequence_type.upper()
    ParameterValidator.assert_in_valid_list(sequence_type_name, [item.name for item in SequenceType],
                                            location, 'sequence_type')

    return dict(use_positional_info=use_positional_info,
                distance_to_seq_middle=distance_to_seq_middle,
                flatten=flatten,
                sequence_type=SequenceType[sequence_type_name],
                name=name)
def __init__(self, k: int, skip_first_n_aa: int, skip_last_n_aa: int, abundance: str, normalize_all_features: bool, name: str = None):
    """
    Validates the encoder parameters and stores them on the instance.

    `abundance` is matched case-insensitively against the names of RelativeAbundanceType and stored as
    the corresponding enum member. `scaler_path` and `vectorizer_path` are initialized to None.
    """
    location = "AtchleyKmerEncoder"
    abundance_name = abundance.upper()

    ParameterValidator.assert_type_and_value(k, int, location, "k", 1)
    ParameterValidator.assert_type_and_value(skip_first_n_aa, int, location, "skip_first_n_aa", 0)
    ParameterValidator.assert_type_and_value(skip_last_n_aa, int, location, "skip_last_n_aa", 0)
    ParameterValidator.assert_in_valid_list(abundance_name,
                                            [ab.name for ab in RelativeAbundanceType],
                                            location, "abundance")
    ParameterValidator.assert_type_and_value(normalize_all_features, bool, location,
                                             "normalize_all_features")

    self.k = k
    self.skip_first_n_aa = skip_first_n_aa
    self.skip_last_n_aa = skip_last_n_aa
    self.abundance = RelativeAbundanceType[abundance_name]
    self.normalize_all_features = normalize_all_features
    self.name = name
    self.scaler_path = None
    self.vectorizer_path = None
def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
    """
    Builds one HPSetting for every entry listed under `settings` in the instruction.

    For each setting, the optional preprocessing is resolved first (a missing or None value is stored
    as None in the setting and results in an empty preprocessing sequence), then the encoder is built
    for the dataset of the instruction and checked for compatibility with the ML method.

    Args:
        instruction: the instruction specification; `settings` and `dataset` are read from it
        symbol_table: source of the preprocessing sequences, encoders, ML methods and the dataset

    Returns:
        list of HPSetting objects, in the order of the settings in the specification

    Raises:
        KeyError: if a parameter needed for the settings is missing, or if the preprocessing named in a
            setting is not present in the symbol table
        ValueError: if the preprocessing sequence includes a preprocessing for which
            keeps_example_count() is not true
    """
    location = TrainMLModelParser.__name__

    def undefined_parameter(key_error: KeyError) -> KeyError:
        # uniform message for lookups that failed because of a missing key
        return KeyError(
            f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")

    try:
        setting_specs = instruction["settings"]
    except KeyError as key_error:
        raise undefined_parameter(key_error) from key_error

    settings = []
    for index, setting in enumerate(setting_specs):

        # the preprocessing is resolved outside of the try block below so that the explicit error for
        # an undefined preprocessing reaches the caller unchanged instead of being wrapped again
        if "preprocessing" in setting and setting["preprocessing"] is not None:
            ParameterValidator.assert_type_and_value(setting["preprocessing"], str, location,
                                                     f'settings: {index+1}. '
                                                     f'element: preprocessing')
            if not symbol_table.contains(setting["preprocessing"]):
                raise KeyError(
                    f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                    f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                    f"definitions: {PreprocessingParser.keyword}.")

            preprocessing_sequence = symbol_table.get(setting["preprocessing"])
            preproc_name = setting["preprocessing"]
            if not all(preproc.keeps_example_count() for preproc in preprocessing_sequence):
                raise ValueError(
                    f"{TrainMLModelParser.__name__}: preprocessing sequence {preproc_name} includes preprocessing that "
                    f"change the number of examples at runtime and as such cannot be used with this instruction. See the "
                    f"documentation for the preprocessing or alternatively use them with other instructions.")
        else:
            setting["preprocessing"] = None
            preprocessing_sequence = []
            preproc_name = None

        ParameterValidator.assert_keys(setting.keys(), ["preprocessing", "ml_method", "encoding"],
                                       location, f"settings, {index + 1}. entry")

        try:
            encoder = symbol_table.get(setting["encoding"]) \
                .build_object(symbol_table.get(instruction["dataset"]),
                              **symbol_table.get_config(setting["encoding"])["encoder_params"]) \
                .set_context({"dataset": symbol_table.get(instruction['dataset'])})

            ml_method = symbol_table.get(setting["ml_method"])
            ml_method.check_encoder_compatibility(encoder)

            hp_setting = HPSetting(encoder=encoder,
                                   encoder_name=setting["encoding"],
                                   encoder_params=symbol_table.get_config(setting["encoding"])["encoder_params"],
                                   ml_method=ml_method,
                                   ml_method_name=setting["ml_method"],
                                   ml_params=symbol_table.get_config(setting["ml_method"]),
                                   preproc_sequence=preprocessing_sequence,
                                   preproc_sequence_name=preproc_name)
        except KeyError as key_error:
            raise undefined_parameter(key_error) from key_error

        settings.append(hp_setting)

    return settings
def _prepare_parameters(reference: dict, max_edit_distances: dict, name: str = None):
    """
    Validates the parameters of MatchedReceptorsEncoder and imports the reference receptors.

    Args:
        reference: specification of the reference, passed to MatchedReferenceUtil.prepare_reference
            with paired=True
        max_edit_distances: either a single int, which is then used for every legal chain, or a dict
            (including dict subclasses) whose keys are validated against the legal chains
        name: passed through to the returned dict

    Returns:
        dict with keys `reference_receptors`, `max_edit_distances` and `name`
    """
    location = "MatchedReceptorsEncoder"

    legal_chains = [chain
                    for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor())
                    for chain in receptor.get_chains()]

    # bool is a subclass of int, but True/False is not a meaningful edit distance, so it is
    # left to the type validation in the last branch
    if isinstance(max_edit_distances, int) and not isinstance(max_edit_distances, bool):
        max_edit_distances = {chain: max_edit_distances for chain in legal_chains}
    elif isinstance(max_edit_distances, dict):
        ParameterValidator.assert_keys(max_edit_distances.keys(), legal_chains, location,
                                       "max_edit_distances", exclusive=False)
    else:
        ParameterValidator.assert_type_and_value(max_edit_distances, dict, location,
                                                 'max_edit_distances')

    reference_receptors = MatchedReferenceUtil.prepare_reference(reference, location=location,
                                                                 paired=True)

    return {
        "reference_receptors": reference_receptors,
        "max_edit_distances": max_edit_distances,
        "name": name
    }
def add_label(self, label_name: str, values: list = None, auxiliary_labels: list = None, positive_class=None):
    """
    Registers a label under `label_name`, replacing (with a warning) a non-empty label of the same name.

    If `positive_class` is given, it is converted to str when all values are strings and it is not, and
    it has to be one of `values`. Otherwise the default positive class is determined by
    `_get_default_positive_class` and logged if it is truthy.
    """
    vals = list(values) if values else None

    already_set = label_name in self._labels and self._labels[label_name] is not None \
        and len(self._labels[label_name]) > 0
    if already_set:
        warnings.warn("Label " + label_name + " has already been set. Overriding existing values...", Warning)

    if positive_class is None:
        positive_class = self._get_default_positive_class(values)
        if positive_class:
            logging.info(f"LabelConfiguration: set default positive class '{positive_class}' for label {label_name}")
    else:
        only_string_values = all(isinstance(val, str) for val in values)
        if only_string_values and not isinstance(positive_class, str):
            # align the type of the positive class with the type of the values
            positive_class = str(positive_class)
        ParameterValidator.assert_in_valid_list(positive_class, values, Label.__name__, 'positive_class')

    self._labels[label_name] = Label(label_name, vals, auxiliary_labels, positive_class)
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> ExploratoryAnalysisInstruction:
    """
    Builds an ExploratoryAnalysisInstruction with one analysis unit per entry under `analyses`.

    Args:
        key: name of the instruction; also the prefix of the location passed on for each analysis
        instruction: specification with keys analyses, type and number_of_processes
        symbol_table: forwarded to `_prepare_params`
        path: not used in this method

    Returns:
        the instruction holding the analysis units keyed by analysis name
    """
    ParameterValidator.assert_keys(instruction, ["analyses", "type", "number_of_processes"],
                                   "ExploratoryAnalysisParser", "ExploratoryAnalysis")
    ParameterValidator.assert_type_and_value(instruction["number_of_processes"], int,
                                             ExploratoryAnalysisParser.__name__, "number_of_processes")

    number_of_processes = instruction["number_of_processes"]
    exp_analysis_units = {}

    for analysis_key, analysis in instruction["analyses"].items():
        unit_params = self._prepare_params(analysis, symbol_table, f"{key}/{analysis_key}")
        # every unit uses the number of processes defined for the whole instruction
        unit_params.update(number_of_processes=number_of_processes)
        exp_analysis_units[analysis_key] = ExploratoryAnalysisUnit(**unit_params)

    return ExploratoryAnalysisInstruction(exploratory_analysis_units=exp_analysis_units, name=key)
def build(cls, **kwargs):
    """
    Creates a RepertoireDataset from the metadata file listed in the keyword arguments.

    For every row of the metadata file a Repertoire is created from the `filename` and `identifier`
    columns; filenames are resolved relative to the directory of the metadata file. If the resolved
    file does not exist and its path contains 'repertoires', the file is looked up two directory
    levels above under the same base name. The repertoire metadata file is expected next to the data
    file as `<stem>_metadata.yaml`.

    If `repertoires` is not supplied and `repertoire_ids` is not None, each repertoire from the metadata
    file has to have the identifier at the same position in `repertoire_ids`.

    Args:
        **kwargs: has to include metadata_file, name, repertoire_ids and metadata_fields; everything is
            passed on to the RepertoireDataset constructor, with `repertoires` replaced by the
            repertoires created here

    Raises:
        AssertionError: if the repertoire ids and the identifiers in the metadata file do not match
    """
    ParameterValidator.assert_keys_present(list(kwargs.keys()),
                                           ['metadata_file', 'name', 'repertoire_ids', 'metadata_fields'],
                                           RepertoireDataset.__name__, "repertoire dataset")

    metadata_dir = Path(kwargs['metadata_file']).parent
    metadata_df = pd.read_csv(kwargs['metadata_file'], comment=Constants.COMMENT_SIGN)

    repertoires = []
    for _, row in metadata_df.iterrows():
        filename = metadata_dir / row['filename']
        if not filename.is_file() and 'repertoires' in str(filename):
            filename = filename.parent.parent / Path(row['filename']).name
        repertoires.append(Repertoire(data_filename=filename,
                                      metadata_filename=filename.parent / f'{filename.stem}_metadata.yaml',
                                      identifier=row['identifier']))

    if "repertoire_ids" in kwargs and "repertoires" not in kwargs and kwargs['repertoire_ids'] is not None:
        repertoire_ids = kwargs['repertoire_ids']
        # too few ids is a mismatch as well; checking the length first avoids an IndexError
        ids_match = len(repertoire_ids) >= len(repertoires) and \
            all(rep.identifier == rep_id for rep, rep_id in zip(repertoires, repertoire_ids))
        # `identifier` is optional in kwargs, so it is read with .get to keep the message usable
        assert ids_match, \
            f"{RepertoireDataset.__name__}: repertoire ids from the iml_dataset file and metadata file don't match for the dataset " \
            f"{kwargs['name']} with identifier {kwargs.get('identifier')}."

    return RepertoireDataset(**{**kwargs, "repertoires": repertoires})
def parse_object(specs, valid_class_names: list, class_name_ending: str, class_path: str, location: str, key: str, builder: bool = False, return_params_dict: bool = False):
    """
    Creates an object of the class named in `specs` with the parameters collected for it.

    Args:
        specs: the specification from which the class name and the parameters are obtained
        valid_class_names: class names accepted for this object
        class_name_ending: suffix appended to the class name to get the name of the class to load
        class_path: where the class is looked up
        location: prefix of the error messages
        key: key under which the object is specified
        builder: if True and the class has `build_object`, the object is created through it instead
            of the constructor
        return_params_dict: if True, the parameters are returned together with the object

    Returns:
        the created object, or a tuple (object, {class_name: params}) if `return_params_dict` is True

    Raises:
        AssertionError: if creating the object fails with a TypeError (reported as invalid parameter)
    """
    class_name = ObjectParser.get_class_name(specs, valid_class_names, class_name_ending, location, key)
    ParameterValidator.assert_in_valid_list(class_name, valid_class_names, location, key)

    cls = ReflectionHandler.get_class_by_name(f"{class_name}{class_name_ending}", class_path)
    params = ObjectParser.get_all_params(specs, class_path, class_name, key)

    try:
        if "name" not in inspect.signature(cls.__init__).parameters:
            # pop instead of del: the params may not include a name at all
            params.pop("name", None)

        if builder and hasattr(cls, "build_object"):
            obj = cls.build_object(**params)
        else:
            obj = cls(**params)
    except TypeError as err:
        raise AssertionError(
            f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
            f"under key {key}. Valid parameter names are: "
            f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}") from err

    return (obj, {class_name: params}) if return_params_dict else obj
def _prepare_reports(self, reports: list, symbol_table: SymbolTable) -> dict:
    """
    Retrieves the reports with the given ids from the symbol table.

    Returns:
        dict mapping each report id to the object from the symbol table (validated to be
        TrainMLModelReport objects); an empty dict if `reports` is None
    """
    if reports is None:
        return {}

    location = TrainMLModelParser.__name__
    ParameterValidator.assert_type_and_value(reports, list, location, "reports")

    report_objects = {}
    for report_id in reports:
        report_objects[report_id] = symbol_table.get(report_id)

    ParameterValidator.assert_all_type_and_value(report_objects.values(), TrainMLModelReport,
                                                 location, 'reports')
    return report_objects
def _check_specs(self, workflow_specification):
    """
    Checks the top-level keys of the workflow specification and then delegates to the dataset and
    instruction checks of this tool.
    """
    location = 'MultiDatasetBenchmarkTool'
    top_level_keys = ['definitions', 'instructions', 'output']

    ParameterValidator.assert_keys(workflow_specification.keys(), top_level_keys, location,
                                   'YAML specification')

    self._check_dataset_specs(workflow_specification, location)
    self._check_instruction_specs(workflow_specification, location)
def parse(specs: dict, symbol_table: SymbolTable) -> dict:
    """
    Validates the `output` section of the specification, or adds the default one if it is missing.

    The only accepted format is HTML, which is also the default written to `specs` when no output is
    specified. The output section is added to the symbol table under the name "output".

    Returns:
        the output section of the specification
    """
    if "output" not in specs:
        specs["output"] = {"format": "HTML"}
    else:
        ParameterValidator.assert_keys(specs["output"], ["format"], "OutputParser", "output")
        ParameterValidator.assert_in_valid_list(specs["output"]["format"], ["HTML"],
                                                "OutputParser", "format")

    symbol_table.add("output", SymbolType.OUTPUT, specs["output"])

    return specs["output"]
def _check_label_format(self, labels: list, instruction_key: str):
    """
    Checks that `labels` is a list in which every element is either a label name (str) or a dict of the
    form {label name: {'positive_class': ...}} with no other keys on either level.

    Raises:
        AssertionError: if an element is neither str nor dict, or a dict element has a different form
    """
    ParameterValidator.assert_type_and_value(labels, list, TrainMLModelParser.__name__,
                                             f'{instruction_key}/labels')

    assert all(isinstance(label, (str, dict)) for label in labels), \
        f"{TrainMLModelParser.__name__}: labels under {instruction_key} were not defined properly. The list of labels has to either be a list of " \
        f"label names, or there can be a parameter 'positive_class' defined under the label name."

    def has_only_positive_class(label: dict) -> bool:
        # exactly one label name, mapped to a dict whose only key is 'positive_class'
        if len(list(label.keys())) != 1:
            return False
        label_params = list(label.values())[0]
        return isinstance(label_params, dict) and 'positive_class' in label_params \
            and len(list(label_params.keys())) == 1

    dict_labels = [label for label in labels if isinstance(label, dict)]
    assert all(has_only_positive_class(label) for label in dict_labels), \
        f"{TrainMLModelParser.__name__}: labels that are specified by more than label name, can include only one parameter called 'positive_class'."
def __init__(self, percentage: float, show_warnings: bool = True):
    """
    Initializes the classifier; `k` and `label` start as None.

    Args:
        percentage: validated to be a float with min_inclusive=0 and max_inclusive=1
        show_warnings: stored as is
    """
    super().__init__()

    ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage",
                                             min_inclusive=0., max_inclusive=1.)

    self.percentage = percentage
    self.k, self.label = None, None
    self.show_warnings = show_warnings
def _prepare_parameters(distance_metric: str, attributes_to_match: list, sequence_batch_size: int, context: dict = None):
    """
    Validates the distance metric and returns the DistanceEncoder parameters as a dict.

    The metric is matched case-insensitively: the upper-cased name is both validated against the names
    of DistanceMetricType and used for the enum lookup, so validation and lookup agree.

    Args:
        distance_metric: name of a DistanceMetricType member (any letter case)
        attributes_to_match: passed through unchanged
        sequence_batch_size: passed through unchanged
        context: passed through unchanged

    Returns:
        dict with keys distance_metric (enum member), attributes_to_match, sequence_batch_size, context
    """
    valid_metrics = [metric.name for metric in DistanceMetricType]
    metric_name = distance_metric.upper()

    ParameterValidator.assert_in_valid_list(metric_name, valid_metrics, "DistanceEncoder",
                                            "distance_metric")

    return {
        "distance_metric": DistanceMetricType[metric_name],
        "attributes_to_match": attributes_to_match,
        "sequence_batch_size": sequence_batch_size,
        "context": context
    }
def parse_motifs(motifs: dict, symbol_table: SymbolTable):
    """
    Parses every motif specification and adds the resulting motif to the symbol table under its key.

    Returns:
        tuple of the symbol table and the (unchanged) motif specifications
    """
    valid_motif_keys = ["seed", "instantiation", "seed_chain1", "seed_chain2", "name_chain1", "name_chain2"]

    for key, motif_specs in motifs.items():
        ParameterValidator.assert_keys(motif_specs.keys(), valid_motif_keys, "MotifParser", key,
                                       exclusive=False)
        symbol_table.add(key, SymbolType.MOTIF, MotifParser._parse_motif(key, motif_specs))

    return symbol_table, motifs
def build_object(cls, **kwargs):
    """
    Creates a CytoscapeNetworkExporter after normalizing the additional attribute parameters.

    `additional_node_attributes` and `additional_edge_attributes` have to be present in kwargs; a None
    value is replaced by an empty list, and both are then validated to be lists.
    """
    attribute_params = ("additional_node_attributes", "additional_edge_attributes")

    # default both parameters before either of them is validated
    for param_name in attribute_params:
        if kwargs[param_name] is None:
            kwargs[param_name] = []

    for param_name in attribute_params:
        ParameterValidator.assert_type_and_value(kwargs[param_name], list,
                                                 "CytoscapeNetworkExporter", param_name)

    return CytoscapeNetworkExporter(**kwargs)
def build_object(cls, **kwargs):
    """
    Creates a TrainingPerformance report for the given metrics.

    `metrics` defaults to the names of all Metric members; the metric names are upper-cased before they
    are validated against the Metric names and passed on as a set. `name` defaults to None.
    """
    location = "TrainingPerformance"
    valid_metrics = [m.name for m in Metric]

    name = kwargs.get("name")
    metrics = [metric.upper() for metric in kwargs.get("metrics", valid_metrics)]

    ParameterValidator.assert_all_in_valid_list(metrics, valid_metrics, location, 'metrics')

    return TrainingPerformance(set(metrics), name=name)
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> ExploratoryAnalysisInstruction:
    """
    Builds an ExploratoryAnalysisInstruction with one analysis unit per entry under `analyses`.

    Args:
        key: name of the instruction
        instruction: specification with keys analyses and type
        symbol_table: forwarded to `_prepare_params`
        path: not used in this method
    """
    ParameterValidator.assert_keys(instruction, ["analyses", "type"], "ExploratoryAnalysisParser",
                                   "ExploratoryAnalysis")

    exp_analysis_units = {
        analysis_key: ExploratoryAnalysisUnit(**self._prepare_params(analysis, symbol_table))
        for analysis_key, analysis in instruction["analyses"].items()
    }

    return ExploratoryAnalysisInstruction(exploratory_analysis_units=exp_analysis_units, name=key)
def build_object(cls, **kwargs):
    """
    Creates a DesignMatrixExporter after checking that `file_format` and `name` are present and that
    the file format is one of npy, csv, npy.zip, csv.zip or hdf5.zip.
    """
    location = DesignMatrixExporter.__name__
    supported_formats = ['npy', 'csv', 'npy.zip', 'csv.zip', 'hdf5.zip']

    ParameterValidator.assert_keys_present(list(kwargs.keys()), ['file_format', 'name'], location, location)
    ParameterValidator.assert_in_valid_list(kwargs['file_format'], supported_formats, location,
                                            'file_format')

    return DesignMatrixExporter(**kwargs)
def _prepare_params(self, analysis: dict, symbol_table: SymbolTable) -> dict:
    """
    Assembles the parameters of one exploratory analysis.

    The dataset is taken from the symbol table as is, while the report is deep-copied; the optional
    parameters returned by `_prepare_optional_params` are added on top and take precedence.

    Returns:
        dict with `dataset`, `report` and the optional parameters
    """
    valid_keys = ["dataset", "report", "preprocessing_sequence", "labels", "encoding", "number_of_processes"]
    ParameterValidator.assert_keys(list(analysis.keys()), valid_keys, "ExploratoryAnalysisParser",
                                   "analysis", False)

    dataset = symbol_table.get(analysis["dataset"])
    report = copy.deepcopy(symbol_table.get(analysis["report"]))
    optional_params = self._prepare_optional_params(analysis, symbol_table)

    return {"dataset": dataset, "report": report, **optional_params}
def _check_dataset(self, specs):
    """
    Checks that the specification defines exactly one dataset under definitions/datasets and exactly
    one instruction.

    Raises:
        AssertionError: if the number of datasets or the number of instructions is not one
    """
    ParameterValidator.assert_keys_present(specs["definitions"].keys(), ['datasets'],
                                           DatasetGenerationTool.__name__, 'definitions')

    dataset_count = len(specs['definitions']['datasets'].keys())
    assert dataset_count == 1, (
        f"{DatasetGenerationTool.__name__}: only one dataset can be defined with this Galaxy tool, got these "
        f"instead: {list(specs['definitions']['datasets'].keys())}.")

    instruction_count = len(specs['instructions'].keys())
    assert instruction_count == 1, (
        f"{DatasetGenerationTool.__name__}: only one instruction of type DatasetExport can be defined with this Galaxy tool, got these "
        f"instructions instead: {list(specs['instructions'].keys())}.")