def make_simulation_docs(path: Path):
    instantiations = ReflectionHandler.all_nonabstract_subclasses(MotifInstantiationStrategy, "Instantiation",
                                                                  "motif_instantiation_strategy/")
    instantiations = [DocumentationFormat(inst, inst.__name__.replace('Instantiation', ""), DocumentationFormat.LEVELS[2])
                      for inst in instantiations]

    implanting_strategies = ReflectionHandler.all_nonabstract_subclasses(SignalImplantingStrategy, 'Implanting',
                                                                         'signal_implanting_strategy/')
    implanting_strategies = [DocumentationFormat(implanting, implanting.__name__.replace('Implanting', ""), DocumentationFormat.LEVELS[2])
                             for implanting in implanting_strategies]

    classes_to_document = [DocumentationFormat(Motif, Motif.__name__, DocumentationFormat.LEVELS[1])] + instantiations + \
                          [DocumentationFormat(Signal, Signal.__name__, DocumentationFormat.LEVELS[1])] + implanting_strategies + \
                          [DocumentationFormat(Implanting, Implanting.__name__, DocumentationFormat.LEVELS[1])]

    file_path = path / "simulation.rst"
    with file_path.open("w") as file:
        for doc_format in classes_to_document:
            write_class_docs(doc_format, file)
def _parse_to_enum_instances(params, location):
    # for each snake_case key that has a matching enum class in the given location,
    # replace the string value with the corresponding enum member (looked up by upper-cased name)
    for key in params.keys():
        class_name = DefaultParamsLoader._convert_to_camel_case(key)
        if ReflectionHandler.exists(class_name, location):
            cls = ReflectionHandler.get_class_by_name(class_name, location)
            params[key] = cls[params[key].upper()]
    return params
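# Stdlib-only sketch of the key-to-enum conversion above; RegionType and the helper
# below are hypothetical stand-ins for the immuneML classes, not the real API.
from enum import Enum

class RegionType(Enum):
    IMGT_CDR3 = "imgt_cdr3"
    FULL_SEQUENCE = "full_sequence"

def convert_to_camel_case(key: str) -> str:
    return "".join(part.capitalize() for part in key.split("_"))

params = {"region_type": "imgt_cdr3"}
known_enums = {"RegionType": RegionType}

class_name = convert_to_camel_case("region_type")  # -> "RegionType"
if class_name in known_enums:
    params["region_type"] = known_enums[class_name][params["region_type"].upper()]

assert params["region_type"] is RegionType.IMGT_CDR3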
def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: Path) -> SymbolTable:
    location = "ImportParser"

    ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False)

    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/")
    ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format")

    import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"]))
    params = ImportParser._prepare_params(dataset_specs, result_path, key)

    if "is_repertoire" in params:
        ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire")

        if params["is_repertoire"]:
            if import_cls != IReceptorImport:
                assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                ParameterValidator.assert_type_and_value(params["metadata_file"], Path, location, "metadata_file")
        else:
            assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
            ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired")

            if params["paired"]:
                assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair],
                                                        location, "receptor_chains")

    try:
        dataset = import_cls.import_dataset(params, key)
        dataset.name = key
        symbol_table.add(key, SymbolType.DATASET, dataset)
    except KeyError as key_error:
        raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                       f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                       f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                       f"an input data file.")
    except Exception as ex:
        raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")

    return symbol_table
def test_get_class_from_path(self):
    # no leading slash here: joining Path("/root") with an absolute "/..." operand
    # would silently discard root_path (see the sketch below)
    filepath = EnvironmentSettings.root_path / "immuneML/util/KmerHelper.py"

    cls = ReflectionHandler.get_class_from_path(filepath, "KmerHelper")
    self.assertEqual(KmerHelper, cls)

    cls = ReflectionHandler.get_class_from_path(filepath)
    self.assertEqual(KmerHelper, cls)
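# Stdlib-only illustration of the pathlib pitfall fixed above: when the right-hand
# operand of "/" is itself an absolute path, the left-hand path is dropped entirely.
from pathlib import Path

root = Path("/opt/project")
print(root / "/immuneML/util/KmerHelper.py")  # /immuneML/util/KmerHelper.py -- root is lost
print(root / "immuneML/util/KmerHelper.py")   # /opt/project/immuneML/util/KmerHelper.py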
def parse_instruction(key: str, instruction: dict, symbol_table: SymbolTable, path) -> tuple:
    ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"], InstructionParser.__name__, key)

    # strip the "Parser" suffix (6 characters) to get the valid instruction type names
    valid_instructions = [cls[:-6] for cls in ReflectionHandler.discover_classes_by_partial_name("Parser", "dsl/instruction_parsers/")]
    ParameterValidator.assert_in_valid_list(instruction["type"], valid_instructions, "InstructionParser", "type")

    # user-specified values take precedence over the defaults (see the merge sketch below)
    default_params = DefaultParamsLoader.load("instructions/", instruction["type"])
    instruction = {**default_params, **instruction}

    parser = ReflectionHandler.get_class_by_name("{}Parser".format(instruction["type"]), "instruction_parsers/")()
    instruction_object = parser.parse(key, instruction, symbol_table, path)

    symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)

    return instruction, symbol_table
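# Minimal sketch of the defaults-merge idiom used above; the dictionaries are
# hypothetical. In {**defaults, **user}, user-specified keys overwrite the defaults.
default_params = {"number_of_processes": 1, "export_formats": ["AIRR"]}
user_spec = {"type": "DatasetExport", "export_formats": ["ImmuneML"]}

merged = {**default_params, **user_spec}
assert merged["export_formats"] == ["ImmuneML"]  # user value wins
assert merged["number_of_processes"] == 1        # default is retained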
def import_hp_setting(config_dir: Path) -> Tuple[HPSetting, Label]:
    config = MLMethodConfiguration()
    config.load(config_dir / 'ml_config.yaml')

    ml_method = ReflectionHandler.get_class_by_name(config.ml_method, 'ml_methods/')()
    ml_method.load(config_dir)

    encoder = MLImport.import_encoder(config, config_dir)
    preprocessing_sequence = MLImport.import_preprocessing_sequence(config, config_dir)

    labels = list(config.labels_with_values.keys())
    assert len(labels) == 1, "MLImport: Multiple labels set in a single ml_config file."
    label = Label(labels[0], config.labels_with_values[labels[0]])

    return HPSetting(encoder=encoder, encoder_params=config.encoding_parameters, encoder_name=config.encoding_name,
                     ml_method=ml_method, ml_method_name=config.ml_method_name, ml_params={},
                     preproc_sequence=preprocessing_sequence, preproc_sequence_name=config.preprocessing_sequence_name), label
def generate_docs(path: Path):
    inst_path = PathBuilder.build(path / "instructions")
    instructions = sorted(ReflectionHandler.all_nonabstract_subclasses(Instruction, "Instruction", subdirectory='instructions/'),
                          key=lambda x: x.__name__)
    inst_paths = {}

    for instruction in instructions:
        instruction_name = instruction.__name__[:-11]  # strip the "Instruction" suffix (11 characters)
        # prefer a specialized make_<name>_docs function if the parser defines one
        if hasattr(InstructionParser, f"make_{instruction_name.lower()}_docs"):
            fn = getattr(InstructionParser, f"make_{instruction_name.lower()}_docs")
            file_path = fn(inst_path)
        else:
            file_path = InstructionParser.make_docs(instruction, instruction_name, inst_path)
        inst_paths[instruction_name] = file_path

    inst_file_path = inst_path / "instructions.rst"
    with inst_file_path.open('w') as file:
        for key, item in inst_paths.items():
            lines = f"{key}\n---------------------------\n.. include:: {os.path.relpath(item, EnvironmentSettings.source_docs_path)}\n"
            file.write(lines)  # lines is a single string, so write() rather than writelines()
def parse_object(specs, valid_class_names: list, class_name_ending: str, class_path: str, location: str, key: str,
                 builder: bool = False, return_params_dict: bool = False):
    class_name = ObjectParser.get_class_name(specs, valid_class_names, class_name_ending, location, key)
    ParameterValidator.assert_in_valid_list(class_name, valid_class_names, location, key)

    cls = ReflectionHandler.get_class_by_name(f"{class_name}{class_name_ending}", class_path)
    params = ObjectParser.get_all_params(specs, class_path, class_name, key)

    try:
        # drop the auto-added "name" parameter if the constructor does not accept it
        if "name" not in inspect.signature(cls.__init__).parameters.keys():
            del params["name"]
        obj = cls.build_object(**params) if builder and hasattr(cls, "build_object") else cls(**params)
    except TypeError as err:
        raise AssertionError(f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
                             f"under key {key}. Valid parameter names are: "
                             f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}")

    return (obj, {class_name: params}) if return_params_dict else obj
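# Illustrative stdlib-only sketch of the reflection pattern above: inspect a class's
# constructor signature and filter user-supplied keyword arguments before instantiating.
# The Example class and parameter names are hypothetical.
import inspect

class Example:
    def __init__(self, alpha: float, beta: int = 2):
        self.alpha, self.beta = alpha, beta

params = {"alpha": 0.5, "name": "my_example"}
accepted = inspect.signature(Example.__init__).parameters.keys()
filtered = {k: v for k, v in params.items() if k in accepted}  # drops "name"
obj = Example(**filtered)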
def __init__(self, validation_part, add_positional_information, kernel_size, n_kernels,
             n_additional_convs, n_attention_network_layers, n_attention_network_units,
             n_output_network_units, consider_seq_counts, sequence_reduction_fraction,
             reduction_mb_size, n_updates, n_torch_threads, learning_rate, l1_weight_decay,
             l2_weight_decay, evaluate_at, sample_n_sequences, training_batch_size, n_workers,
             keep_dataset_in_ram, pytorch_device_name):
    super(DeepRC, self).__init__()

    if not ReflectionHandler.is_installed("deeprc"):
        raise RuntimeError(f"{DeepRC.__name__}: deeprc module is not installed. Please check the documentation at "
                           f"https://docs.immuneml.uio.no/installation/install_with_package_manager.html "
                           f"for instructions on how to install it.")

    from deeprc.deeprc_binary.training import train
    self.training_function = train

    self.model = None
    self.result_path = None
    self.max_seq_len = None
    self.label = None
    self.keep_dataset_in_ram = keep_dataset_in_ram

    self.pytorch_device_name = pytorch_device_name
    self.pytorch_device = torch.device(self.pytorch_device_name)

    # ML model setting (not inherited from DeepRC code)
    self.validation_part = validation_part

    # DeepRC class settings:
    self.add_positional_information = add_positional_information
    self.n_input_features = 20 + 3 * self.add_positional_information
    self.kernel_size = kernel_size
    self.n_kernels = n_kernels
    self.n_additional_convs = n_additional_convs
    self.n_attention_network_layers = n_attention_network_layers
    self.n_attention_network_units = n_attention_network_units
    self.n_output_network_units = n_output_network_units
    self.consider_seq_counts = consider_seq_counts
    self.sequence_reduction_fraction = sequence_reduction_fraction
    self.reduction_mb_size = reduction_mb_size

    # train function settings:
    self.evaluate_at = evaluate_at
    self.n_updates = n_updates
    self.n_torch_threads = n_torch_threads
    self.learning_rate = learning_rate
    self.l1_weight_decay = l1_weight_decay
    self.l2_weight_decay = l2_weight_decay

    # Dataloader related settings:
    self.sample_n_sequences = sample_n_sequences
    self.training_batch_size = training_batch_size
    self.n_workers = n_workers

    self.feature_names = None
def prepare_reference(reference_params: dict, location: str, paired: bool):
    ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

    seq_import_params = reference_params["params"] if "params" in reference_params else {}

    assert os.path.isfile(seq_import_params["path"]), f"{location}: the file {seq_import_params['path']} does not exist. " \
                                                      f"Specify the correct path under reference."

    if "is_repertoire" in seq_import_params:
        assert seq_import_params["is_repertoire"] == False, f"{location}: is_repertoire must be False for SequenceImport"
    else:
        seq_import_params["is_repertoire"] = False

    if "paired" in seq_import_params:
        assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"
    else:
        seq_import_params["paired"] = paired

    format_str = reference_params["format"]
    import_class = ReflectionHandler.get_class_by_name("{}Import".format(format_str))

    default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                              DefaultParamsLoader.convert_to_snake_case(format_str))
    params = {**default_params, **seq_import_params}

    processed_params = DatasetImportParams.build_object(**params)

    receptors = ImportHelper.import_items(import_class, reference_params["params"]["path"], processed_params)

    return receptors
def get_class(specs, valid_class_names, class_name_ending, class_path, location, key):
    class_name = ObjectParser.get_class_name(specs, valid_class_names, class_name_ending, location, key)
    cls = ReflectionHandler.get_class_by_name(f"{class_name}{class_name_ending}", class_path)
    return cls
def _parse_report(key: str, params: dict, symbol_table: SymbolTable):
    valid_values = ReflectionHandler.all_nonabstract_subclass_basic_names(Report, "", "reports/")
    report_object, params = ObjectParser.parse_object(params, valid_values, "", "reports/", "ReportParser", key,
                                                      builder=True, return_params_dict=True)

    symbol_table.add(key, SymbolType.REPORT, report_object)

    return symbol_table, params
def test_discover_classes_by_partial_name(self):
    classes = ReflectionHandler.discover_classes_by_partial_name("Implanting", "simulation/signal_implanting_strategy/")
    self.assertListEqual(sorted(['HealthySequenceImplanting', 'ReceptorImplanting', 'FullSequenceImplanting']),
                         sorted(classes))
def test_get_classes_by_partial_name(self):
    classes = ReflectionHandler.get_classes_by_partial_name("Implanting", "simulation/signal_implanting_strategy/")
    self.assertSetEqual({HealthySequenceImplanting, ReceptorImplanting, FullSequenceImplanting}, set(classes))
def build_object(dataset=None, **params):
    try:
        prepared_params = KmerFrequencyEncoder._prepare_parameters(**params)
        # dataset_mapping keys are dataset class names; an unknown dataset type raises KeyError here
        encoder = ReflectionHandler.get_class_by_name(KmerFrequencyEncoder.dataset_mapping[dataset.__class__.__name__],
                                                      "kmer_frequency/")(**prepared_params)
    except (KeyError, ValueError):
        raise ValueError("{} is not defined for dataset of type {}.".format(KmerFrequencyEncoder.__name__,
                                                                            dataset.__class__.__name__))
    return encoder
def build_object(dataset=None, **params):
    try:
        prepared_params = EvennessProfileEncoder._prepare_parameters(**params)
        # dataset_mapping keys are dataset class names; an unknown dataset type raises KeyError here
        encoder = ReflectionHandler.get_class_by_name(EvennessProfileEncoder.dataset_mapping[dataset.__class__.__name__],
                                                      "evenness_profile/")(**prepared_params)
    except (KeyError, ValueError):
        raise ValueError("{} is not defined for dataset of type {}.".format(EvennessProfileEncoder.__name__,
                                                                            dataset.__class__.__name__))
    return encoder
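# Minimal self-contained sketch (all class names hypothetical) of the dataset_mapping
# dispatch shared by the build_object methods above: the concrete encoder is selected
# by the runtime class name of the dataset, and an unknown type becomes a ValueError.
class RepertoireDataset: ...
class SequenceDataset: ...

class RepertoireEncoder:
    def __init__(self, k: int): self.k = k

class SequenceEncoder:
    def __init__(self, k: int): self.k = k

dataset_mapping = {"RepertoireDataset": "RepertoireEncoder", "SequenceDataset": "SequenceEncoder"}
encoder_classes = {"RepertoireEncoder": RepertoireEncoder, "SequenceEncoder": SequenceEncoder}

def build_encoder(dataset, **params):
    try:
        # two-step lookup mirrors the mapping-to-class-name plus reflection pattern above
        return encoder_classes[dataset_mapping[dataset.__class__.__name__]](**params)
    except KeyError:
        raise ValueError(f"No encoder is defined for dataset of type {dataset.__class__.__name__}.")

encoder = build_encoder(RepertoireDataset(), k=3)  # -> a RepertoireEncoder instance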
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> SubsamplingInstruction:
    valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
    ParameterValidator.assert_keys(instruction.keys(), valid_keys, SubsamplingParser.__name__, key)

    dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
    ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys, SubsamplingParser.__name__, f'{key}/dataset')

    dataset = symbol_table.get(instruction['dataset'])
    ParameterValidator.assert_type_and_value(instruction['subsampled_dataset_sizes'], list, SubsamplingParser.__name__,
                                             f'{key}/subsampled_dataset_sizes')
    ParameterValidator.assert_all_type_and_value(instruction['subsampled_dataset_sizes'], int, SubsamplingParser.__name__,
                                                 f'{key}/subsampled_dataset_sizes', 1, dataset.get_example_count())

    valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
    ParameterValidator.assert_type_and_value(instruction['dataset_export_formats'], list, SubsamplingParser.__name__,
                                             f"{key}/dataset_export_formats")
    ParameterValidator.assert_all_in_valid_list(instruction['dataset_export_formats'], valid_export_formats,
                                                SubsamplingParser.__name__, f"{key}/dataset_export_formats")

    return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=instruction['subsampled_dataset_sizes'],
                                  dataset_export_formats=[ReflectionHandler.get_class_by_name(export_format + "Exporter",
                                                                                              "dataset_export/")
                                                          for export_format in instruction['dataset_export_formats']],
                                  name=key)
def _parse_ml_method(ml_method_id: str, ml_specification) -> tuple:
    valid_class_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MLMethod, "", "ml_methods/")

    # a bare string is shorthand for a method with no parameters
    if type(ml_specification) is str:
        ml_specification = {ml_specification: {}}

    ml_specification = {**DefaultParamsLoader.load("ml_methods/", "MLMethod"), **ml_specification}

    ml_specification_keys = list(ml_specification.keys())
    ParameterValidator.assert_all_in_valid_list(list(ml_specification_keys),
                                                ["model_selection_cv", "model_selection_n_folds"] + valid_class_values,
                                                "MLParser", ml_method_id)

    non_default_keys = [key for key in ml_specification.keys() if key not in ["model_selection_cv", "model_selection_n_folds"]]

    # after merging in the two model_selection_* defaults, exactly one ML method name may remain
    assert len(ml_specification_keys) == 3, f"MLParser: ML method {ml_method_id} was not correctly specified. Expected at least 1 key " \
                                            f"(ML method name), got {len(ml_specification_keys) - 2} instead: " \
                                            f"{str([key for key in non_default_keys])[1:-1]}."

    ml_method_class_name = non_default_keys[0]
    ml_method_class = ReflectionHandler.get_class_by_name(ml_method_class_name, "ml_methods/")

    ml_specification[ml_method_class_name] = {**DefaultParamsLoader.load("ml_methods/", ml_method_class_name, log_if_missing=False),
                                              **ml_specification[ml_method_class_name]}

    method, params = MLParser.create_method_instance(ml_specification, ml_method_class, ml_method_id)
    ml_specification[ml_method_class_name] = params
    method.name = ml_method_id

    return method, ml_specification
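# Illustrative sketch of the shorthand normalization above; the default values shown
# for model_selection_cv/model_selection_n_folds are assumptions, not confirmed defaults.
spec = "LogisticRegression"
if isinstance(spec, str):
    spec = {spec: {}}  # bare string becomes {"LogisticRegression": {}}

spec = {**{"model_selection_cv": False, "model_selection_n_folds": -1}, **spec}
# spec now holds exactly three keys: the two defaults plus one ML method name
assert len(spec) == 3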
def parse_exporters(self, instruction):
    if instruction["export_formats"] is not None:
        class_path = "dataset_export/"
        ParameterValidator.assert_all_in_valid_list(instruction["export_formats"],
                                                    ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter',
                                                                                                           class_path),
                                                    location="SimulationParser", parameter_name="export_formats")
        exporters = [ReflectionHandler.get_class_by_name(f"{item}Exporter", class_path) for item in instruction["export_formats"]]
    else:
        exporters = None

    return exporters
def build_object(dataset=None, **params):
    try:
        prepared_params = MatchedRegexEncoder._prepare_parameters(**params)
        # dataset_mapping keys are dataset class names; an unknown dataset type raises KeyError here
        encoder = ReflectionHandler.get_class_by_name(MatchedRegexEncoder.dataset_mapping[dataset.__class__.__name__],
                                                      "reference_encoding/")(**prepared_params)
    except (KeyError, ValueError):
        raise ValueError("{} is not defined for dataset of type {}.".format(MatchedRegexEncoder.__name__,
                                                                            dataset.__class__.__name__))
    return encoder
def __init__(self, distance_metric: DistanceMetricType, attributes_to_match: list, sequence_batch_size: int,
             context: dict = None, name: str = None):
    self.distance_metric = distance_metric
    # resolve the metric's implementation by name from the DistanceMetrics module
    self.distance_fn = ReflectionHandler.import_function(self.distance_metric.value, DistanceMetrics)
    self.attributes_to_match = attributes_to_match
    self.sequence_batch_size = sequence_batch_size
    self.context = context
    self.name = name
    self.comparison = None
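# Stdlib-only analogue of ReflectionHandler.import_function, not the real immuneML
# helper: look a function up on a module by its string name. math.dist is used
# purely as an illustration of a distance function resolved this way.
import math

def import_function(function_name: str, module):
    fn = getattr(module, function_name)
    assert callable(fn), f"{function_name} is not a callable in {module.__name__}"
    return fn

euclidean = import_function("dist", math)
print(euclidean((0, 0), (3, 4)))  # 5.0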
def get_documentation():
    doc = str(DatasetExportInstruction.__doc__)

    valid_strategy_values = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", "dataset_export/")
    valid_strategy_values = str(valid_strategy_values)[1:-1].replace("'", "`")

    mapping = {
        "Valid formats are class names of any non-abstract class inheriting "
        ":py:obj:`~immuneML.IO.dataset_export.DataExporter.DataExporter`.": f"Valid values are: {valid_strategy_values}."
    }
    doc = update_docs_per_mapping(doc, mapping)
    return doc
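# Minimal sketch of the docstring substitution above. update_docs_per_mapping is
# approximated here with plain str.replace; the real immuneML helper may differ.
def update_docs_per_mapping(doc: str, mapping: dict) -> str:
    for old_text, new_text in mapping.items():
        doc = doc.replace(old_text, new_text)
    return doc

# the str(...)[1:-1].replace trick turns a list of names into backticked prose:
values = str(["AIRR", "ImmuneML"])[1:-1].replace("'", "`")  # -> `AIRR`, `ImmuneML`
doc = update_docs_per_mapping("Valid formats are class names of exporters.",
                              {"class names of exporters.": f"{values}."})
print(doc)  # Valid formats are `AIRR`, `ImmuneML`.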
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> DatasetExportInstruction:
    location = "DatasetExportParser"
    ParameterValidator.assert_keys(list(instruction.keys()),
                                   DatasetExportParser.REQUIRED_KEYS + DatasetExportParser.OPTIONAL_KEYS, location, key, False)
    ParameterValidator.assert_keys_present(list(instruction.keys()), DatasetExportParser.REQUIRED_KEYS, location, key)

    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", 'dataset_export/')
    ParameterValidator.assert_all_in_valid_list(instruction["export_formats"], valid_formats, location, "export_formats")
    ParameterValidator.assert_all_in_valid_list(instruction["datasets"], symbol_table.get_keys_by_type(SymbolType.DATASET),
                                                location, "datasets")

    # format_name avoids shadowing the key argument inside the comprehension
    return DatasetExportInstruction(datasets=[symbol_table.get(dataset_key) for dataset_key in instruction["datasets"]],
                                    exporters=[ReflectionHandler.get_class_by_name(f"{format_name}Exporter", "dataset_export/")
                                               for format_name in instruction["export_formats"]],
                                    preprocessing_sequence=symbol_table.get(instruction["preprocessing_sequence"])
                                    if "preprocessing_sequence" in instruction else None,
                                    name=key)
def get_documentation():
    initial_doc = str(Signal.__doc__)

    valid_implanting_values = str(ReflectionHandler.all_nonabstract_subclass_basic_names(SignalImplantingStrategy, 'Implanting',
                                                                                         'signal_implanting_strategy/'))[1:-1].replace("'", "`")
    docs_mapping = {
        "Valid values for this argument are class names of different signal implanting strategies.":
            f"Valid values are: {valid_implanting_values}"
    }
    doc = update_docs_per_mapping(initial_doc, docs_mapping)
    return doc
def _get_implanting_strategy(key: str, signal: dict) -> SignalImplantingStrategy:
    # strip the "Implanting" suffix (10 characters) from the discovered class names
    valid_strategies = [cls[:-10] for cls in ReflectionHandler.discover_classes_by_partial_name("Implanting",
                                                                                                "simulation/signal_implanting_strategy/")]
    ParameterValidator.assert_in_valid_list(signal["implanting"], valid_strategies, "SignalParser", key)

    defaults = DefaultParamsLoader.load("signal_implanting_strategy/", f"{signal['implanting']}Implanting")
    signal = {**defaults, **signal}

    ParameterValidator.assert_keys_present(list(signal.keys()), ["motifs", "implanting", "sequence_position_weights"],
                                           SignalParser.__name__, key)

    implanting_comp = None
    if 'implanting_computation' in signal:
        implanting_comp = signal['implanting_computation'].lower()
        ParameterValidator.assert_in_valid_list(implanting_comp, [el.name.lower() for el in ImplantingComputation],
                                                SignalParser.__name__, 'implanting_computation')
        implanting_comp = ImplantingComputation[implanting_comp.upper()]

    implanting_strategy = ReflectionHandler.get_class_by_name(f"{signal['implanting']}Implanting")(
        GappedMotifImplanting(), signal["sequence_position_weights"], implanting_comp)

    return implanting_strategy
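# Self-contained sketch of the case-insensitive enum lookup used above; the members
# of ImplantingComputation shown here are illustrative, not the authoritative list.
from enum import Enum

class ImplantingComputation(Enum):
    ROUND = "round"
    POISSON = "poisson"

user_value = "Poisson"
computation = ImplantingComputation[user_value.upper()]  # Enum[...] indexes by member name
assert computation is ImplantingComputation.POISSON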
def get_documentation():
    doc = str(Motif.__doc__)

    valid_strategy_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MotifInstantiationStrategy, "Instantiation",
                                                                                   "motif_instantiation_strategy/")
    valid_strategy_values = str(valid_strategy_values)[1:-1].replace("'", "`")
    chain_values = str([name for name in Chain])[1:-1].replace("'", "`")

    mapping = {
        "It should be one of the classes inheriting MotifInstantiationStrategy.": f"Valid values are: {valid_strategy_values}.",
        "The value should be an instance of :py:obj:`~immuneML.data_model.receptor.receptor_sequence.Chain.Chain`.":
            f"Valid values are: {chain_values}."
    }
    doc = update_docs_per_mapping(doc, mapping)
    return doc
def _parse_sequence(key: str, preproc_sequence: list, symbol_table: SymbolTable) -> SymbolTable:
    sequence = []
    valid_preprocessing_classes = ReflectionHandler.all_nonabstract_subclass_basic_names(Preprocessor, "", "preprocessing/")

    for item in preproc_sequence:
        for step_key, step in item.items():
            obj, params = ObjectParser.parse_object(step, valid_preprocessing_classes, "", "preprocessing/",
                                                    "PreprocessingParser", step_key, True, True)
            step = params
            sequence.append(obj)

    symbol_table.add(key, SymbolType.PREPROCESSING, sequence)
    return symbol_table
def _load_batch(self, current_file: int):
    element_class = ReflectionHandler.get_class_by_name(self.element_class_name, "data_model")
    assert hasattr(element_class, 'create_from_record'), \
        f"{ElementGenerator.__name__}: cannot load the binary file, the class {element_class.__name__} has no 'create_from_record' method."

    try:
        elements = [element_class.create_from_record(el) for el in np.load(self.file_list[current_file], allow_pickle=False)]
    except ValueError as error:
        raise ValueError(f'{ElementGenerator.__name__}: an error occurred while creating an object from binary file. Details: {error}')

    return elements
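# Self-contained sketch of the binary round trip used above, with a hypothetical
# Element class standing in for the real data_model classes.
import numpy as np

class Element:
    def __init__(self, identifier: str, value: float):
        self.identifier, self.value = identifier, value

    @classmethod
    def create_from_record(cls, record):
        return cls(str(record["identifier"]), float(record["value"]))

# structured arrays need no pickling, which is why allow_pickle=False works here
records = np.array([("a", 1.0), ("b", 2.5)], dtype=[("identifier", "U10"), ("value", "f8")])
np.save("batch1.npy", records, allow_pickle=False)

elements = [Element.create_from_record(rec) for rec in np.load("batch1.npy", allow_pickle=False)]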
def parse_encoder(key: str, specs: dict):
    class_path = "encodings"
    valid_encoders = ReflectionHandler.all_nonabstract_subclass_basic_names(DatasetEncoder, "Encoder", class_path)
    encoder = ObjectParser.get_class(specs, valid_encoders, "Encoder", class_path, "EncodingParser", key)
    params = ObjectParser.get_all_params(specs, class_path, encoder.__name__[:-7], key)  # strip the "Encoder" suffix

    # every user-supplied parameter must be accepted by the encoder's constructor
    valid_params = [p for p in list(inspect.signature(encoder.__init__).parameters.keys()) if p != "self"]
    ParameterValidator.assert_all_in_valid_list(params.keys(), valid_params, "EncoderParser",
                                                f"{key}/{encoder.__name__.replace('Encoder', '')}")

    return encoder, params
def run_immuneML(namespace: argparse.Namespace):
    if os.path.isdir(namespace.result_path) and len(os.listdir(namespace.result_path)) != 0:
        raise ValueError(f"Directory {namespace.result_path} already exists. Please specify a new output directory for the analysis.")
    PathBuilder.build(namespace.result_path)

    logging.basicConfig(filename=Path(namespace.result_path) / "log.txt", level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s')
    warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: logging.warning(message)

    if namespace.tool is None:
        app = ImmuneMLApp(namespace.specification_path, namespace.result_path)
    else:
        app_cls = ReflectionHandler.get_class_by_name(namespace.tool, "api/")
        app = app_cls(**vars(namespace))

    app.run()
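# Stdlib-only sketch of the warning-to-log redirection above: after overriding
# warnings.showwarning, warnings.warn() messages land in the log file, not on stderr.
import logging
import warnings

logging.basicConfig(filename="log.txt", level=logging.INFO,
                    format='%(asctime)s %(levelname)s: %(message)s')
warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: logging.warning(message)

warnings.warn("this message is written to log.txt")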