Beispiel #1
0
    def build_object(cls, **kwargs):
        """
        Validate the supplied parameters and build a Coefficients object.

        Arguments:
            kwargs: must contain "coefs_to_plot" (names of CoefficientPlottingSetting members, matched
                case-insensitively); "cutoff" is read only when CUTOFF is requested, "n_largest" only when
                N_LARGEST is requested; "name" is optional.

        Returns:
            Coefficients object built from the validated parameters
        """
        location = "Coefficients"
        requested = [entry.upper() for entry in kwargs["coefs_to_plot"]]
        name = kwargs.get("name")

        valid_settings = [setting.name.upper() for setting in CoefficientPlottingSetting]
        ParameterValidator.assert_all_in_valid_list(requested, valid_settings, location, "coefs_to_plot")

        # thresholds are only read from kwargs when the matching setting was requested
        cutoff = []
        if CoefficientPlottingSetting.CUTOFF.name in requested:
            cutoff = kwargs["cutoff"]
            ParameterValidator.assert_type_and_value(cutoff, list, location, "cutoff")
            ParameterValidator.assert_all_type_and_value(cutoff, Number, location, "cutoff", min_inclusive=1e-15)

        n_largest = []
        if CoefficientPlottingSetting.N_LARGEST.name in requested:
            n_largest = kwargs["n_largest"]
            ParameterValidator.assert_type_and_value(n_largest, list, location, "n_largest")
            ParameterValidator.assert_all_type_and_value(n_largest, int, location, "n_largest", min_inclusive=1)

        coefs = CoefficientPlottingSettingList()
        for setting_name in requested:
            coefs.append(CoefficientPlottingSetting[setting_name.upper()])

        return Coefficients(coefs, cutoff, n_largest, name)
Beispiel #2
0
    def parse_object(specs,
                     valid_class_names: list,
                     class_name_ending: str,
                     class_path: str,
                     location: str,
                     key: str,
                     builder: bool = False,
                     return_params_dict: bool = False):
        """
        Resolve the class named in the specs, collect its parameters and instantiate it.

        Arguments:
            specs: user specification from which the class name and parameters are extracted
            valid_class_names: class names (without the ending) that are accepted
            class_name_ending: suffix appended to the class name before the class is looked up
            class_path: path passed on to the class lookup and to the parameter collection
            location: prefix used in error messages
            key: key under which the specs were defined, used in error messages
            builder: if True and the class has `build_object`, that is called instead of the constructor
            return_params_dict: if True, a tuple (object, {class_name: params}) is returned

        Returns:
            the created object, or a tuple of the object and its parameters dict

        Raises:
            AssertionError: if creating the object fails with a TypeError (e.g. an unexpected parameter)
        """
        class_name = ObjectParser.get_class_name(specs, valid_class_names,
                                                 class_name_ending, location,
                                                 key)
        ParameterValidator.assert_in_valid_list(class_name, valid_class_names,
                                                location, key)

        cls = ReflectionHandler.get_class_by_name(
            f"{class_name}{class_name_ending}", class_path)
        params = ObjectParser.get_all_params(specs, class_path, class_name,
                                             key)

        init_param_names = list(inspect.signature(cls.__init__).parameters.keys())

        # pop with a default: params is not guaranteed to carry a "name" entry, and a plain `del`
        # would raise a KeyError that the TypeError handler below does not translate
        if "name" not in init_param_names:
            params.pop("name", None)

        try:
            if builder and hasattr(cls, "build_object"):
                obj = cls.build_object(**params)
            else:
                obj = cls(**params)
        except TypeError as err:
            # chain the original error so the underlying traceback is not lost
            raise AssertionError(
                f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
                f"under key {key}. Valid parameter names are: "
                f"{init_param_names}"
            ) from err

        return (obj, {class_name: params}) if return_params_dict else obj
Beispiel #3
0
    def _prepare_specs(self):
        """
        Load the YAML specification from self.yaml_path and check that it fits this tool.

        The specification must contain `definitions` and `instructions` (optionally `output`) and exactly
        one instruction, whose name is stored in self.instruction_name. After the checks, the paths in the
        specification are checked and the result paths updated through Util.
        """
        with open(self.yaml_path, "r") as file:
            specs = yaml.safe_load(file)

        tool_name = GalaxyTrainMLModel.__name__
        top_level_keys = specs.keys()

        ParameterValidator.assert_keys_present(top_level_keys, ["definitions", "instructions"], tool_name, "YAML specification")
        ParameterValidator.assert_all_in_valid_list(top_level_keys, ["definitions", "instructions", "output"], tool_name,
                                                    "YAML specification")

        ParameterValidator.assert_type_and_value(specs["instructions"], dict, tool_name, "instructions")

        instruction_names = list(specs["instructions"].keys())
        assert len(instruction_names) == 1, f"{GalaxyTrainMLModel.__name__}: one instruction has to be specified under " \
                                            f"`instructions`, got the following instead: {instruction_names}."

        self.instruction_name = instruction_names[0]
        instruction = specs['instructions'][self.instruction_name]

        ParameterValidator.assert_type_and_value(instruction, dict, tool_name, self.instruction_name)
        ParameterValidator.assert_keys_present(instruction.keys(), ['type'], tool_name, self.instruction_name)

        # the expected type is the instruction class name without its last 11 characters
        assert instruction['type'] == TrainMLModelInstruction.__name__[:-11], \
            f"{GalaxyTrainMLModel.__name__}: instruction `type` under {self.instruction_name} has to be {TrainMLModelInstruction.__name__[:-11]} " \
            f"for this tool."

        Util.check_paths(specs, tool_name)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
Beispiel #4
0
    def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset_name: str) -> RepertoireDataset:
        """
        Create a repertoire dataset from a metadata file and the repertoire files it lists, and export the
        dataset as a pickle file.

        Arguments:
            import_class: class to use for import
            params: instance of DatasetImportParams class which includes information on path, columns, result path etc.
            dataset_name: user-defined name of the dataset

        Returns:
            RepertoireDataset object that was created
        """
        # `sep` must be passed by keyword: the positional form is deprecated and rejected by pandas 2.x
        metadata = pd.read_csv(params.metadata_file, sep=",")

        ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
                                               f'{dataset_name}: params: metadata_file')

        PathBuilder.build(params.result_path + "repertoires/")

        # one task per metadata row; the repertoires are loaded in parallel
        arguments = [(import_class, row, params) for _, row in metadata.iterrows()]
        with Pool(params.number_of_processes) as pool:
            repertoires = pool.starmap(ImportHelper.load_repertoire_as_object, arguments)

        new_metadata_file = ImportHelper.make_new_metadata_file(repertoires, metadata, params.result_path, dataset_name)

        # every metadata column except the filename is treated as a potential label
        potential_labels = list(set(metadata.columns.tolist()) - {"filename"})
        dataset = RepertoireDataset(params={key: list(set(metadata[key].values.tolist())) for key in potential_labels},
                                    repertoires=repertoires, metadata_file=new_metadata_file, name=dataset_name)

        PickleExporter.export(dataset, params.result_path)

        return dataset
Beispiel #5
0
    def _prepare_report_config(self, instruction_key, instruction, split_key,
                               symbol_table):
        if "reports" in instruction[split_key]:
            location = f"{instruction_key}/{split_key}/reports"
            report_types = list(signature(ReportConfig).parameters.keys())
            ParameterValidator.assert_all_in_valid_list(
                instruction[split_key]["reports"].keys(), report_types,
                location, "reports")

            for report_type in instruction[split_key]["reports"]:
                ParameterValidator.assert_type_and_value(
                    instruction[split_key]["reports"][report_type], list,
                    f"{location}/{report_type}", report_type)

            report_config_input = {
                report_type: {
                    report_id: symbol_table.get(report_id)
                    for report_id in instruction[split_key]["reports"]
                    [report_type]
                }
                for report_type in instruction[split_key]["reports"]
            }
        else:
            report_config_input = {}

        return report_config_input
Beispiel #6
0
    def _parse_ml_method(ml_method_id: str, ml_specification) -> tuple:
        """
        Parse the specification of one ML method and create the method instance.

        Arguments:
            ml_method_id: user-defined identifier of the ML method; also set as the name of the method
            ml_specification: either the ML method class name (str) or a dict with the class name as key
                and optionally the model selection keys

        Returns:
            tuple (method instance, specification filled in with default and resolved parameters)
        """
        selection_keys = ["model_selection_cv", "model_selection_n_folds"]
        valid_class_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MLMethod, "", "ml_methods/")

        # a bare class name is shorthand for that class with no explicit parameters
        if type(ml_specification) is str:
            ml_specification = {ml_specification: {}}

        general_defaults = DefaultParamsLoader.load("ml_methods/", "MLMethod")
        ml_specification = {**general_defaults, **ml_specification}
        ml_specification_keys = list(ml_specification.keys())

        ParameterValidator.assert_all_in_valid_list(list(ml_specification_keys), selection_keys + valid_class_values,
                                                    "MLParser", ml_method_id)

        non_default_keys = []
        for spec_key in ml_specification.keys():
            if spec_key not in selection_keys:
                non_default_keys.append(spec_key)

        assert len(ml_specification_keys) == 3, f"MLParser: ML method {ml_method_id} was not correctly specified. Expected at least 1 key " \
                                                f"(ML method name), got {len(ml_specification_keys) - 2} instead: " \
                                                f"{str(non_default_keys)[1:-1]}."

        ml_method_class_name = non_default_keys[0]
        ml_method_class = ReflectionHandler.get_class_by_name(ml_method_class_name, "ml_methods/")

        # user-supplied parameters override the class-specific defaults
        class_defaults = DefaultParamsLoader.load("ml_methods/", ml_method_class_name, log_if_missing=False)
        ml_specification[ml_method_class_name] = {**class_defaults, **ml_specification[ml_method_class_name]}

        method, params = MLParser.create_method_instance(ml_specification, ml_method_class, ml_method_id)
        ml_specification[ml_method_class_name] = params
        method.name = ml_method_id

        return method, ml_specification
Beispiel #7
0
    def import_dataset(params, name: str) -> SequenceDataset:
        """
        Returns randomly generated sequence dataset according to the parameters;

        Arguments:
            params: dict with the keys listed in the YAML specification below; no other keys are accepted
            name: name of the dataset (not used by this function)

        YAML specification:

            result_path: path/where/to/store/results/
            sequence_count: 100 # number of random sequences to generate
            length_probabilities:
                14: 0.8 # 80% of all generated sequences will have length 14
                15: 0.2 # 20% of all generated sequences will have length 15
            labels:
                epitope1: # label name
                    True: 0.5 # 50% of the sequences will have class True
                    False: 0.5 # 50% of the sequences will have class False
                epitope2: # next label with classes that will be assigned to sequences independently of the previous label or other parameters
                    1: 0.3 # 30% of the generated sequences will have class 1
                    0: 0.7 # 70% of the generated sequences will have class 0

        """
        valid_keys = [
            "sequence_count", "length_probabilities", "labels", "result_path"
        ]
        ParameterValidator.assert_all_in_valid_list(
            list(params.keys()), valid_keys, "RandomSequenceDatasetImport",
            "params")

        return RandomDatasetGenerator.generate_sequence_dataset(
            sequence_count=params["sequence_count"],
            length_probabilities=params["length_probabilities"],
            labels=params["labels"],
            path=params["result_path"])
Beispiel #8
0
    def _prepare_parameters(reference: dict,
                            max_edit_distances: dict,
                            name: str = None):
        """
        Validate and normalise the parameters of the MatchedReceptorsEncoder.

        Arguments:
            reference: specification of the reference receptors, passed on to MatchedReferenceUtil
            max_edit_distances: either a single int applied to every legal chain, or a dict keyed by chain
            name: optional name, returned unchanged

        Returns:
            dict with keys "reference_receptors", "max_edit_distances" and "name"
        """
        location = "MatchedReceptorsEncoder"

        legal_chains = []
        for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor()):
            legal_chains.extend(receptor.get_chains())

        supplied_type = type(max_edit_distances)
        if supplied_type is int:
            # a single value is expanded to all legal chains
            max_edit_distances = dict.fromkeys(legal_chains, max_edit_distances)
        elif supplied_type is dict:
            ParameterValidator.assert_keys(max_edit_distances.keys(), legal_chains, location, "max_edit_distances",
                                           exclusive=False)
        else:
            # neither int nor dict: let the validator handle the value as a failed dict check
            ParameterValidator.assert_type_and_value(max_edit_distances, dict, location, 'max_edit_distances')

        reference_receptors = MatchedReferenceUtil.prepare_reference(reference, location=location, paired=True)

        prepared = {"reference_receptors": reference_receptors}
        prepared["max_edit_distances"] = max_edit_distances
        prepared["name"] = name
        return prepared
Beispiel #9
0
    def import_dataset(params: dict, dataset_name: str) -> RepertoireDataset:
        """
        Generate a random repertoire dataset from the given parameters.

        Arguments:
            params: dict which may only contain the keys result_path, repertoire_count,
                sequence_count_probabilities, sequence_length_probabilities and labels
            dataset_name: name of the dataset (not used by this function)

        Returns:
            the dataset returned by RandomDatasetGenerator.generate_repertoire_dataset
        """
        allowed_keys = ["result_path", "repertoire_count", "sequence_count_probabilities", "sequence_length_probabilities", "labels"]
        ParameterValidator.assert_all_in_valid_list(list(params.keys()), allowed_keys, "RandomRepertoireDatasetImport", "params")

        generator_arguments = {
            "repertoire_count": params["repertoire_count"],
            "sequence_count_probabilities": params["sequence_count_probabilities"],
            "sequence_length_probabilities": params["sequence_length_probabilities"],
            "labels": params["labels"],
            "path": params["result_path"],
        }
        return RandomDatasetGenerator.generate_repertoire_dataset(**generator_arguments)
    def _check_specs(self, workflow_specification):
        """
        Check the top-level keys of the workflow specification, then the dataset and instruction specs.

        Arguments:
            workflow_specification: parsed YAML specification (dict)
        """
        location = 'MultiDatasetBenchmarkTool'
        expected_keys = ['definitions', 'instructions', 'output']

        ParameterValidator.assert_keys(workflow_specification.keys(), expected_keys, location, 'YAML specification')

        self._check_dataset_specs(workflow_specification, location)
        self._check_instruction_specs(workflow_specification, location)
Beispiel #11
0
    def _check_dataset(self, specs):
        """
        Check that exactly one dataset and exactly one instruction are defined in the specification.

        Arguments:
            specs: parsed YAML specification (dict) with `definitions` and `instructions`
        """
        tool_name = DatasetGenerationTool.__name__
        ParameterValidator.assert_keys_present(specs["definitions"].keys(), ['datasets'], tool_name, 'definitions')

        dataset_names = list(specs['definitions']['datasets'].keys())
        assert len(dataset_names) == 1, \
            f"{DatasetGenerationTool.__name__}: only one dataset can be defined with this Galaxy tool, got these " \
            f"instead: {dataset_names}."

        instruction_names = list(specs['instructions'].keys())
        assert len(instruction_names) == 1, \
            f"{DatasetGenerationTool.__name__}: only one instruction of type DatasetExport can be defined with this Galaxy tool, got these " \
            f"instructions instead: {instruction_names}."
Beispiel #12
0
    def _check_label_format(self, labels: list, instruction_key: str):
        """
        Check that labels are given as a list whose items are either label names (str) or single-key dicts
        of the form {label_name: {'positive_class': ...}}.

        Arguments:
            labels: labels as specified under the instruction
            instruction_key: key of the instruction, used in error messages
        """
        ParameterValidator.assert_type_and_value(labels, list, TrainMLModelParser.__name__, f'{instruction_key}/labels')

        def _has_positive_class_only(label: dict) -> bool:
            # exactly one label name, mapped to a dict whose only key is 'positive_class'
            if len(label) != 1:
                return False
            label_params = list(label.values())[0]
            return isinstance(label_params, dict) and 'positive_class' in label_params and len(label_params) == 1

        assert all(isinstance(label, (str, dict)) for label in labels), \
            f"{TrainMLModelParser.__name__}: labels under {instruction_key} were not defined properly. The list of labels has to either be a list of " \
            f"label names, or there can be a parameter 'positive_class' defined under the label name."

        dict_labels = [label for label in labels if isinstance(label, dict)]
        assert all(_has_positive_class_only(label) for label in dict_labels), \
            f"{TrainMLModelParser.__name__}: labels that are specified by more than label name, can include only one parameter called 'positive_class'."
Beispiel #13
0
    def parse(specs: dict, symbol_table: SymbolTable) -> dict:
        """
        Validate the `output` section of the specification, or fill in the HTML default when it is missing,
        and register it in the symbol table.

        Arguments:
            specs: parsed specification; `output` is added to it in place if absent
            symbol_table: symbol table to which the output specification is added

        Returns:
            the output specification (dict)
        """
        if "output" not in specs:
            specs["output"] = {"format": "HTML"}
        else:
            ParameterValidator.assert_keys(specs["output"], ["format"], "OutputParser", "output")
            ParameterValidator.assert_in_valid_list(specs["output"]["format"], ["HTML"], "OutputParser", "format")

        output_specs = specs["output"]
        symbol_table.add("output", SymbolType.OUTPUT, output_specs)

        return output_specs
    def _prepare_parameters(max_edit_distance: int, reference: dict, name: str = None):
        """
        Validate and prepare the parameters of the MatchedSequencesEncoder.

        Arguments:
            max_edit_distance: non-negative int
            reference: specification of the reference sequences, passed on to MatchedReferenceUtil
            name: optional name, returned unchanged

        Returns:
            dict with keys "max_edit_distance", "reference_sequences" and "name"
        """
        location = "MatchedSequencesEncoder"

        ParameterValidator.assert_type_and_value(max_edit_distance, int, location, "max_edit_distance", min_inclusive=0)

        prepared = {"max_edit_distance": max_edit_distance}
        prepared["reference_sequences"] = MatchedReferenceUtil.prepare_reference(reference_params=reference,
                                                                                 location=location, paired=False)
        prepared["name"] = name
        return prepared
Beispiel #15
0
    def _check_instruction(self, specs):
        """
        Check that the single instruction has the expected type and lists exactly one item under each of
        `datasets` and `export_formats`.

        Arguments:
            specs: parsed YAML specification (dict)
        """
        tool_name = DatasetGenerationTool.__name__
        instruction_name = Util.check_instruction_type(specs, tool_name, DatasetExportInstruction.__name__[:-11])

        for key in ['datasets', 'export_formats']:
            instruction = specs['instructions'][instruction_name]

            ParameterValidator.assert_keys_present(list(instruction.keys()), [key], tool_name, instruction_name)
            ParameterValidator.assert_type_and_value(instruction[key], list, tool_name, f"{instruction_name}/{key}")

            assert len(instruction[key]) == 1, \
                f"{DatasetGenerationTool.__name__}: this tool accepts only one item under {key}, got {instruction[key]} " \
                f"instead."
Beispiel #16
0
    def update_specs(self):
        """
        Load the YAML specification from self.yaml_path, validate it for this tool, and update its paths.

        The specification must contain `definitions` and `instructions` and may contain `output`.
        """
        with open(self.yaml_path, 'r') as file:
            specs = yaml.safe_load(file)

        tool_name = DatasetGenerationTool.__name__
        required_keys = ["definitions", "instructions"]

        ParameterValidator.assert_keys_present(specs.keys(), required_keys, tool_name, "YAML specification")
        ParameterValidator.assert_all_in_valid_list(specs.keys(), required_keys + ["output"], tool_name, "YAML specification")

        self._check_dataset(specs)
        self._check_instruction(specs)

        Util.check_paths(specs, tool_name)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
Beispiel #17
0
 def _prepare_reports(self, reports: list,
                      symbol_table: SymbolTable) -> dict:
     """
     Look up the report objects for the given report ids and check that each is a TrainMLModelReport.

     Arguments:
         reports: list of report ids, or None
         symbol_table: symbol table from which the report objects are retrieved

     Returns:
         dict {report_id: report object}; empty dict if reports is None
     """
     if reports is None:
         return {}

     report_objects = {}
     for report_id in reports:
         report_objects[report_id] = symbol_table.get(report_id)

     ParameterValidator.assert_all_type_and_value(report_objects.values(), TrainMLModelReport,
                                                  TrainMLModelParser.__name__, 'reports')
     return report_objects
Beispiel #18
0
    def check_instruction_type(specs: dict, tool_name,
                               expected_instruction) -> str:
        """
        Check that the specification defines exactly one instruction and that it has the expected type.

        Arguments:
            specs: parsed YAML specification (dict)
            tool_name: name of the calling tool, used in error messages
            expected_instruction: expected value of the instruction's `type`

        Returns:
            name of the single instruction
        """
        ParameterValidator.assert_keys_present(list(specs.keys()), ['definitions', 'instructions'], tool_name,
                                               "YAML specification")

        instruction_names = list(specs['instructions'].keys())
        assert len(instruction_names) == 1, f"{tool_name}: multiple instructions were given " \
                                            f"({str(instruction_names)[1:-1]}), but only one instruction of type " \
                                            f"{expected_instruction} should be specified."

        instruction_name = instruction_names[0]
        instruction_type = specs['instructions'][instruction_name]['type']
        assert instruction_type == expected_instruction, \
            f"{tool_name}: instruction type has to be '{expected_instruction}', got {instruction_type} instead."

        return instruction_name
Beispiel #19
0
    def __init__(self, percentage: float, show_warnings: bool = True):
        """
        Arguments:
            percentage: float between 0 and 1 (both inclusive)
            show_warnings: stored on the instance as show_warnings
        """
        super().__init__()

        ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage",
                                                 min_inclusive=0., max_inclusive=1.)

        self.percentage = percentage
        # k and label are initialised to None here
        self.k = None
        self.label = None
        self.show_warnings = show_warnings
Beispiel #20
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        """
        Build one HPSetting for every entry under `settings` of the instruction.

        Arguments:
            instruction: instruction specification; reads `settings` and `dataset`
            symbol_table: symbol table from which preprocessing sequences, encoders and ML methods
                (and their configs) are retrieved

        Returns:
            list of HPSetting objects, in the order in which the settings were specified

        Raises:
            KeyError: if any KeyError occurs while the settings are processed
        """
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                if "preprocessing" in setting:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                    else:
                        # NOTE(review): this KeyError is raised inside the outer try, so the handler at the
                        # bottom catches it and embeds this whole message as the "parameter" of its own message
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    # no preprocessing given: the key is added to the setting (mutating the input) so that
                    # the key check below sees it
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                # the encoder is built for the instruction's dataset and gets that dataset as its context
                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=symbol_table.get(setting["ml_method"]),
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            # any KeyError from the block above (missing dict key or the explicit raise) ends up here
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            )
Beispiel #21
0
    def check_export_format(specs: dict, tool_name: str,
                            instruction_name: str):
        """
        Check that exactly one export format is given for the instruction and return it.

        Arguments:
            specs: parsed YAML specification (dict)
            tool_name: name of the calling tool, used in error messages
            instruction_name: name of the instruction under `instructions`

        Returns:
            the single item listed under `export_formats`
        """
        location = f"{instruction_name}/export_formats"
        instruction = specs['instructions'][instruction_name]

        ParameterValidator.assert_keys_present(list(instruction.keys()), ["export_formats"], tool_name, location)
        ParameterValidator.assert_type_and_value(instruction["export_formats"], list, tool_name, location)

        assert len(instruction["export_formats"]) == 1, \
            f"{tool_name}: only one format can be specified under export_formats parameter under " \
            f"{instruction_name}/export_formats, got {instruction['export_formats']} instead."

        return instruction["export_formats"][0]
Beispiel #22
0
    def parse_signals(signals: dict, symbol_table: SymbolTable):
        """
        Create a Signal object for every signal specification and add it to the symbol table.

        Arguments:
            signals: dict {signal id: signal specification}
            symbol_table: symbol table holding the motifs; the created signals are added to it

        Returns:
            tuple (symbol_table, signals)
        """
        for signal_id, signal_spec in signals.items():
            ParameterValidator.assert_keys_present(signal_spec.keys(), SignalParser.VALID_KEYS, "SignalParser", signal_id)

            implanting_strategy = SignalParser._get_implanting_strategy(signal_id, signal_spec)

            # motifs referenced by the signal are checked against the motifs in the symbol table
            known_motifs = symbol_table.get_keys_by_type(SymbolType.MOTIF)
            ParameterValidator.assert_keys(signal_spec["motifs"], known_motifs, "SignalParser",
                                           f"motifs in signal {signal_id}", False)

            signal_motifs = []
            for motif_id in signal_spec["motifs"]:
                signal_motifs.append(symbol_table.get(motif_id))

            symbol_table.add(signal_id, SymbolType.SIGNAL, Signal(signal_id, signal_motifs, implanting_strategy))

        return symbol_table, signals
Beispiel #23
0
    def build_object(cls, **kwargs):
        """
        Replace missing (None) attribute lists with empty lists, validate them and build the exporter.

        Arguments:
            kwargs: must contain "additional_node_attributes" and "additional_edge_attributes" (list or
                None); all kwargs are passed on to CytoscapeNetworkExporter

        Returns:
            CytoscapeNetworkExporter object
        """
        attribute_keys = ("additional_node_attributes", "additional_edge_attributes")

        for attribute_key in attribute_keys:
            if kwargs[attribute_key] is None:
                kwargs[attribute_key] = []

        for attribute_key in attribute_keys:
            ParameterValidator.assert_type_and_value(kwargs[attribute_key], list, "CytoscapeNetworkExporter", attribute_key)

        return CytoscapeNetworkExporter(**kwargs)
Beispiel #24
0
 def __init__(self, k: int, skip_first_n_aa: int, skip_last_n_aa: int, abundance: str, normalize_all_features: bool, name: str = None):
     """
     Arguments:
         k: int, validated with 1 as the bound passed to the validator
         skip_first_n_aa: int, validated with 0 as the bound passed to the validator
         skip_last_n_aa: int, validated with 0 as the bound passed to the validator
         abundance: name of a RelativeAbundanceType member (matched case-insensitively)
         normalize_all_features: bool
         name: optional name of the encoder
     """
     location = "AtchleyKmerEncoder"

     int_params = ((k, "k", 1), (skip_first_n_aa, "skip_first_n_aa", 0), (skip_last_n_aa, "skip_last_n_aa", 0))
     for value, param_name, bound in int_params:
         ParameterValidator.assert_type_and_value(value, int, location, param_name, bound)

     abundance_names = [ab.name for ab in RelativeAbundanceType]
     ParameterValidator.assert_in_valid_list(abundance.upper(), abundance_names, location, "abundance")
     ParameterValidator.assert_type_and_value(normalize_all_features, bool, location, "normalize_all_features")

     self.k = k
     self.skip_first_n_aa = skip_first_n_aa
     self.skip_last_n_aa = skip_last_n_aa
     self.abundance = RelativeAbundanceType[abundance.upper()]
     self.normalize_all_features = normalize_all_features
     self.name = name
     # both paths start out unset
     self.scaler_path = None
     self.vectorizer_path = None
Beispiel #25
0
    def _prepare_parameters(distance_metric: str,
                            attributes_to_match: list,
                            sequence_batch_size: int,
                            context: dict = None):
        """
        Validate the distance metric and prepare the parameters of the DistanceEncoder.

        Arguments:
            distance_metric: name of a DistanceMetricType member, matched case-insensitively
            attributes_to_match: returned unchanged
            sequence_batch_size: returned unchanged
            context: returned unchanged

        Returns:
            dict with keys "distance_metric" (DistanceMetricType member), "attributes_to_match",
            "sequence_batch_size" and "context"
        """
        valid_metrics = [metric.name for metric in DistanceMetricType]

        # Validate the same upper-cased key that is used for the enum lookup below, so that names which
        # the lookup accepts are not rejected by the validation. Non-string values are passed on
        # unchanged so that the validator reports them.
        metric_key = distance_metric.upper() if isinstance(distance_metric, str) else distance_metric
        ParameterValidator.assert_in_valid_list(metric_key, valid_metrics,
                                                "DistanceEncoder",
                                                "distance_metric")

        return {
            "distance_metric": DistanceMetricType[metric_key],
            "attributes_to_match": attributes_to_match,
            "sequence_batch_size": sequence_batch_size,
            "context": context
        }
    def build_object(cls, **kwargs):
        """
        Validate the parameters and build a DeepRCMotifDiscovery object.

        Arguments:
            kwargs: must contain "n_steps" (int, at least 1) and "threshold" (float between 0 and 1,
                both inclusive); "name" is optional

        Returns:
            DeepRCMotifDiscovery object
        """
        location = "DeepRCMotifDiscovery"
        name = kwargs.get("name")

        n_steps = kwargs["n_steps"]
        ParameterValidator.assert_type_and_value(n_steps, int, location, "n_steps", min_inclusive=1)

        threshold = kwargs["threshold"]
        ParameterValidator.assert_type_and_value(threshold, float, location, "threshold", min_inclusive=0, max_inclusive=1)

        return DeepRCMotifDiscovery(n_steps=kwargs["n_steps"], threshold=kwargs["threshold"], name=name)
Beispiel #27
0
    def parse_motifs(motifs: dict, symbol_table: SymbolTable):
        """
        Parse every motif specification and add the resulting motif to the symbol table.

        Arguments:
            motifs: dict {motif id: motif specification}
            symbol_table: symbol table to which the parsed motifs are added

        Returns:
            tuple (symbol_table, motifs)
        """
        valid_motif_keys = ["seed", "instantiation", "seed_chain1", "seed_chain2", "name_chain1", "name_chain2"]

        for motif_id in motifs.keys():
            motif_spec = motifs[motif_id]
            ParameterValidator.assert_keys(motif_spec.keys(), valid_motif_keys, "MotifParser", motif_id, exclusive=False)

            symbol_table.add(motif_id, SymbolType.MOTIF, MotifParser._parse_motif(motif_id, motifs[motif_id]))

        return symbol_table, motifs
Beispiel #28
0
    def parse_encoder(key: str, specs: dict):
        """
        Resolve the encoder class named in the specs and collect its parameters.

        Arguments:
            key: key under which the encoding was specified, used in error locations
            specs: encoding specification

        Returns:
            tuple (encoder class, parameters dict)
        """
        class_path = "encodings"
        valid_encoders = ReflectionHandler.all_nonabstract_subclass_basic_names(DatasetEncoder, "Encoder", class_path)
        encoder = ObjectParser.get_class(specs, valid_encoders, "Encoder", class_path, "EncodingParser", key)

        # the parameters are looked up under the class name without its last 7 characters
        params = ObjectParser.get_all_params(specs, class_path, encoder.__name__[:-7], key)

        required_params = []
        for param_name in list(inspect.signature(encoder.__init__).parameters.keys()):
            if param_name != "self":
                required_params.append(param_name)

        ParameterValidator.assert_all_in_valid_list(params.keys(), required_params, "EncoderParser",
                                                    f"{key}/{encoder.__name__.replace('Encoder', '')}")

        return encoder, params
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: str = None) -> ExploratoryAnalysisInstruction:
        """
        Build an ExploratoryAnalysisInstruction from its specification.

        Arguments:
            key: name given to the resulting instruction
            instruction: instruction specification with the keys `analyses` and `type`
            symbol_table: symbol table passed on when the parameters of each analysis are prepared
            path: not used by this method

        Returns:
            ExploratoryAnalysisInstruction with one ExploratoryAnalysisUnit per analysis
        """
        ParameterValidator.assert_keys(instruction, ["analyses", "type"], "ExploratoryAnalysisParser", "ExploratoryAnalysis")

        exp_analysis_units = {
            analysis_key: ExploratoryAnalysisUnit(**self._prepare_params(analysis, symbol_table))
            for analysis_key, analysis in instruction["analyses"].items()
        }

        return ExploratoryAnalysisInstruction(exploratory_analysis_units=exp_analysis_units, name=key)
Beispiel #30
0
    def parse_instruction(key: str, instruction: dict,
                          symbol_table: SymbolTable, path) -> tuple:
        """
        Parse one instruction with the parser matching its `type` and register the result.

        Arguments:
            key: name of the instruction
            instruction: instruction specification; must contain `type`
            symbol_table: symbol table passed to the parser; the parsed instruction is added to it
            path: passed on to the instruction parser

        Returns:
            tuple (instruction specification, symbol_table)
        """
        ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"], InstructionParser.__name__, key)

        # valid instruction types are the discovered parser class names without the "Parser" suffix
        parser_class_names = ReflectionHandler.discover_classes_by_partial_name("Parser", "dsl/instruction_parsers/")
        valid_instructions = [class_name[:-6] for class_name in parser_class_names]
        ParameterValidator.assert_in_valid_list(instruction["type"], valid_instructions, "InstructionParser", "type")

        parser_class = ReflectionHandler.get_class_by_name(f"{instruction['type']}Parser", "instruction_parsers/")
        parser = parser_class()
        instruction_object = parser.parse(key, instruction, symbol_table, path)

        symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)
        return instruction, symbol_table