Example #1
    def import_dataset(params: dict, name: str) -> SequenceDataset:
        """
        Returns a randomly generated sequence dataset according to the given parameters.

        YAML specification:

            result_path: path/where/to/store/results/
            sequence_count: 100 # number of random sequences to generate
            length_probabilities:
                14: 0.8 # 80% of all generated sequences will have length 14
                15: 0.2 # 20% of all generated sequences will have length 15
            labels:
                epitope1: # label name
                    True: 0.5 # 50% of the sequences will have class True
                    False: 0.5 # 50% of the sequences will have class False
                epitope2: # a second label; its classes are assigned to sequences independently of the previous label
                    1: 0.3 # 30% of the generated sequences will have class 1
                    0: 0.7 # 70% of the generated sequences will have class 0

        """
        valid_keys = [
            "sequence_count", "length_probabilities", "labels", "result_path"
        ]
        ParameterValidator.assert_all_in_valid_list(
            list(params.keys()), valid_keys, "RandomSequenceDatasetImport",
            "params")

        return RandomDatasetGenerator.generate_sequence_dataset(
            sequence_count=params["sequence_count"],
            length_probabilities=params["length_probabilities"],
            labels=params["labels"],
            path=params["result_path"])
Example #2
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable,
              path: Path) -> MLApplicationInstruction:
        location = MLApplicationParser.__name__
        ParameterValidator.assert_keys(instruction.keys(), [
            'type', 'dataset', 'number_of_processes', 'config_path',
            'store_encoded_data'
        ], location, key)
        ParameterValidator.assert_in_valid_list(
            instruction['dataset'],
            symbol_table.get_keys_by_type(SymbolType.DATASET), location,
            f"{key}: dataset")
        ParameterValidator.assert_type_and_value(
            instruction['number_of_processes'],
            int,
            location,
            f"{key}: number_of_processes",
            min_inclusive=1)
        ParameterValidator.assert_type_and_value(instruction['config_path'],
                                                 str, location,
                                                 f'{key}: config_path')
        ParameterValidator.assert_type_and_value(
            instruction['store_encoded_data'], bool, location,
            f'{key}: store_encoded_data')

        hp_setting, label = self._parse_hp_setting(instruction, path, key)

        instruction = MLApplicationInstruction(
            dataset=symbol_table.get(instruction['dataset']),
            name=key,
            number_of_processes=instruction['number_of_processes'],
            label_configuration=LabelConfiguration([label]),
            hp_setting=hp_setting,
            store_encoded_data=instruction['store_encoded_data'])

        return instruction
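For orientation, a YAML instruction entry that would satisfy the key and type checks in this parser might look like the following sketch; all identifiers are hypothetical:

    my_ml_application: # instruction key
        type: MLApplication
        dataset: d1 # must name a dataset defined in the symbol table
        number_of_processes: 4 # int, min_inclusive=1
        config_path: ./trained_model.zip # str
        store_encoded_data: False # bool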
Example #3
    def compute_overlap_matrix(hp_items: List[HPItem]):

        ParameterValidator.assert_all_type_and_value(
            [hp_item.encoder for hp_item in hp_items],
            SequenceAbundanceEncoder, 'Overlap matrix computation', 'encoders')

        overlap_matrix = np.zeros((len(hp_items), len(hp_items)))

        def import_sequences_as_set(path):
            return set(pd.read_csv(path).apply(frozenset, axis=1).values.tolist())

        for index1 in range(len(hp_items)):
            overlap_matrix[index1, index1] = 100
            sequences1 = import_sequences_as_set(
                hp_items[index1].encoder.relevant_sequence_csv_path)
            if len(sequences1) == 0:
                return None
            for index2 in range(index1 + 1, len(hp_items)):
                sequences2 = import_sequences_as_set(
                    hp_items[index2].encoder.relevant_sequence_csv_path)
                if len(sequences2) == 0:
                    return None
                intersection = sequences1.intersection(sequences2)
                overlap_matrix[index1, index2] = round(
                    len(intersection) * 100 /
                    min(len(sequences1), len(sequences2)), 2)
                overlap_matrix[index2, index1] = overlap_matrix[index1, index2]

        return overlap_matrix
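The score filled in above is len(intersection) * 100 / min(len(sequences1), len(sequences2)), rounded to two decimals; a quick arithmetic check under assumed set sizes:

    # hypothetical sizes: 10 shared sequences, sets of 40 and 50 sequences
    score = round(10 * 100 / min(40, 50), 2)
    assert score == 25.0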
Example #4
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> SimulationInstruction:
        ParameterValidator.assert_keys(
            instruction.keys(),
            ["dataset", "simulation", "type", "export_formats"],
            "SimulationParser", key)

        signals = [
            signal.item
            for signal in symbol_table.get_by_type(SymbolType.SIGNAL)
        ]
        simulation = symbol_table.get(instruction["simulation"])
        dataset = symbol_table.get(instruction["dataset"])

        exporters = self.parse_exporters(instruction)

        process = SimulationInstruction(signals=signals,
                                        simulation=simulation,
                                        dataset=dataset,
                                        name=key,
                                        exporters=exporters)
        return process
Example #5
    def _prepare_specs(self):
        with self.yaml_path.open("r") as file:
            specs = yaml.safe_load(file)

        ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"], GalaxyTrainMLModel.__name__, "YAML specification")
        ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"], GalaxyTrainMLModel.__name__,
                                                    "YAML specification")

        ParameterValidator.assert_type_and_value(specs["instructions"], dict, GalaxyTrainMLModel.__name__, "instructions")

        assert len(list(specs["instructions"].keys())) == 1, f"{GalaxyTrainMLModel.__name__}: one instruction has to be specified under " \
                                                             f"`instructions`, got the following instead: {list(specs['instructions'].keys())}."

        self.instruction_name = list(specs["instructions"].keys())[0]

        ParameterValidator.assert_type_and_value(specs['instructions'][self.instruction_name], dict, GalaxyTrainMLModel.__name__,
                                                 self.instruction_name)
        ParameterValidator.assert_keys_present(specs['instructions'][self.instruction_name].keys(), ['type'], GalaxyTrainMLModel.__name__,
                                               self.instruction_name)

        assert specs['instructions'][self.instruction_name]['type'] == TrainMLModelInstruction.__name__[:-11], \
            f"{GalaxyTrainMLModel.__name__}: instruction `type` under {self.instruction_name} has to be {TrainMLModelInstruction.__name__[:-11]} " \
            f"for this tool."

        assert len(
            specs['instructions'][self.instruction_name]['labels']) == 1, f"{GalaxyTrainMLModel.__name__}: one label has to be specified under " \
                                                                          f"`labels`, got the following instead: {specs['instructions'][self.instruction_name]['labels']}."
        Util.check_paths(specs, GalaxyTrainMLModel.__name__)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
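Putting the assertions together, a specification shape that passes _prepare_specs could look like this sketch; instruction and label names are hypothetical:

    definitions:
        # ... datasets, encodings, ml_methods ...
    instructions:
        train_instruction: # exactly one instruction allowed
            type: TrainMLModel # TrainMLModelInstruction.__name__ without the 'Instruction' suffix
            labels: [epitope] # exactly one label allowed
            # ... remaining TrainMLModel parameters ...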
Example #6
    def build_object(cls, **kwargs):
        comparison_label = kwargs.get("comparison_label")
        color_grouping_label = kwargs.get("color_grouping_label")
        row_grouping_label = kwargs.get("row_grouping_label")
        column_grouping_label = kwargs.get("column_grouping_label")
        log_scale = kwargs.get("log_scale")
        keep_fraction = float(kwargs["keep_fraction"]) if "keep_fraction" in kwargs else 1.0
        ParameterValidator.assert_type_and_value(keep_fraction,
                                                 float,
                                                 "FeatureComparison",
                                                 "keep_fraction",
                                                 min_inclusive=0,
                                                 max_inclusive=1)
        ParameterValidator.assert_type_and_value(log_scale, bool,
                                                 "FeatureComparison",
                                                 "log_scale")

        assert comparison_label is not None, "FeatureComparison: the parameter 'comparison_label' must be set in order to compare across this label"

        assert comparison_label != color_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as color_grouping_label"
        assert comparison_label != row_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as row_grouping_label"
        assert comparison_label != column_grouping_label, f"FeatureComparison: comparison label {comparison_label} can not be used as column_grouping_label"

        return FeatureComparison(**kwargs)
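A hedged usage sketch, assuming FeatureComparison's constructor accepts these kwargs; comparison_label is mandatory, log_scale must be a bool, and keep_fraction must lie in [0, 1] (the label value is hypothetical):

    report = FeatureComparison.build_object(
        comparison_label="disease_status",
        log_scale=False,
        keep_fraction=0.8)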
Example #7
    def _prepare_report_config(self, instruction_key, instruction, split_key,
                               symbol_table):
        if "reports" in instruction[split_key] and len(
                instruction[split_key]["reports"]) > 0:
            location = f"{instruction_key}/{split_key}/reports"
            report_types = list(signature(ReportConfig).parameters.keys())
            ParameterValidator.assert_all_in_valid_list(
                instruction[split_key]["reports"].keys(), report_types,
                location, "reports")

            for report_type in instruction[split_key]["reports"]:
                ParameterValidator.assert_type_and_value(
                    instruction[split_key]["reports"][report_type], list,
                    f"{location}/{report_type}", report_type)

            report_config_input = {
                report_type: {report_id: symbol_table.get(report_id)
                              for report_id in instruction[split_key]["reports"][report_type]}
                for report_type in instruction[split_key]["reports"]
            }
        else:
            report_config_input = {}

        return report_config_input
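The validated reports block maps report types to lists of report identifiers; the type names below are illustrative only, since the real ones come from signature(ReportConfig).parameters:

    reports: # under the split key
        data_splits: [my_data_report] # key must match a ReportConfig parameter name
        models: [my_model_report]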
Example #8
    def prepare_specs(self):
        with self.yaml_path.open("r") as file:
            specs = yaml.safe_load(file)

        self.instruction_name = Util.check_instruction_type(
            specs, DataSimulationTool.__name__, self.expected_instruction)
        self.export_format = Util.check_export_format(
            specs, DataSimulationTool.__name__, self.instruction_name)

        ParameterValidator.assert_keys_present(specs["definitions"],
                                               ["datasets"],
                                               DataSimulationTool.__name__,
                                               "definitions/datasets")
        ParameterValidator.assert_type_and_value(
            specs['definitions']['datasets'], dict,
            DataSimulationTool.__name__, "definitions/datasets")

        dataset_names = list(specs['definitions']['datasets'].keys())
        assert len(dataset_names) == 1, f"{DataSimulationTool.__name__}: one dataset has to be defined under definitions/datasets, got " \
                                        f"{dataset_names} instead."

        self.dataset_name = dataset_names[0]

        Util.check_paths(specs, DataSimulationTool.__name__)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
Example #9
    def prepare_reference(reference_params: dict, location: str, paired: bool):
        ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location,
                                       "reference")

        seq_import_params = reference_params["params"] if "params" in reference_params else {}

        assert os.path.isfile(seq_import_params["path"]), f"{location}: the file {seq_import_params['path']} does not exist. " \
                                                          f"Specify the correct path under reference."

        if "is_repertoire" in seq_import_params:
            assert seq_import_params["is_repertoire"] == False, f"{location}: is_repertoire must be False for SequenceImport"
        else:
            seq_import_params["is_repertoire"] = False

        if "paired" in seq_import_params:
            assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"
        else:
            seq_import_params["paired"] = paired

        format_str = reference_params["format"]

        import_class = ReflectionHandler.get_class_by_name("{}Import".format(format_str))
        default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                                  DefaultParamsLoader.convert_to_snake_case(format_str))

        params = {**default_params, **seq_import_params}

        processed_params = DatasetImportParams.build_object(**params)

        receptors = ImportHelper.import_items(import_class, reference_params["params"]["path"], processed_params)

        return receptors
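A sketch of a reference specification this helper would accept; the format name and file path are hypothetical, and the format string is resolved to a class named '<format>Import' via reflection:

    reference_params = {
        "format": "VDJdb", # resolved to a class named VDJdbImport by ReflectionHandler
        "params": {"path": "reference_receptors.tsv"}
    }
    receptors = prepare_reference(reference_params, location="MatchedReceptorsEncoder", paired=True)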
Example #10
    def _prepare_parameters(use_positional_info: bool,
                            distance_to_seq_middle: int,
                            flatten: bool,
                            sequence_type: str,
                            name: str = None):

        location = OneHotEncoder.__name__

        ParameterValidator.assert_type_and_value(use_positional_info, bool,
                                                 location,
                                                 "use_positional_info")
        if use_positional_info:
            ParameterValidator.assert_type_and_value(distance_to_seq_middle,
                                                     int,
                                                     location,
                                                     "distance_to_seq_middle",
                                                     min_inclusive=1)
        else:
            distance_to_seq_middle = None

        ParameterValidator.assert_type_and_value(flatten, bool, location,
                                                 "flatten")
        ParameterValidator.assert_type_and_value(sequence_type, str, location,
                                                 'sequence_type')
        ParameterValidator.assert_in_valid_list(
            sequence_type.upper(), [item.name for item in SequenceType],
            location, 'sequence_type')

        return {
            "use_positional_info": use_positional_info,
            "distance_to_seq_middle": distance_to_seq_middle,
            "flatten": flatten,
            "sequence_type": SequenceType[sequence_type.upper()],
            "name": name
        }
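A hypothetical call illustrating the contract: distance_to_seq_middle is only validated (and kept) when use_positional_info is True, and sequence_type must name a SequenceType member, case-insensitively:

    params = OneHotEncoder._prepare_parameters(
        use_positional_info=True,
        distance_to_seq_middle=6,
        flatten=False,
        sequence_type="amino_acid") # assumes SequenceType.AMINO_ACID exists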
Example #11
    def __init__(self,
                 k: int,
                 skip_first_n_aa: int,
                 skip_last_n_aa: int,
                 abundance: str,
                 normalize_all_features: bool,
                 name: str = None):
        location = "AtchleyKmerEncoder"
        ParameterValidator.assert_type_and_value(k, int, location, "k", 1)
        ParameterValidator.assert_type_and_value(skip_first_n_aa, int,
                                                 location, "skip_first_n_aa",
                                                 0)
        ParameterValidator.assert_type_and_value(skip_last_n_aa, int, location,
                                                 "skip_last_n_aa", 0)
        ParameterValidator.assert_in_valid_list(
            abundance.upper(), [ab.name for ab in RelativeAbundanceType],
            location, "abundance")
        ParameterValidator.assert_type_and_value(normalize_all_features, bool,
                                                 location,
                                                 "normalize_all_features")
        self.k = k
        self.skip_first_n_aa = skip_first_n_aa
        self.skip_last_n_aa = skip_last_n_aa
        self.abundance = RelativeAbundanceType[abundance.upper()]
        self.normalize_all_features = normalize_all_features
        self.name = name
        self.scaler_path = None
        self.vectorizer_path = None
Example #12
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                if "preprocessing" in setting and setting[
                        "preprocessing"] is not None:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__,
                        f"settings, {index + 1}. entry: preprocessing")
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                        if not all(preproc.keeps_example_count()
                                   for preproc in preprocessing_sequence):
                            raise ValueError(
                                f"{TrainMLModelParser.__name__}: preprocessing sequence {preproc_name} includes preprocessing that "
                                f"change the number of examples at runtime and as such cannot be used with this instruction. See the "
                                f"documentation for the preprocessing or alternatively use them with other instructions."
                            )
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                ml_method = symbol_table.get(setting["ml_method"])
                ml_method.check_encoder_compatibility(encoder)

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=ml_method,
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            ) from key_error
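Each entry of the settings list parsed above combines an encoding, an ML method, and an optional preprocessing sequence; a hypothetical YAML shape:

    settings:
        - encoding: e1
          ml_method: m1
        - encoding: e1
          ml_method: m2
          preprocessing: p1 # optional; must be defined under definitions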
Example #13
    def _prepare_parameters(reference: dict,
                            max_edit_distances: dict,
                            name: str = None):
        location = "MatchedReceptorsEncoder"

        legal_chains = [
            chain
            for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor())
            for chain in receptor.get_chains()
        ]

        if type(max_edit_distances) is int:
            max_edit_distances = {
                chain: max_edit_distances
                for chain in legal_chains
            }
        elif type(max_edit_distances) is dict:
            ParameterValidator.assert_keys(max_edit_distances.keys(),
                                           legal_chains,
                                           location,
                                           "max_edit_distances",
                                           exclusive=False)
        else:
            ParameterValidator.assert_type_and_value(max_edit_distances, dict,
                                                     location,
                                                     'max_edit_distances')

        reference_receptors = MatchedReferenceUtil.prepare_reference(
            reference, location=location, paired=True)

        return {
            "reference_receptors": reference_receptors,
            "max_edit_distances": max_edit_distances,
            "name": name
        }
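max_edit_distances can be given as a single int, which is expanded to every legal chain, or as a per-chain dict; a hedged sketch where reference_spec and the chain names are hypothetical:

    params = MatchedReceptorsEncoder._prepare_parameters(
        reference=reference_spec, max_edit_distances=1) # same distance for all chains
    params = MatchedReceptorsEncoder._prepare_parameters(
        reference=reference_spec, max_edit_distances={"ALPHA": 1, "BETA": 2}) # per-chain distances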
Example #14
    def add_label(self,
                  label_name: str,
                  values: list = None,
                  auxiliary_labels: list = None,
                  positive_class=None):

        vals = list(values) if values else None

        if label_name in self._labels and self._labels[
                label_name] is not None and len(self._labels[label_name]) > 0:
            warnings.warn(
                "Label " + label_name +
                " has already been set. Overriding existing values...",
                Warning)

        if positive_class is not None:
            if vals and all(isinstance(val, str) for val in vals) and not isinstance(positive_class, str):
                positive_class = str(positive_class)
            ParameterValidator.assert_in_valid_list(positive_class, vals,
                                                    Label.__name__,
                                                    'positive_class')
        else:
            positive_class = self._get_default_positive_class(values)
            if positive_class:
                logging.info(
                    f"LabelConfiguration: set default positive class '{positive_class}' for label {label_name}"
                )

        self._labels[label_name] = Label(label_name, vals, auxiliary_labels,
                                         positive_class)
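A usage sketch, assuming this method lives on LabelConfiguration as the log message suggests; positive_class is validated against the provided values:

    label_config = LabelConfiguration()
    label_config.add_label("epitope1", values=[True, False], positive_class=True)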
Example #15
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> ExploratoryAnalysisInstruction:
        exp_analysis_units = {}

        ParameterValidator.assert_keys(
            instruction, ["analyses", "type", "number_of_processes"],
            "ExploratoryAnalysisParser", "ExploratoryAnalysis")
        ParameterValidator.assert_type_and_value(
            instruction["number_of_processes"], int,
            ExploratoryAnalysisParser.__name__, "number_of_processes")

        for analysis_key, analysis in instruction["analyses"].items():

            params = self._prepare_params(analysis, symbol_table,
                                          f"{key}/{analysis_key}")
            params["number_of_processes"] = instruction["number_of_processes"]
            exp_analysis_units[analysis_key] = ExploratoryAnalysisUnit(
                **params)

        process = ExploratoryAnalysisInstruction(
            exploratory_analysis_units=exp_analysis_units, name=key)
        return process
Example #16
    def build(cls, **kwargs):
        ParameterValidator.assert_keys_present(
            list(kwargs.keys()),
            ['metadata_file', 'name', 'repertoire_ids', 'metadata_fields'],
            RepertoireDataset.__name__, "repertoire dataset")
        repertoires = []
        metadata_df = pd.read_csv(kwargs['metadata_file'],
                                  comment=Constants.COMMENT_SIGN)
        for index, row in metadata_df.iterrows():
            filename = Path(kwargs['metadata_file']).parent / row['filename']
            if not filename.is_file() and 'repertoires' in str(filename):
                filename = filename.parent.parent / Path(row['filename']).name
            repertoire = Repertoire(data_filename=filename,
                                    metadata_filename=filename.parent /
                                    f'{filename.stem}_metadata.yaml',
                                    identifier=row['identifier'])
            repertoires.append(repertoire)

        if "repertoire_ids" in kwargs.keys(
        ) and "repertoires" not in kwargs.keys(
        ) and kwargs['repertoire_ids'] is not None:
            assert all(rep.identifier == kwargs['repertoire_ids'][i] for i, rep in enumerate(repertoires)), \
                f"{RepertoireDataset.__name__}: repertoire ids from the iml_dataset file and metadata file don't match for the dataset " \
                f"{kwargs['name']} with identifier {kwargs['identifier']}."

        return RepertoireDataset(**{**kwargs, **{"repertoires": repertoires}})
Example #17
    def parse_object(specs,
                     valid_class_names: list,
                     class_name_ending: str,
                     class_path: str,
                     location: str,
                     key: str,
                     builder: bool = False,
                     return_params_dict: bool = False):
        class_name = ObjectParser.get_class_name(specs, valid_class_names,
                                                 class_name_ending, location,
                                                 key)
        ParameterValidator.assert_in_valid_list(class_name, valid_class_names,
                                                location, key)

        cls = ReflectionHandler.get_class_by_name(
            f"{class_name}{class_name_ending}", class_path)
        params = ObjectParser.get_all_params(specs, class_path, class_name,
                                             key)

        try:
            if "name" not in inspect.signature(cls.__init__).parameters.keys():
                del params["name"]
            obj = cls.build_object(
                **params) if builder and hasattr(cls, "build_object") else cls(
                    **params)
        except TypeError as err:
            raise AssertionError(
                f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
                f"under key {key}. Valid parameter names are: "
                f"{list(inspect.signature(cls.__init__).parameters.keys())}"
            ) from err

        return (obj, {class_name: params}) if return_params_dict else obj
Example #18
    def _prepare_reports(self, reports: list, symbol_table: SymbolTable) -> dict:
        if reports is not None:
            ParameterValidator.assert_type_and_value(reports, list, TrainMLModelParser.__name__, "reports")
            report_objects = {report_id: symbol_table.get(report_id) for report_id in reports}
            ParameterValidator.assert_all_type_and_value(report_objects.values(), TrainMLModelReport, TrainMLModelParser.__name__, 'reports')
            return report_objects
        else:
            return {}
Example #19
    def _check_specs(self, workflow_specification):
        location = 'MultiDatasetBenchmarkTool'
        ParameterValidator.assert_keys(
            workflow_specification.keys(),
            ['definitions', 'instructions', 'output'], location,
            'YAML specification')

        self._check_dataset_specs(workflow_specification, location)
        self._check_instruction_specs(workflow_specification, location)
Example #20
    def parse(specs: dict, symbol_table: SymbolTable) -> dict:
        if "output" in specs:
            ParameterValidator.assert_keys(specs["output"], ["format"], "OutputParser", "output")
            ParameterValidator.assert_in_valid_list(specs["output"]["format"], ["HTML"], "OutputParser", "format")
        else:
            specs["output"] = {"format": "HTML"}
        symbol_table.add("output", SymbolType.OUTPUT, specs["output"])

        return specs["output"]
Example #21
    def _check_label_format(self, labels: list, instruction_key: str):
        ParameterValidator.assert_type_and_value(labels, list, TrainMLModelParser.__name__, f'{instruction_key}/labels')
        assert all(isinstance(label, (str, dict)) for label in labels), \
            f"{TrainMLModelParser.__name__}: labels under {instruction_key} were not defined properly. Each entry in the list of labels has to be " \
            f"either a label name, or a mapping with the parameter 'positive_class' defined under the label name."

        assert all(len(list(label.keys())) == 1 and isinstance(list(label.values())[0], dict) and 'positive_class' in list(label.values())[0]
                   and len(list(list(label.values())[0].keys())) == 1 for label in [l for l in labels if isinstance(l, dict)]), \
            f"{TrainMLModelParser.__name__}: labels that are specified by more than the label name can include only one parameter, called 'positive_class'."
Example #22
    def __init__(self, percentage: float, show_warnings: bool = True):
        super().__init__()

        ParameterValidator.assert_type_and_value(percentage, float, "TCRdistClassifier", "percentage", min_inclusive=0., max_inclusive=1.)

        self.percentage = percentage
        self.k = None
        self.label = None
        self.show_warnings = show_warnings
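A minimal instantiation sketch; percentage must be a float between 0 and 1 inclusive:

    classifier = TCRdistClassifier(percentage=0.1, show_warnings=False)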
Example #23
    def _prepare_parameters(distance_metric: str, attributes_to_match: list, sequence_batch_size: int, context: dict = None):
        valid_metrics = [metric.name for metric in DistanceMetricType]
        ParameterValidator.assert_in_valid_list(distance_metric.upper(), valid_metrics, "DistanceEncoder", "distance_metric")

        return {
            "distance_metric": DistanceMetricType[distance_metric.upper()],
            "attributes_to_match": attributes_to_match,
            "sequence_batch_size": sequence_batch_size,
            "context": context
        }
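A hypothetical call; the metric name must match a DistanceMetricType member, and the attribute names here are illustrative:

    params = DistanceEncoder._prepare_parameters(
        distance_metric="JACCARD", # assumes DistanceMetricType.JACCARD exists
        attributes_to_match=["sequence_aas"],
        sequence_batch_size=1000)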
Example #24
    def parse_motifs(motifs: dict, symbol_table: SymbolTable):

        valid_motif_keys = ["seed", "instantiation", "seed_chain1", "seed_chain2", "name_chain1", "name_chain2"]
        for key in motifs.keys():

            ParameterValidator.assert_keys(motifs[key].keys(), valid_motif_keys, "MotifParser", key, exclusive=False)

            motif = MotifParser._parse_motif(key, motifs[key])
            symbol_table.add(key, SymbolType.MOTIF, motif)

        return symbol_table, motifs
Example #25
    def build_object(cls, **kwargs):

        if kwargs["additional_node_attributes"] is None:
            kwargs["additional_node_attributes"] = []
        if kwargs["additional_edge_attributes"] is None:
            kwargs["additional_edge_attributes"] = []

        ParameterValidator.assert_type_and_value(kwargs["additional_node_attributes"], list, "CytoscapeNetworkExporter", "additional_node_attributes")
        ParameterValidator.assert_type_and_value(kwargs["additional_edge_attributes"], list, "CytoscapeNetworkExporter", "additional_edge_attributes")

        return CytoscapeNetworkExporter(**kwargs)
Example #26
    def build_object(cls, **kwargs):
        location = "TrainingPerformance"        
        valid_metrics = [m.name for m in Metric]

        name = kwargs["name"] if "name" in kwargs else None
        metrics = kwargs["metrics"] if "metrics" in kwargs else valid_metrics
        metrics = [m.upper() for m in metrics]

        ParameterValidator.assert_all_in_valid_list(metrics, valid_metrics, location, 'metrics')

        return TrainingPerformance(set(metrics), name=name)
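A usage sketch: metric names are upper-cased and checked against the Metric enum, and omitting metrics selects all of them:

    report = TrainingPerformance.build_object(
        metrics=["accuracy", "balanced_accuracy"], # assumes these are Metric members
        name="training_performance")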
Example #27
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: Path = None) -> ExploratoryAnalysisInstruction:
        exp_analysis_units = {}

        ParameterValidator.assert_keys(instruction, ["analyses", "type"], "ExploratoryAnalysisParser", "ExploratoryAnalysis")
        for analysis_key, analysis in instruction["analyses"].items():

            params = self._prepare_params(analysis, symbol_table)
            exp_analysis_units[analysis_key] = ExploratoryAnalysisUnit(**params)

        process = ExploratoryAnalysisInstruction(exploratory_analysis_units=exp_analysis_units, name=key)
        return process
Example #28
    def build_object(cls, **kwargs):
        ParameterValidator.assert_keys_present(list(kwargs.keys()),
                                               ['file_format', 'name'],
                                               DesignMatrixExporter.__name__,
                                               DesignMatrixExporter.__name__)
        ParameterValidator.assert_in_valid_list(
            kwargs['file_format'],
            ['npy', 'csv', 'npy.zip', 'csv.zip', 'hdf5.zip'],
            DesignMatrixExporter.__name__, 'file_format')

        return DesignMatrixExporter(**kwargs)
Example #29
    def _prepare_params(self, analysis: dict, symbol_table: SymbolTable) -> dict:

        valid_keys = ["dataset", "report", "preprocessing_sequence", "labels", "encoding", "number_of_processes"]
        ParameterValidator.assert_keys(list(analysis.keys()), valid_keys, "ExploratoryAnalysisParser", "analysis", False)

        params = {"dataset": symbol_table.get(analysis["dataset"]), "report": copy.deepcopy(symbol_table.get(analysis["report"]))}

        optional_params = self._prepare_optional_params(analysis, symbol_table)
        params = {**params, **optional_params}

        return params
Example #30
    def _check_dataset(self, specs):
        ParameterValidator.assert_keys_present(specs["definitions"].keys(),
                                               ['datasets'],
                                               DatasetGenerationTool.__name__,
                                               'definitions')
        assert len(specs['definitions']['datasets'].keys()) == 1, \
            f"{DatasetGenerationTool.__name__}: only one dataset can be defined with this Galaxy tool, got these " \
            f"instead: {list(specs['definitions']['datasets'].keys())}."

        assert len(specs['instructions'].keys()) == 1, \
            f"{DatasetGenerationTool.__name__}: only one instruction of type DatasetExport can be defined with this Galaxy tool, got these " \
            f"instructions instead: {list(specs['instructions'].keys())}."