def compute_overlap_matrix(hp_items: List[HPItem]):
    """Compute the pairwise percentage overlap of relevant sequences between items.

    For every item, the CSV file at ``encoder.relevant_sequence_csv_path`` is
    read and each row is turned into a ``frozenset`` of its cell values; the
    rows of one file together form that item's sequence set. The overlap of
    two items is the size of the intersection of their sets, expressed as a
    percentage of the smaller set and rounded to two decimals.

    Args:
        hp_items: items whose ``encoder`` attributes are validated against
            ``SequenceAbundanceEncoder`` through ``ParameterValidator``.

    Returns:
        A symmetric ``len(hp_items) x len(hp_items)`` numpy array with 100 on
        the diagonal, or ``None`` as soon as any item's sequence set turns out
        to be empty.
    """
    ParameterValidator.assert_all_type_and_value(
        [hp_item.encoder for hp_item in hp_items], SequenceAbundanceEncoder,
        'Overlap matrix computation', 'encoders')

    # Read each CSV exactly once. The previous implementation re-read the file
    # of every later item on each outer iteration (a quadratic number of
    # reads). Files are still read in item order and the function still bails
    # out at the first empty set, so the result is unchanged.
    sequence_sets = []
    for hp_item in hp_items:
        rows = pd.read_csv(hp_item.encoder.relevant_sequence_csv_path)
        sequences = set(rows.apply(frozenset, axis=1).values.tolist())
        if not sequences:
            return None
        sequence_sets.append(sequences)

    item_count = len(sequence_sets)
    overlap_matrix = np.zeros((item_count, item_count))

    for index1, sequences1 in enumerate(sequence_sets):
        overlap_matrix[index1, index1] = 100
        # Only the upper triangle is computed; the value is mirrored below.
        for index2 in range(index1 + 1, item_count):
            sequences2 = sequence_sets[index2]
            shared_count = len(sequences1.intersection(sequences2))
            # Normalise by the smaller set; neither set can be empty here.
            overlap = round(shared_count * 100 / min(len(sequences1), len(sequences2)), 2)
            overlap_matrix[index1, index2] = overlap
            overlap_matrix[index2, index1] = overlap

    return overlap_matrix
def _prepare_reports(self, reports: list, symbol_table: SymbolTable) -> dict:
    """Look up the given report ids in the symbol table.

    Args:
        reports: list of report ids, or ``None`` when there are no reports.
        symbol_table: object whose ``get`` method resolves a report id.

    Returns:
        A dict mapping every report id to the object returned by
        ``symbol_table.get``; an empty dict when ``reports`` is ``None``.
        Both the ``reports`` argument and the resolved objects are passed
        through ``ParameterValidator`` checks first.
    """
    # Nothing to resolve: skip validation entirely.
    if reports is None:
        return {}

    location = TrainMLModelParser.__name__
    ParameterValidator.assert_type_and_value(reports, list, location, "reports")

    report_objects = {}
    for report_id in reports:
        report_objects[report_id] = symbol_table.get(report_id)

    # Every resolved object is checked against TrainMLModelReport.
    ParameterValidator.assert_all_type_and_value(report_objects.values(), TrainMLModelReport,
                                                 location, 'reports')
    return report_objects
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable,
          path: Path = None) -> SubsamplingInstruction:
    """Validate a subsampling instruction specification and build the instruction.

    Args:
        key: name of the instruction; used in validation locations and as
            the ``name`` of the resulting instruction.
        instruction: dict checked against the keys ``type``, ``dataset``,
            ``subsampled_dataset_sizes`` and ``dataset_export_formats``.
        symbol_table: source of the known dataset keys and of the dataset
            object itself.
        path: not used by this method.

    Returns:
        A ``SubsamplingInstruction`` built from the resolved dataset, the
        requested sizes and the exporter classes matching the export formats.
    """
    location = SubsamplingParser.__name__
    valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
    ParameterValidator.assert_keys(instruction.keys(), valid_keys, location, key)

    # The dataset must be one of the datasets registered in the symbol table.
    known_datasets = symbol_table.get_keys_by_type(SymbolType.DATASET)
    ParameterValidator.assert_in_valid_list(instruction['dataset'], known_datasets,
                                            location, f'{key}/dataset')
    dataset = symbol_table.get(instruction['dataset'])

    # Sizes: a list of ints validated with bounds 1 and the dataset's example count.
    sizes = instruction['subsampled_dataset_sizes']
    sizes_location = f'{key}/subsampled_dataset_sizes'
    ParameterValidator.assert_type_and_value(sizes, list, location, sizes_location)
    ParameterValidator.assert_all_type_and_value(sizes, int, location, sizes_location,
                                                 1, dataset.get_example_count())

    # Export formats: a list of names taken from the available exporter classes.
    valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
        DataExporter, 'Exporter', "dataset_export/")
    export_formats = instruction['dataset_export_formats']
    formats_location = f"{key}/dataset_export_formats"
    ParameterValidator.assert_type_and_value(export_formats, list, location, formats_location)
    ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats,
                                                location, formats_location)

    exporters = []
    for export_format in instruction['dataset_export_formats']:
        exporters.append(
            ReflectionHandler.get_class_by_name(export_format + "Exporter", "dataset_export/"))

    return SubsamplingInstruction(dataset=dataset,
                                  subsampled_dataset_sizes=instruction['subsampled_dataset_sizes'],
                                  dataset_export_formats=exporters,
                                  name=key)
def build_object(cls, **kwargs):
    """Build a ``ConfounderAnalysis`` from keyword arguments after validating them.

    Args:
        **kwargs: checked against the keys ``metadata_labels`` and ``name``;
            ``metadata_labels`` is validated as a list whose items are
            validated as ``str``, and ``name`` is validated as ``str``.

    Returns:
        A ``ConfounderAnalysis`` created with the given ``metadata_labels``
        and ``name``.
    """
    location = ConfounderAnalysis.__name__
    ParameterValidator.assert_keys(kwargs.keys(), ['metadata_labels', 'name'], location, location)

    # Validate the container first, then each of its elements.
    metadata_labels = kwargs['metadata_labels']
    ParameterValidator.assert_type_and_value(metadata_labels, list, location, 'metadata_labels')
    ParameterValidator.assert_all_type_and_value(metadata_labels, str, location, 'metadata_labels')

    name = kwargs['name']
    ParameterValidator.assert_type_and_value(name, str, location, 'name')

    return ConfounderAnalysis(metadata_labels=metadata_labels, name=name)
def build_object(cls, **kwargs):
    """Build a ``Coefficients`` object from keyword arguments.

    Args:
        **kwargs:
            coefs_to_plot: required; iterable of strings that are upper-cased
                and validated against the upper-cased names of
                ``CoefficientPlottingSetting``.
            cutoff: read only when the ``CUTOFF`` setting is requested;
                validated as a list of ``Number`` with ``min_inclusive=1e-15``.
            n_largest: read only when the ``N_LARGEST`` setting is requested;
                validated as a list of ``int`` with ``min_inclusive=1``.
            name: optional; ``None`` when absent.

    Returns:
        A ``Coefficients`` instance. ``cutoff`` and ``n_largest`` are empty
        lists when their setting was not requested.

    Raises:
        KeyError: if ``coefs_to_plot`` is missing, or if a requested setting's
            companion argument (``cutoff`` / ``n_largest``) is missing.
    """
    location = "Coefficients"
    name = kwargs.get("name")
    coefs_to_plot = [coef.upper() for coef in kwargs["coefs_to_plot"]]

    valid_settings = [item.name.upper() for item in CoefficientPlottingSetting]
    ParameterValidator.assert_all_in_valid_list(coefs_to_plot, valid_settings, location, "coefs_to_plot")

    # Companion arguments are only read when their setting was requested.
    cutoff = []
    if CoefficientPlottingSetting.CUTOFF.name in coefs_to_plot:
        cutoff = kwargs["cutoff"]
        ParameterValidator.assert_type_and_value(cutoff, list, location, "cutoff")
        ParameterValidator.assert_all_type_and_value(cutoff, Number, location, "cutoff", min_inclusive=1e-15)

    n_largest = []
    if CoefficientPlottingSetting.N_LARGEST.name in coefs_to_plot:
        n_largest = kwargs["n_largest"]
        ParameterValidator.assert_type_and_value(n_largest, list, location, "n_largest")
        ParameterValidator.assert_all_type_and_value(n_largest, int, location, "n_largest", min_inclusive=1)

    # Keywords are already upper-cased above, so they index the enum by name directly.
    coefs = CoefficientPlottingSettingList()
    for keyword in coefs_to_plot:
        coefs.append(CoefficientPlottingSetting[keyword])

    return Coefficients(coefs_to_plot=coefs, cutoff=cutoff, n_largest=n_largest, name=name)