コード例 #1
0
ファイル: Coefficients.py プロジェクト: rofrank/immuneML
    def build_object(cls, **kwargs):
        """
        Validate the supplied keyword arguments and build a Coefficients object from them.

        Keys read from kwargs:
            coefs_to_plot: iterable of setting names; each is upper-cased and must match
                the upper-cased name of a CoefficientPlottingSetting member (required).
            cutoff: list of numbers, validated with min_inclusive=1e-15; read only when
                the CUTOFF setting was requested, otherwise an empty list is used.
            n_largest: list of ints, validated with min_inclusive=1; read only when
                the N_LARGEST setting was requested, otherwise an empty list is used.
            name: optional; None when absent.

        Returns:
            Coefficients created from the parsed settings, cutoff, n_largest and name.
        """
        location = "Coefficients"
        requested = [coef.upper() for coef in kwargs["coefs_to_plot"]]
        name = kwargs.get("name")

        valid_names = [item.name.upper() for item in CoefficientPlottingSetting]
        ParameterValidator.assert_all_in_valid_list(requested, valid_names, location, "coefs_to_plot")

        # cutoff values are looked up only if the CUTOFF setting was requested
        cutoff = []
        if CoefficientPlottingSetting.CUTOFF.name in requested:
            cutoff = kwargs["cutoff"]
            ParameterValidator.assert_type_and_value(cutoff, list, location, "cutoff")
            ParameterValidator.assert_all_type_and_value(cutoff, Number, location, "cutoff", min_inclusive=1e-15)

        # likewise, n_largest is looked up only if the N_LARGEST setting was requested
        n_largest = []
        if CoefficientPlottingSetting.N_LARGEST.name in requested:
            n_largest = kwargs["n_largest"]
            ParameterValidator.assert_type_and_value(n_largest, list, location, "n_largest")
            ParameterValidator.assert_all_type_and_value(n_largest, int, location, "n_largest", min_inclusive=1)

        # convert the validated names into enum members
        coefs = CoefficientPlottingSettingList()
        for setting_name in requested:
            coefs.append(CoefficientPlottingSetting[setting_name.upper()])

        return Coefficients(coefs, cutoff, n_largest, name)
コード例 #2
0
 def _prepare_reports(self, reports: list,
                      symbol_table: SymbolTable) -> dict:
     if reports is not None:
         report_objects = {
             report_id: symbol_table.get(report_id)
             for report_id in reports
         }
         ParameterValidator.assert_all_type_and_value(
             report_objects.values(), TrainMLModelReport,
             TrainMLModelParser.__name__, 'reports')
         return report_objects
     else:
         return {}
コード例 #3
0
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> SubsamplingInstruction:
        """
        Validate a subsampling instruction specification and build the instruction object.

        Args:
            key: name of the instruction; used in validation locations and as the instruction name.
            instruction: dict with the keys "type", "dataset", "subsampled_dataset_sizes"
                and "dataset_export_formats".
            symbol_table: used to list the known dataset keys and to fetch the referenced dataset.
            path: not used by this method.

        Returns:
            SubsamplingInstruction built from the dataset, the requested sizes and the exporter
            classes resolved from the export format names.
        """
        location = SubsamplingParser.__name__

        ParameterValidator.assert_keys(instruction.keys(),
                                       ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"],
                                       location, key)

        # the referenced dataset has to be among the dataset keys of the symbol table
        known_datasets = symbol_table.get_keys_by_type(SymbolType.DATASET)
        dataset_id = instruction['dataset']
        ParameterValidator.assert_in_valid_list(dataset_id, known_datasets, location, f'{key}/dataset')
        dataset = symbol_table.get(dataset_id)

        # sizes: a list of ints; 1 and the dataset's example count are passed positionally as bounds
        sizes = instruction['subsampled_dataset_sizes']
        sizes_location = f'{key}/subsampled_dataset_sizes'
        ParameterValidator.assert_type_and_value(sizes, list, location, sizes_location)
        ParameterValidator.assert_all_type_and_value(sizes, int, location, sizes_location,
                                                     1, dataset.get_example_count())

        # export formats: a list of names taken from the available DataExporter subclasses
        valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
        export_formats = instruction['dataset_export_formats']
        formats_location = f"{key}/dataset_export_formats"
        ParameterValidator.assert_type_and_value(export_formats, list, location, formats_location)
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats, location, formats_location)

        exporters = [ReflectionHandler.get_class_by_name(export_format + "Exporter", "dataset_export/")
                     for export_format in export_formats]

        return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=sizes,
                                      dataset_export_formats=exporters, name=key)
コード例 #4
0
    def compute_overlap_matrix(hp_items: List[HPItem]):
        """
        Compute the pairwise percentage overlap between the relevant sequences of the given items.

        For every item, the CSV file at encoder.relevant_sequence_csv_path is read and each row
        is turned into a frozenset of its cell values; the rows of one file form a set.
        The overlap of two items is the size of the intersection of their sets as a percentage
        of the smaller set, rounded to two decimals. The diagonal is set to 100.

        Each file is read exactly once and the resulting sets are kept in memory until the
        matrix is complete, rather than re-reading the files for every pair of items.

        Args:
            hp_items: items whose encoder is validated against SequenceAbundanceEncoder.

        Returns:
            Symmetric numpy array of shape (len(hp_items), len(hp_items)), or None if
            any of the files yields an empty set of sequences.
        """
        ParameterValidator.assert_all_type_and_value([hp_item.encoder for hp_item in hp_items], SequenceAbundanceEncoder,
                                                     'Overlap matrix computation', 'encoders')

        def import_sequences_as_set(path):
            # one frozenset per CSV row, so rows compare regardless of column order
            return set(pd.read_csv(path).apply(frozenset, axis=1).values.tolist())

        # load every file once, in item order; an empty set makes the overlap undefined
        sequence_sets = []
        for hp_item in hp_items:
            sequences = import_sequences_as_set(hp_item.encoder.relevant_sequence_csv_path)
            if not sequences:
                return None
            sequence_sets.append(sequences)

        item_count = len(sequence_sets)
        overlap_matrix = np.zeros((item_count, item_count))

        for index1, sequences1 in enumerate(sequence_sets):
            overlap_matrix[index1, index1] = 100
            # only the upper triangle is computed; the value is mirrored to the lower one
            for index2 in range(index1 + 1, item_count):
                sequences2 = sequence_sets[index2]
                shared_count = len(sequences1.intersection(sequences2))
                overlap = round(shared_count * 100 / min(len(sequences1), len(sequences2)), 2)
                overlap_matrix[index1, index2] = overlap
                overlap_matrix[index2, index1] = overlap

        return overlap_matrix