def transform(data: utils.URLPath, meta: utils.URLPath, output: utils.URLPath, reference: utils.URLPath, transargs: json.loads = None, sample: int = 0):
    """Transform dataset using a reference SOM.

    Args:
        data: Path to fcs data.
        meta: Path to dataset metadata referencing the fcs data.
        output: Destination path for the transformed SOMs.
        reference: Path to a pretrained reference SOM model.
        transargs: Optional keyword options used when loading the reference
            model; falls back to DEFAULT_TRANSFORM_SOM_ARGS when None.
        sample: Number of samples to transform from each group, only
            useful for testing purposes. 0 transforms the whole dataset.
    """
    dataset = io_functions.load_case_collection(data, meta)
    # randomly sample 'sample' number cases from each group
    if sample:
        dataset = dataset.sample(sample)
    if transargs is None:
        transargs = DEFAULT_TRANSFORM_SOM_ARGS
    # Fixed typo in user-facing message ("referece" -> "reference").
    print(f"Loading reference from {reference}")
    model = io_functions.load_casesom(reference, **transargs)
    transform_dataset_to_som(model, dataset, output)
def load_existing_casesom(path: utils.URLPath, args: dict) -> CaseSom:
    """Load a CaseSom model from the given path, returning None on failure.

    Args:
        path: Location of a previously saved casesom model.
        args: Keyword arguments forwarded to io_functions.load_casesom.

    Returns:
        The loaded CaseSom, or None when no usable model exists at path.
    """
    try:
        return io_functions.load_casesom(path, **args)
    except Exception as e:
        # Deliberate best-effort loader: callers treat None as "no existing
        # model, train a new one". Fixed the malformed log format (the colon
        # previously trailed the message after the error text).
        LOGGER.warning("Failed to load existing casesom at %s: %s", path, e)
        return None
def reconfigure_som_model(som_model: CaseSom, args: dict) -> CaseSom:
    """Reconfigure SOM by saving a copy and loading it again.

    Args:
        som_model: Model to round-trip through disk.
        args: Keyword arguments applied when reloading the model.

    Returns:
        A freshly loaded CaseSom configured with args.
    """
    import tempfile

    # Use a unique temporary directory instead of the fixed
    # /tmp/flowcat/sommodel path: the fixed path collided between
    # concurrent runs and was never cleaned up on failure.
    tmp_dir = tempfile.mkdtemp(prefix="flowcat-")
    tmp_path = utils.URLPath(tmp_dir) / "sommodel"
    try:
        io_functions.save_casesom(som_model, tmp_path)
        reconfigured_model = io_functions.load_casesom(tmp_path, **args)
    finally:
        # Always remove the scratch copy, even if save/load raised.
        rmtree(tmp_dir)
    return reconfigured_model
def main(
        data: utils.URLPath,
        meta: utils.URLPath,
        output: utils.URLPath,
        reference_ids: utils.URLPath = None,
        reference: utils.URLPath = None,
        tensorboard_dir: utils.URLPath = None,
        modelargs: json.loads = None,
        transargs: json.loads = None,
        mode: str = "fit_transform",
):
    """Train a SOM and use its weights to initialize individual SOM training.

    Args:
        data: Path to fcs data.
        meta: Path to dataset metadata, this should correctly reference fcs data.
        output: Path to output model and transformed cases.
        reference_ids: Optionally list ids to be used for reference SOM generation.
        reference: Optionally use pretrained model.
        tensorboard_dir: Optional directory for tensorboard logs during transform.
        modelargs: Optionally give specific options for reference SOM generation.
        transargs: Optionally give specific options for transforming individual SOMs.
        mode: Whether to fit or to transform. Default both.

    Raises:
        ValueError: If mode is unknown, or neither reference nor
            reference_ids is provided.
    """
    # Validate up front: previously a mistyped mode silently ran the full
    # fit_transform path, and a missing reference/reference_ids only failed
    # deep inside load_json with an obscure error.
    valid_modes = ("fit", "transform", "fit_transform")
    if mode not in valid_modes:
        raise ValueError(f"mode must be one of {valid_modes}, got {mode!r}")
    if reference is None and reference_ids is None:
        raise ValueError("Either reference or reference_ids must be given.")

    dataset = io_functions.load_case_collection(data, meta)

    if reference is None:
        # Train a fresh reference SOM on the selected subset and persist it.
        reference_ids = io_functions.load_json(reference_ids)
        reference_dataset = dataset.filter(labels=reference_ids)
        print("Training reference SOM on", reference_dataset)
        reference = train_model(reference_dataset, modelargs=modelargs)
        reference_output = output / "reference"
        io_functions.save_casesom(reference, reference_output)
        reference = reference_output

    if mode == "fit":
        return

    if transargs is None:
        transargs = {
            "max_epochs": 4,
            "batch_size": 50000,
            "initial_radius": 4,
            "end_radius": 1,
        }
    model = io_functions.load_casesom(reference, tensorboard_dir=tensorboard_dir, **transargs)
    som_output = output / "som"
    transform_cases(dataset, model, som_output)
def load(cls, path: str = None, ref_path: str = None, cls_path: str = None):
    """Load classifier from the given path, alternatively give a separate path for reference and classifier."""
    if path is not None:
        # A single base path implies the conventional layout.
        base = utils.URLPath(path)
        ref_path = base / "reference"
        cls_path = base / "classifier"
    elif ref_path is None or cls_path is None:
        raise ValueError(
            "Either path or ref_path and cls_path need to be set.")
    else:
        ref_path = utils.URLPath(ref_path)
        cls_path = utils.URLPath(cls_path)

    reference_model = io_functions.load_casesom(ref_path)
    classifier = SOMClassifier.load(cls_path)
    saliency = SOMSaliency.load(cls_path)
    return cls(reference_model, classifier, saliency)
def main(data: utils.URLPath, meta: utils.URLPath, reference: utils.URLPath, model: utils.URLPath):
    # Saliency analysis script: renders SOM saliency overlays and scatter
    # plots for a hand-picked list of cases, then aggregates per-tube,
    # per-marker saliency statistics over all HCL cases.
    #
    # NOTE(review): the CLI arguments are immediately clobbered by the
    # hard-coded local paths below, so this entry point currently ignores
    # its inputs entirely — presumably leftover debugging state; confirm
    # before reuse.
    data, meta, soms, model = map(utils.URLPath, [
        "/data/flowcat-data/mll-flowdata/decCLL-9F",
        "output/0-final-dataset/train.json.gz",
        "output/som-fix-test/soms-test/som_r4_1",
        "output/0-final/classifier-minmax-new",
    ])
    # NOTE(review): sommodel is loaded but never used afterwards in this
    # function — TODO confirm whether this load is still needed.
    sommodel = utils.URLPath("output/som-fix-test/unjoined-ref")
    sommodel = io_functions.load_casesom(sommodel)
    output = utils.URLPath("output/0-final/model-analysis/saliency")
    output.mkdir()
    dataset = io_functions.load_case_collection(data, meta)
    soms = som_dataset.SOMDataset.from_path(soms)
    model = SaliencySOMClassifier.load(model)
    val_dataset = model.get_validation_data(dataset)
    val_seq = model.create_sequence(soms)
    # Hand-picked case ids to visualize.
    selected_labels = [
        "c3a6098bd5216c7d1f958396dd31bd6ef1646c18",
        "df726c162ed728c2886107e665ad931e5bf0baae",
        "3eb03bea6651c302ac013f187b288ee990889b29",
        "e539b3ec66b1c9d7a0aae1fbd37c19c7ac86a18c",
        "762a2a19d1913383f41ead7b5ef74a8133d67847",
        "bbfafb3d9053e212279aaada5faf23eddf4a5926",
        "9503bfad60524615a06613cfbffa3861fb66ede3",
    ]
    sel_dataset = dataset.filter(labels=selected_labels)

    # annotate each fcs point with saliency info
    session = tf.Session()
    bmu_calc = calculate_bmu_indexes()
    normalize = mpl.colors.Normalize(vmin=0, vmax=1)
    # NOTE(review): dead assignment — the loop below immediately rebinds
    # `case`.
    case = sel_dataset[0]
    for case in sel_dataset:
        case_output = output / f"{case.id}_g{case.group}"
        case_output.mkdir()
        print("Plotting", case)
        # plot som and saliency activations
        result = model.calculate_saliency(val_seq, case, case.group, maximization=False)
        xdata, _ = val_seq.get_batch_by_label([case.id])
        # Drop the batch dimension of each per-tube array — assumes axis 0
        # is the batch; TODO confirm against get_batch_by_label.
        xdata = [x[0, ...] for x in xdata]
        for tube in ("1", "2", "3"):
            fig = plot_saliency_som_map(model, xdata, result, tube, ("CD45-KrOr", "SS INT LIN", "CD19-APCA750"))
            fig.savefig(str(case_output / f"t{tube}_overlay.png"))
            fig = plot_saliency_scatterplot(model, bmu_calc, session, case, tube, xdata, result, norm=normalize)
            fig.savefig(str(case_output / f"t{tube}_scatter_saliency.png"))

    # Second pass over the same cases, writing to "maxall_"-prefixed
    # directories and printing the per-tube saliency maxima.
    # NOTE(review): maximization=False here as well despite the "maxall"
    # prefix — confirm this is intentional.
    for case in sel_dataset:
        case_output = output / f"maxall_{case.id}_g{case.group}"
        case_output.mkdir()
        print("Plotting", case)
        # plot som and saliency activations
        result = model.calculate_saliency(val_seq, case, case.group, maximization=False)
        for r in result:
            print("Max", np.max(r))
        xdata, _ = val_seq.get_batch_by_label([case.id])
        xdata = [x[0, ...] for x in xdata]
        for tube in ("1", "2", "3"):
            fig = plot_saliency_som_map(model, xdata, result, tube, ("CD45-KrOr", "SS INT LIN", "CD19-APCA750"))
            fig.savefig(str(case_output / f"t{tube}_overlay.png"))
            fig = plot_saliency_scatterplot(model, bmu_calc, session, case, tube, xdata, result, norm=normalize)
            fig.savefig(str(case_output / f"t{tube}_scatter_saliency.png"))

    # case_som = soms.get_labels([case.id]).iloc[0]

    # Aggregate per-tube, per-marker saliency statistics over the HCL group.
    hcls = val_dataset.filter(groups=["HCL"])
    from collections import defaultdict
    max_vals = defaultdict(lambda: defaultdict(list))
    mean_vals = defaultdict(lambda: defaultdict(list))
    for case in hcls:
        print(case)
        gradient = model.calculate_saliency(val_seq, case, case.group, maximization=False)
        for i, (tube, markers) in enumerate(model.config["tubes"].items()):
            tgrad = gradient[i]
            # NOTE(review): the inner loop shadows `i`; harmless because
            # `tgrad` is bound before the inner loop runs, but fragile.
            for i, marker in enumerate(markers["channels"]):
                mgrad = tgrad[:, :, i]
                gmax = np.max(mgrad)
                max_vals[tube][marker].append(gmax)
                gmean = np.mean(mgrad)
                mean_vals[tube][marker].append(gmean)
    # Print the averaged maxima/means per marker and collect them.
    # NOTE(review): max_markers is built but never used in the visible code.
    max_markers = defaultdict(list)
    for tube, markers in model.config["tubes"].items():
        for marker in markers["channels"]:
            print("Max", tube, marker, np.mean(max_vals[tube][marker]))
            print("Mean", tube, marker, np.mean(mean_vals[tube][marker]))
            max_markers[tube].append((marker, np.mean(max_vals[tube][marker])))
def main(data: utils.URLPath, reference: utils.URLPath, output: utils.URLPath):
    """Transform a dataset with a reference SOM under a sweep of radius settings.

    Args:
        data: Path to the case data directory; metadata is expected next to
            it as <data>/<data.name>.json.
        reference: Path to the pretrained reference casesom model.
        output: Base output path; each setting writes to its own subdirectory.
    """
    # NOTE(review): operator precedence makes this (data / data.name) + ".json";
    # confirm URLPath's '+' concatenation yields the intended metadata path.
    cases = io_functions.load_case_collection(data, data / data.name + ".json")
    default_settings = {
        "max_epochs": 4,
        "initial_learning_rate": 0.05,
        "end_learning_rate": 0.01,
        "batch_size": 50000,
        "initial_radius": 4,
        "end_radius": 1,
    }
    # Radius sweep: (run name, overrides merged over default_settings).
    settings = [
        ("radius_24_1", {"initial_radius": 24, "end_radius": 1}),
        ("radius_24_2", {"initial_radius": 24, "end_radius": 2}),
        # Bug fix: this entry was mislabeled "radius_24_1", so its output
        # directory silently overwrote the real radius_24_1 run.
        ("radius_16_1", {"initial_radius": 16, "end_radius": 1}),
        ("radius_16_2", {"initial_radius": 16, "end_radius": 2}),
        ("radius_8_1", {"initial_radius": 8, "end_radius": 1}),
        ("radius_8_2", {"initial_radius": 8, "end_radius": 2}),
        ("radius_4_1", {"initial_radius": 4, "end_radius": 1}),
        ("radius_4_2", {"initial_radius": 4, "end_radius": 2}),
    ]
    for name, setting in settings:
        # Later dict unpacking wins, so setting overrides the defaults.
        model = io_functions.load_casesom(reference, **{**default_settings, **setting})
        transform_data(cases, model, output / name)