Example #1
0
def transform(data: utils.URLPath,
              meta: utils.URLPath,
              output: utils.URLPath,
              reference: utils.URLPath,
              transargs: json.loads = None,
              sample: int = 0):
    """Transform dataset using a reference SOM.

    Args:
        data: Path to fcs data.
        meta: Path to dataset metadata referencing the fcs data.
        output: Destination path for the transformed SOMs.
        reference: Path to a pretrained reference SOM model.
        transargs: Optional dict of options for loading the reference model;
            defaults to DEFAULT_TRANSFORM_SOM_ARGS.
        sample: Number of samples to transform from each group, only useful for testing purposes.
    """
    dataset = io_functions.load_case_collection(data, meta)

    # randomly sample 'sample' number cases from each group
    if sample:
        dataset = dataset.sample(sample)

    if transargs is None:
        transargs = DEFAULT_TRANSFORM_SOM_ARGS

    # fixed typo in user-facing message (was "referece")
    print(f"Loading reference from {reference}")
    model = io_functions.load_casesom(reference, **transargs)

    transform_dataset_to_som(model, dataset, output)
Example #2
0
def load_existing_casesom(path: utils.URLPath, args: dict) -> CaseSom:
    """Best-effort load of a previously saved CaseSom.

    Returns the loaded model, or None when loading fails for any reason
    (the failure is logged as a warning).
    """
    try:
        return io_functions.load_casesom(path, **args)
    except Exception as error:
        LOGGER.warning("Failed to load existing casesom at %s with error %s:",
                       path, error)
    return None
Example #3
0
def reconfigure_som_model(som_model: CaseSom, args: dict) -> CaseSom:
    """Reconfigure SOM by saving a copy and loading it again.

    Args:
        som_model: Model whose configuration should be replaced.
        args: Keyword arguments forwarded to the model loader.

    Returns:
        A freshly loaded CaseSom with the given configuration applied.
    """
    import tempfile

    # Use a unique temporary directory instead of the previous hard-coded
    # /tmp/flowcat/sommodel, which raced between concurrent runs and left
    # stale data behind when loading raised.
    tmp_path = utils.URLPath(tempfile.mkdtemp(prefix="flowcat_sommodel_"))
    try:
        io_functions.save_casesom(som_model, tmp_path)
        reconfigured_model = io_functions.load_casesom(tmp_path, **args)
    finally:
        # always clean up the round-trip copy, even on failure
        rmtree(str(tmp_path), ignore_errors=True)

    return reconfigured_model
Example #4
0
def main(
    data: utils.URLPath,
    meta: utils.URLPath,
    output: utils.URLPath,
    reference_ids: utils.URLPath = None,
    reference: utils.URLPath = None,
    tensorboard_dir: utils.URLPath = None,
    modelargs: json.loads = None,
    transargs: json.loads = None,
    mode: str = "fit_transform",
):
    """
    Train a SOM and use its weights to initialize individual SOM training.

    Args:
        data: Path to fcs data.
        meta: Path to dataset metadata, this should correctly reference fcs data.
        output: Path to output model and transformed cases.
        reference_ids: Optionally list ids to be used for reference SOM generation.
        reference: Optionally use pretrained model.
        tensorboard_dir: Optional directory for tensorboard logging.
        modelargs: Optionally give specific options for reference SOM generation.
        transargs: Optionally give specific options for transforming individual SOMs.
        mode: Whether to fit or to transform. Default both.

    Raises:
        ValueError: If neither a pretrained reference nor reference ids are given.
    """
    dataset = io_functions.load_case_collection(data, meta)

    if reference is None:
        # fail fast with a clear message instead of crashing inside
        # load_json(None) further down
        if reference_ids is None:
            raise ValueError(
                "Either reference or reference_ids must be provided")
        reference_ids = io_functions.load_json(reference_ids)
        reference_dataset = dataset.filter(labels=reference_ids)
        print("Training reference SOM on", reference_dataset)
        reference = train_model(reference_dataset, modelargs=modelargs)
        reference_output = output / "reference"
        io_functions.save_casesom(reference, reference_output)
        # continue with the on-disk copy so fit and transform use the same model
        reference = reference_output

    if mode == "fit":
        return

    if transargs is None:
        transargs = {
            "max_epochs": 4,
            "batch_size": 50000,
            "initial_radius": 4,
            "end_radius": 1,
        }

    model = io_functions.load_casesom(reference,
                                      tensorboard_dir=tensorboard_dir,
                                      **transargs)

    som_output = output / "som"
    transform_cases(dataset, model, som_output)
Example #5
0
    def load(cls,
             path: str = None,
             ref_path: str = None,
             cls_path: str = None):
        """Load classifier from the given path, alternatively give a separate path for reference and classifier."""
        if path is not None:
            # single base path: derive both component locations from it
            base = utils.URLPath(path)
            ref_path = base / "reference"
            cls_path = base / "classifier"
        elif ref_path is None or cls_path is None:
            raise ValueError(
                "Either path or ref_path and cls_path need to be set.")
        else:
            ref_path = utils.URLPath(ref_path)
            cls_path = utils.URLPath(cls_path)

        reference = io_functions.load_casesom(ref_path)
        classifier = SOMClassifier.load(cls_path)
        saliency = SOMSaliency.load(cls_path)
        return cls(reference, classifier, saliency)
def main(data: utils.URLPath, meta: utils.URLPath, reference: utils.URLPath, model: utils.URLPath):
    """Generate saliency visualizations for selected cases of a trained SOM classifier.

    NOTE(review): the function parameters are immediately overwritten by the
    hard-coded paths below, so this entry point effectively ignores its
    arguments — confirm whether the CLI parameters are still intended.
    """
    data, meta, soms, model = map(utils.URLPath, [
        "/data/flowcat-data/mll-flowdata/decCLL-9F",
        "output/0-final-dataset/train.json.gz",
        "output/som-fix-test/soms-test/som_r4_1",
        "output/0-final/classifier-minmax-new",
    ])
    # load the (unjoined) reference SOM model; the first binding is just the path
    sommodel = utils.URLPath("output/som-fix-test/unjoined-ref")
    sommodel = io_functions.load_casesom(sommodel)

    output = utils.URLPath("output/0-final/model-analysis/saliency")
    output.mkdir()
    dataset = io_functions.load_case_collection(data, meta)
    soms = som_dataset.SOMDataset.from_path(soms)
    model = SaliencySOMClassifier.load(model)
    val_dataset = model.get_validation_data(dataset)
    val_seq = model.create_sequence(soms)

    # hand-picked case ids to generate per-case saliency plots for
    selected_labels = [
        "c3a6098bd5216c7d1f958396dd31bd6ef1646c18",
        "df726c162ed728c2886107e665ad931e5bf0baae",
        "3eb03bea6651c302ac013f187b288ee990889b29",
        "e539b3ec66b1c9d7a0aae1fbd37c19c7ac86a18c",
        "762a2a19d1913383f41ead7b5ef74a8133d67847",
        "bbfafb3d9053e212279aaada5faf23eddf4a5926",
        "9503bfad60524615a06613cfbffa3861fb66ede3",
    ]
    sel_dataset = dataset.filter(labels=selected_labels)

    # annotate each fcs point with saliency info
    session = tf.Session()
    bmu_calc = calculate_bmu_indexes()

    # shared color normalization so saliency plots are comparable across cases
    normalize = mpl.colors.Normalize(vmin=0, vmax=1)

    # NOTE(review): dead assignment — immediately overwritten by the loop below
    case = sel_dataset[0]
    for case in sel_dataset:
        case_output = output / f"{case.id}_g{case.group}"
        case_output.mkdir()
        print("Plotting", case)

        # plot som and saliency activations
        result = model.calculate_saliency(val_seq, case, case.group, maximization=False)

        # first item of the batch for this single-case lookup
        xdata, _ = val_seq.get_batch_by_label([case.id])
        xdata = [x[0, ...] for x in xdata]

        for tube in ("1", "2", "3"):
            fig = plot_saliency_som_map(model, xdata, result, tube, ("CD45-KrOr", "SS INT LIN", "CD19-APCA750"))
            fig.savefig(str(case_output / f"t{tube}_overlay.png"))

            fig = plot_saliency_scatterplot(model, bmu_calc, session, case, tube, xdata, result, norm=normalize)
            fig.savefig(str(case_output / f"t{tube}_scatter_saliency.png"))

    # NOTE(review): second pass over the same cases, writing to "maxall_"
    # prefixed dirs; apart from printing per-tube maxima it repeats the loop
    # above with the same maximization=False setting — confirm whether
    # maximization=True was intended here.
    for case in sel_dataset:
        case_output = output / f"maxall_{case.id}_g{case.group}"
        case_output.mkdir()
        print("Plotting", case)

        # plot som and saliency activations
        result = model.calculate_saliency(val_seq, case, case.group, maximization=False)
        for r in result:
            print("Max", np.max(r))

        xdata, _ = val_seq.get_batch_by_label([case.id])
        xdata = [x[0, ...] for x in xdata]

        for tube in ("1", "2", "3"):
            fig = plot_saliency_som_map(model, xdata, result, tube, ("CD45-KrOr", "SS INT LIN", "CD19-APCA750"))
            fig.savefig(str(case_output / f"t{tube}_overlay.png"))

            fig = plot_saliency_scatterplot(model, bmu_calc, session, case, tube, xdata, result, norm=normalize)
            fig.savefig(str(case_output / f"t{tube}_scatter_saliency.png"))

    # case_som = soms.get_labels([case.id]).iloc[0]
    # aggregate per-marker saliency statistics over all HCL validation cases
    hcls = val_dataset.filter(groups=["HCL"])
    from collections import defaultdict
    max_vals = defaultdict(lambda: defaultdict(list))
    mean_vals = defaultdict(lambda: defaultdict(list))
    for case in hcls:
        print(case)
        gradient = model.calculate_saliency(val_seq, case, case.group, maximization=False)
        for i, (tube, markers) in enumerate(model.config["tubes"].items()):
            tgrad = gradient[i]
            # NOTE(review): inner loop reuses index name `i`, shadowing the
            # outer tube index; harmless here since enumerate rebinds it each
            # outer iteration, but a distinct name would be clearer.
            for i, marker in enumerate(markers["channels"]):
                mgrad = tgrad[:, :, i]
                gmax = np.max(mgrad)
                max_vals[tube][marker].append(gmax)
                gmean = np.mean(mgrad)
                mean_vals[tube][marker].append(gmean)
    # report and collect mean-of-max / mean-of-mean saliency per marker
    max_markers = defaultdict(list)
    for tube, markers in model.config["tubes"].items():
        for marker in markers["channels"]:
            print("Max", tube, marker, np.mean(max_vals[tube][marker]))
            print("Mean", tube, marker, np.mean(mean_vals[tube][marker]))
            max_markers[tube].append((marker, np.mean(max_vals[tube][marker])))
Example #7
0
def main(data: utils.URLPath, reference: utils.URLPath, output: utils.URLPath):
    """Sweep SOM radius settings and transform the dataset with each model variant.

    Args:
        data: Directory containing fcs data; metadata is expected at
            ``data / (data.name + ".json")``.
        reference: Path to the pretrained reference model.
        output: Base output directory; one subdirectory per setting name.
    """
    # NOTE(review): precedence is (data / data.name) + ".json" — this relies
    # on URLPath supporting string concatenation; confirm it resolves to the
    # intended "<data>/<name>.json".
    cases = io_functions.load_case_collection(data, data / data.name + ".json")
    default_settings = {
        "max_epochs": 4,
        "initial_learning_rate": 0.05,
        "end_learning_rate": 0.01,
        "batch_size": 50000,
        "initial_radius": 4,
        "end_radius": 1,
    }
    # settings = [
    #     ("learning_rate_001_0001", {"initial_learning_rate": 0.01, "end_learning_rate": 0.001}),
    #     ("learning_rate_001_001", {"initial_learning_rate": 0.01, "end_learning_rate": 0.01}),
    #     ("learning_rate_005_0001", {"initial_learning_rate": 0.05, "end_learning_rate": 0.001}),
    #     ("learning_rate_005_001", {"initial_learning_rate": 0.05, "end_learning_rate": 0.01}),
    #     ("learning_rate_005_005", {"initial_learning_rate": 0.05, "end_learning_rate": 0.05}),
    #     ("learning_rate_05_0001", {"initial_learning_rate": 0.5, "end_learning_rate": 0.001}),
    #     ("learning_rate_05_001", {"initial_learning_rate": 0.5, "end_learning_rate": 0.01}),
    #     ("learning_rate_05_01", {"initial_learning_rate": 0.5, "end_learning_rate": 0.1}),
    #     ("learning_rate_05_05", {"initial_learning_rate": 0.5, "end_learning_rate": 0.5}),
    # ]
    # each entry: (output subdirectory name, overrides merged onto defaults)
    settings = [
        ("radius_24_1", {
            "initial_radius": 24,
            "end_radius": 1
        }),
        ("radius_24_2", {
            "initial_radius": 24,
            "end_radius": 2
        }),
        # fixed: this entry was mislabeled "radius_24_1", which duplicated the
        # first entry's name and made its output overwrite the 24->1 results
        ("radius_16_1", {
            "initial_radius": 16,
            "end_radius": 1
        }),
        ("radius_16_2", {
            "initial_radius": 16,
            "end_radius": 2
        }),
        ("radius_8_1", {
            "initial_radius": 8,
            "end_radius": 1
        }),
        ("radius_8_2", {
            "initial_radius": 8,
            "end_radius": 2
        }),
        ("radius_4_1", {
            "initial_radius": 4,
            "end_radius": 1
        }),
        ("radius_4_2", {
            "initial_radius": 4,
            "end_radius": 2
        }),
    ]
    for name, setting in settings:
        # per-setting overrides win over the defaults
        model = io_functions.load_casesom(
            reference,
            **{
                **default_settings,
                **setting
            },
        )
        transform_data(cases, model, output / name)