Example #1
def main():
    # dataset = io_functions.load_case_collection(
    #     utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    #     utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz")
    # )
    dataset = io_functions.load_case_collection(
        utils.URLPath("/data/flowcat-data/paper-cytometry/unused-data"))

    LOGGER.info("Anonymizing dataset: %s", dataset)

    OUTPUT = utils.URLPath(
        "/data/flowcat-data/paper-cytometry-resubmit/unused_data_anonymized")

    data_dir = OUTPUT / "data"
    data_dir.mkdir()

    for case in dataset:
        # if case.id != "ffc59330acb49e6fcf5e679dbabcd01e56991345":
        #     continue

        for sample in case.samples:
            old_path = sample.complete_path
            new_path = data_dir / sample.path

            LOGGER.info("Saving %s sample to %s", case.id, new_path)

            new_path.parent.mkdir()
            anon_move(str(old_path), str(new_path))
Example #2
 def test_concatenation(self):
     cases = [("a", "b", "a/b"), ("/c", "d", "/c/d"),
              ("file:///a", "telnet", "file:///a/telnet")]
     for part_a, part_b, expected in cases:
         url_a = utils.URLPath(part_a)
         url_b = utils.URLPath(part_b)
         result = url_a / url_b
         self.assertEqual(str(result), expected)
Example #3
def main():
    output = utils.URLPath("output/4-flowsom-cmp/retrain_figures")
    output.mkdir()
    data = utils.URLPath("output/4-flowsom-cmp/retrain_tests_32_learning_rate")
    # data = utils.URLPath("output/4-flowsom-cmp/retrain_tests_32_radius")

    datasets = load_datasets(data)
    groups = mappings.GROUPS
    tube = "1"
    group = "CLL"

    joined_datasets = merged_data(datasets, group, tube)
    # plot_hexplot_datasets(joined_datasets, ("CD45-KrOr", "SS INT LIN"), output / "radius_cll_cd_45_ss.png")
    plot_hexplot_datasets(joined_datasets, ("CD20-PC7", "CD5-PacBlue"),
                          output / "learn_rate_cd20_cd5.png")
Example #4
def run_transfer(options, train_dataset, validate_dataset):
    config = options["config"]

    base_model = models.load_model(options["base_model_path"])

    tl_model = create_tl_model(base_model, config)

    model = SOMClassifier(config, tl_model)
    train = model.create_sequence(train_dataset, config.train_batch_size)

    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)
    else:
        validate = None

    model.train_generator(train, validate, epochs=config.train_epochs, class_weight=None)

    output = utils.URLPath(options["output_path"])

    if validate:
        pred_arr, pred_labels = model.predict_generator(validate)
        true_labels = validate.true_labels
        pred_df = pd.DataFrame(pred_arr, columns=validate.binarizer.classes_, index=validate.dataset.labels)
        io_functions.save_csv(pred_df, output / "preds.csv")
        io_functions.save_json({"true": list(true_labels), "pred": list(pred_labels)}, output / "preds_labels.json")
        generate_all_metrics(true_labels, pred_labels, config.mapping, output)

    model.save(output)
    model.save_information(output)

    keras.backend.clear_session()
    del model
Example #5
def sampleinfo_to_sample(sample_info: dict, case_id: str,
                         dataset_path: utils.URLPath) -> "Sample":
    """Create a tube sample from sample info dict."""
    assert "fcs" in sample_info and "path" in sample_info[
        "fcs"], "Path to sample_info is missing"
    assert "date" in sample_info, "Date is missing"
    path = utils.URLPath(sample_info["fcs"]["path"])
    date = utils.str_to_date(sample_info["date"])

    tube = str(sample_info.get("tube", "0"))
    material = Material.from_str(sample_info.get("material", ""))
    panel = sample_info.get("panel", "")

    markers = sample_info["fcs"].get("markers", None)
    count = int(sample_info["fcs"].get("event_count", 0)) or None

    sample_id = f"{case_id}_t{tube}_{material.name}_{sample_info['date']}"

    sample = FCSSample(id=sample_id,
                       case_id=case_id,
                       path=path,
                       dataset_path=dataset_path,
                       date=date,
                       tube=tube,
                       material=material,
                       panel=panel,
                       markers=markers,
                       count=count)
    return sample
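
A minimal sketch of how this helper might be called, assuming a hypothetical
sample_info payload (every value below is invented for illustration; the real
dicts come from the dataset's metadata JSON, and "material" must be a string
that flowcat's Material enum accepts):

sample_info = {
    "fcs": {"path": "case0_t1.lmd", "markers": ["CD45-KrOr", "SS INT LIN"],
            "event_count": 50000},
    "date": "2019-01-01",
    "tube": "1",
    "material": "PB",   # assumption: a value Material.from_str understands
    "panel": "B-NHL",   # assumption
}
sample = sampleinfo_to_sample(sample_info, "case0", utils.URLPath("/data/fcs"))
print(sample.id)  # e.g. case0_t1_<material name>_2019-01-01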
Example #6
def main(
        data: utils.URLPath = None,
        model: utils.URLPath = None,
        preds: utils.URLPath = None,
        output: utils.URLPath = None,
):
    data = utils.URLPath("/data/flowcat-data/paper-cytometry/som/unused")
    dataset = io_functions.load_case_collection(data, data + ".json.gz")
    # output = utils.URLPath("/data/flowcat-data/paper-cytometry/tsne")
    output = utils.URLPath("teststuff_unused_style")
    output.mkdir()

    # predictions = io_functions.load_json(utils.URLPath("/data/flowcat-data/paper-cytometry/tsne/prediction.json"))
    model = SOMClassifier.load(utils.URLPath("/data/flowcat-data/paper-cytometry/classifier"))

    som_tsne(dataset, model, output)
Example #7
def main(data: utils.URLPath, kfold_dir: utils.URLPath, output: utils.URLPath):
    # dataset = io_functions.load_case_collection(data, meta)
    # dataset.set_data_path(utils.URLPath(""))

    dataset = som_dataset.SOMDataset.from_path(data)
    models = []
    dirs = next(os.walk(kfold_dir))[1]

    for dir_name in dirs:
        models.append(utils.URLPath(os.path.join(kfold_dir, dir_name)))

    aucs = []
    curves = []
    for i, model in enumerate(models):
        print(model)
        model = SOMClassifier.load(model)
        validate = model.get_validation_data(dataset)
        grps = validate.group_count
        groups = model.config.groups

        if len(grps.keys()) != len(groups):
            continue

        val_seq = model.create_sequence(validate)

        trues = np.concatenate([val_seq[j][1] for j in range(len(val_seq))])
        preds = np.array([p for p in model.model.predict_generator(val_seq)])

        auc, curve = create_roc_results(trues, preds, output / f"roc_n{i}", model)
        aucs.append(auc)
        curves.append(curve)

    compute_mean_ROC(curves, output)
Example #8
    @classmethod
    def load(cls,
             path: str = None,
             ref_path: str = None,
             cls_path: str = None):
        """Load classifier from the given path, alternatively give a separate path for reference and classifier."""
        if path is not None:
            ref_path = utils.URLPath(path) / "reference"
            cls_path = utils.URLPath(path) / "classifier"
        elif ref_path is not None and cls_path is not None:
            ref_path = utils.URLPath(ref_path)
            cls_path = utils.URLPath(cls_path)
        else:
            raise ValueError(
                "Either path or ref_path and cls_path need to be set.")

        return cls(io_functions.load_casesom(ref_path),
                   SOMClassifier.load(cls_path), SOMSaliency.load(cls_path))
Example #9
def json_to_fcssample(samplejson: dict) -> "FCSSample":
    samplejson["date"] = utils.str_to_date(samplejson["date"])
    samplejson["path"] = utils.URLPath(samplejson["path"])
    if samplejson["material"]:
        samplejson["material"] = Material[samplejson["material"]]
    else:
        samplejson["material"] = None
    return FCSSample(**samplejson)
Example #10
 def test_addition(self):
     cases = [
         ("testfile", "as", "testfileas"),
         ("/a/", "test",
          "/atest"),  # trailing slashes will get removed on creation
         ("/file", ".lmd", "/file.lmd"),
     ]
     for part_a, part_b, expected in cases:
         result = utils.URLPath(part_a) + part_b
         self.assertEqual(str(result), expected)
Example #11
 def test_urls(self):
     cases = [
         ("a", "", ""),
         ("https://a", "https", "a"),
         ("https://dest.de/a", "https", "dest.de"),
     ]
     for url, scheme, netloc in cases:
         result = utils.URLPath(url)
         self.assertEqual(result._scheme, scheme)
         self.assertEqual(result._netloc, netloc)
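
Taken together, the tests in Examples #2, #10 and #11 pin down the URLPath
behavior the snippets on this page rely on: "/" joins path segments, "+" does
plain string concatenation on the path, trailing slashes are dropped on
creation, and scheme/netloc are split off full URLs. A toy stand-in with those
properties (a sketch for illustration, not flowcat's actual implementation):

from urllib.parse import urlparse


class MiniURLPath:
    """Illustrates the semantics exercised by the tests above."""

    def __init__(self, url):
        parsed = urlparse(str(url))
        self._scheme = parsed.scheme
        self._netloc = parsed.netloc
        # trailing slashes are removed on creation
        self._path = parsed.path.rstrip("/")

    def __str__(self):
        if self._scheme:
            return f"{self._scheme}://{self._netloc}{self._path}"
        return self._path

    def __truediv__(self, other):
        # join as path segments
        return MiniURLPath(f"{self}/{str(other).lstrip('/')}")

    def __add__(self, other):
        # plain string concatenation, e.g. for suffixes like ".lmd"
        return MiniURLPath(str(self) + str(other))


assert str(MiniURLPath("file:///a") / MiniURLPath("telnet")) == "file:///a/telnet"
assert str(MiniURLPath("/a/") + "test") == "/atest"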
Example #12
def reconfigure_som_model(som_model: CaseSom, args: dict) -> CaseSom:
    """Reconfigure SOM by saving a copy and loading it again."""
    tmp_path = utils.URLPath("/tmp/flowcat/sommodel")

    io_functions.save_casesom(som_model, tmp_path)
    reconfigured_model = io_functions.load_casesom(tmp_path, **args)

    rmtree(str(tmp_path))

    return reconfigured_model
Example #13
    def test_wrapping(self):
        @cast_urlpath
        def testfun(a: utils.URLPath = None, b: str = None):
            return a, b

        res = testfun(utils.URLPath("a"), "b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun("a", "b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun("a", b="b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun(a=utils.URLPath("b"), b="a")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun(a="b", b="a")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)
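
The cast_urlpath decorator under test is not shown on this page. A plausible
sketch that would satisfy these assertions, casting any argument whose
annotation is utils.URLPath (an assumption about the approach, not flowcat's
actual code):

import functools
import inspect


def cast_urlpath(fun):
    """Cast arguments annotated as utils.URLPath to URLPath before the call."""
    signature = inspect.signature(fun)

    def wrapper(*args, **kwargs):
        bound = signature.bind(*args, **kwargs)
        for name, value in bound.arguments.items():
            annotation = signature.parameters[name].annotation
            if annotation is utils.URLPath and not isinstance(value, utils.URLPath):
                bound.arguments[name] = utils.URLPath(value)
        return fun(*bound.args, **bound.kwargs)

    return functools.wraps(fun)(wrapper)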
Example #14
def run_denovo(options, train_dataset, validate_dataset):
    config = options["config"]

    model = train_som_classifier(train_dataset, validate_dataset, config)

    output = utils.URLPath(options["output_path"])

    if validate_dataset:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)

        pred_arr, pred_labels = model.predict_generator(validate)
        true_labels = validate.true_labels
        pred_df = pd.DataFrame(pred_arr, columns=validate.binarizer.classes_, index=validate.dataset.labels)
        io_functions.save_csv(pred_df, output / "preds.csv")
        io_functions.save_json({"true": list(true_labels), "pred": list(pred_labels)}, output / "preds_labels.json")
        generate_all_metrics(true_labels, pred_labels, config.mapping, output)

    model.save(output)
    model.save_information(output)

    keras.backend.clear_session()
    del model
"""
from dataclasses import dataclass
import pandas as pd
import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pyplot as plt

import seaborn as sns
import scipy.stats as sst

from flowcat import utils, io_functions

NAME = "result_analysis_removeedge"

RESULTS = {
    "path": utils.URLPath("output"),
    "names": ["classifier_ungated", "classifier_gated_removeedge"],
}

OUTPUT = utils.URLPath(f"output/{NAME}")

LOGGER = utils.setup_logging(utils.URLPath(f"logs/{NAME}_{utils.create_stamp()}"), NAME)

def get_result_dirs(path: utils.URLPath, names: list):
    """Get result directories for individual iterations from given path and names"""
    result_dirs = {
        name: Metrics(list(map(Result, path.glob(f"./{name}*")))) for name in names
    }
    return result_dirs

Example #16
import matplotlib
matplotlib.use("Agg")

from flowcat import io_functions, utils
from flowcat.plots import som as fc_somplot

LOGPATH = utils.URLPath("logs/visualize_datasets_{utils.create_stamp()}.log")

LOGGER = utils.logs.setup_logging(LOGPATH, "visualize_datasets")
OUTPUT = utils.URLPath("output/visualization/soms-ungated")

# OUTPUT.mkdir()
# 
# som_dataset = io_functions.load_case_collection(utils.URLPath("output/classifier_ungated/som"))
# 
# # testsample = som_dataset[0].samples[0]
# 
# for case in som_dataset.filter(groups=["CLL"]):
#     testsample = case.get_tube("1", kind="som")
#     LOGGER.info(testsample)
#     somdata = testsample.get_data()
#     fig = fc_somplot.plot_som_grid(somdata, channels=["SS INT LIN", "CD45-KrOr", None])
#     fig.savefig(str(OUTPUT / f"test_{case.id}.png"))

OUTPUT = utils.URLPath("output/visualization/soms-original")
som_dataset = io_functions.load_case_collection(utils.URLPath("/data/flowcat-data/paper-cytometry/som/train"), utils.URLPath("/data/flowcat-data/paper-cytometry/som/train.json.gz"))
OUTPUT.mkdir()
for case in som_dataset.filter(groups=["CLL"]):
    testsample = case.get_tube("1", kind="som")
    LOGGER.info(testsample)
    somdata = testsample.get_data()
    fig = fc_somplot.plot_som_grid(somdata, channels=["SS INT LIN", "CD45-KrOr", None])
    fig.savefig(str(OUTPUT / f"test_{case.id}.png"))
Example #17

def print_usage():
    """print syntax of script invocation"""
    print("\nUsage:")
    print("python {0:} SOM_datapath outputpath panel(Erlangen, Bonn, MLL,"
          "or Berlin)\n".format(os.path.basename(sys.argv[0])))
    return


if __name__ == "__main__":
    if len(sys.argv) != 5:
        print_usage()
        raise Exception("Invalid arguments")

    SOM_DATASET = utils.URLPath(sys.argv[1])
    OUTPUT = utils.URLPath(sys.argv[2])
    PANEL = sys.argv[3]
    EPOCHS = int(sys.argv[4])

    LOGGER = utils.logs.setup_logging(None, "merged model")

    # set the groups according to the panel
    if panel == "MLL":
        groups = GROUPS

    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
Example #18
def main(data: utils.URLPath, model: utils.URLPath, output: utils.URLPath):
    dataset = io_functions.load_case_collection(data, data + ".json")
    dataset.set_data_path(utils.URLPath(""))

    model = SOMClassifier.load(model)
    validate = model.get_validation_data(dataset)
    val_seq = model.create_sequence(validate)

    trues = np.concatenate([val_seq[i][1] for i in range(len(val_seq))])
    preds = np.array([p for p in model.model.predict_generator(val_seq)])

    create_roc_results(trues, preds, output / "roc", model)
    create_threshold_results(trues, preds, output / "threshold", model)

    # tsne of result vectors
    embedding_path = output / "embedding-preds"
    embedding_path.mkdir()

    pred_labels = val_seq.true_labels
    groups = model.config["groups"]
    groups.remove("normal")
    groups = ["normal", *groups]
    all_groups = groups + ["AML", "MM", "HCLv"]
    colors = sns.cubehelix_palette(len(all_groups), rot=4, dark=0.30)
    perplexity = 50

    # tsne of intermediate layers
    intermediate_model = keras.Model(
        inputs=model.model.input,
        outputs=model.model.get_layer("concatenate_1").output)
    intermed_preds = np.array(
        [p for p in intermediate_model.predict_generator(val_seq)])

    # unknown data
    udata = utils.URLPath("output/unknown-cohorts-processing/som/som")
    udataset = io_functions.load_case_collection(udata, udata + ".json")
    udataset.set_data_path(utils.URLPath(""))
    un_seq = model.create_sequence(udataset)
    intermed_upreds = np.array(
        [p for p in intermediate_model.predict_generator(un_seq)])

    all_intermed = np.concatenate((intermed_preds, intermed_upreds))
    all_labels = pred_labels + un_seq.true_labels

    umap_inter_all = UMAP(n_neighbors=30).fit_transform(all_intermed)
    plot_embedded(umap_inter_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / "umap_intermediate_all.png"), dpi=300)

    tsne_inter_all = manifold.TSNE(perplexity=perplexity).fit_transform(all_intermed)
    plot_embedded(tsne_inter_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"tsne_intermediate_all_p{perplexity}.png"), dpi=300)

    # create som tsne for known and unknown data
    all_cases = validate.cases + udataset.cases

    case_data = []
    for case in all_cases:
        somdata = np.concatenate([
            case.get_tube(tube, kind="som").get_data().data
            for tube in model.config["tubes"]
        ], axis=2).flatten()
        case_data.append(somdata)
    case_data = np.array(case_data)

    perplexity = 50
    umap_som_all = UMAP(n_neighbors=30).fit_transform(case_data)
    plot_embedded(umap_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / "umap_som_all.png"), dpi=300)

    tsne_som_all = manifold.TSNE(perplexity=perplexity).fit_transform(case_data)
    plot_embedded(tsne_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"tsne_som_all_p{perplexity}.png"), dpi=300)

    # plot legend
    fig = plt.figure()
    patches = [
        mpl.patches.Patch(color=color, label=group)
        for group, color in zip(all_groups, colors)
    ]
    fig.legend(patches, all_groups, loc='center', frameon=False)
    fig.savefig(str(embedding_path / "legend.png"), dpi=300)
Example #19
import math

import numpy as np

def create_class_weight(labels_dict, mu=0.15):
    total = np.sum(list(labels_dict.values()))
    keys = labels_dict.keys()
    class_weight = dict()

    for key in keys:
        score = math.log(mu * total / float(labels_dict[key]))
        class_weight[key] = score if score > 1.0 else 1.0

    return class_weight


SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL5F")
OUTPUT = utils.URLPath(
    "/data/flowcat-data/2020-04_merged_train/TL/class_weights/mu_30/model_30")


def create_class_weight(labels_dict, mu=0.30):
    total = np.sum(list(labels_dict.values()))
    keys = labels_dict.keys()
    class_weight = dict()

    for key in keys:
        score = math.log(mu * total / float(labels_dict[key]))
        class_weight[key] = score if score > 1.0 else 1.0

    return class_weight
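
The weighting follows log(mu * total / count), floored at 1.0, so frequent
classes keep weight 1 while rare classes get a logarithmic boost; raising mu
(0.15 vs 0.30 above) boosts more classes. A quick worked example with invented
counts:

weights = create_class_weight({"CLL": 1000, "HCL": 50}, mu=0.15)
# total = 1050
# CLL: log(0.15 * 1050 / 1000) = log(0.1575) ~ -1.85 -> floored to 1.0
# HCL: log(0.15 * 1050 / 50)   = log(3.15)   ~  1.15 -> kept as is
print(weights)  # {'CLL': 1.0, 'HCL': 1.147...}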
Example #20
 def save(self, path: str):
     """Save the current model into the given path."""
     path = utils.URLPath(path)
     path.mkdir()
     io_functions.save_casesom(self.reference, path / "reference")
     self.classifier.save(path / "classifier")
Example #21
def json_to_somsample(samplejson: dict) -> "SOMSample":
    samplejson["date"] = utils.str_to_date(samplejson["date"])
    samplejson["path"] = utils.URLPath(samplejson["path"])
    samplejson["dims"] = tuple(samplejson["dims"])
    return SOMSample(**samplejson)
Example #22
   model : base - 9F all CLL and normal samples
           target - 5F - increasing sample size (start with very few samples)

2) groups - only rare subtypes ( no CLL, MBl, normal)
   model : base - 9F all rare subtypes samples
           target - 5F - increasing sample size (start with very few samples)
"""

from flowcat import classifier, utils, io_functions
from flowcat.constants import DEFAULT_CLASSIFIER_CONFIG, GROUPS, DEFAULT_CLASSIFIER_ARGS
from flowcat import flowcat_api as fc_api
from flowcat.classifier import som_dataset
from flowcat.classifier.models import create_model_multi_input

#MARKERS = io_functions.load_json(utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/markers.json"))
SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F")
OUTPUT = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/Exp1")

LOGGER = utils.logs.setup_logging(None, "classify")

groups = ["MCL", "PL", "LPL", "MZL", "FL", "HCL"]
tubes = ("1")

mapping = None
dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset, split_ratio=0.9, groups=groups, mapping=mapping, balance=None)

config = classifier.SOMClassifierConfig(
    **{
        "tubes": {tube: dataset.config[tube]
Example #23
def main(data: utils.URLPath, meta: utils.URLPath, reference: utils.URLPath,
         model: utils.URLPath):
    data, meta, soms, model = map(utils.URLPath, [
        "/data/flowcat-data/mll-flowdata/decCLL-9F",
        "output/0-final-dataset/train.json.gz",
        "output/som-fix-test/soms-test/som_r4_1",
        "output/0-final/classifier-minmax-new",
    ])
    dataset = io_functions.load_case_collection(data, meta)
    soms = som_dataset.SOMDataset.from_path(soms)
    model = SaliencySOMClassifier.load(model)
    val_dataset = model.get_validation_data(dataset)
    val_seq = model.create_sequence(soms)

    # printing out weights and biases, unsure whether they actually contain
    # information
    # in theory we could extend that to attempt to describe them as gates
    tube = "3"
    weights, biases = model.model.layers[int(tube) + 2].get_weights()
    for j, chname in enumerate(model.config["tubes"][tube]["channels"]):
        ch_mean_weight = np.mean(weights[:, :, j, :])
        print(j, chname, ch_mean_weight)

    for i in range(weights.shape[-1]):
        mean_weight = np.mean(weights[:, :, :, i])
        print(i, mean_weight, biases[i])
        for j, chname in enumerate(model.config["tubes"]["1"]["channels"]):
            print(i, j, chname)
            print(weights[:, :, j, i])

    # zero out specific columns and see how that impacts performance
    output = utils.URLPath("output/0-final/model-analysis/occlusion")
    for group in model.config["groups"]:
        print(group)
        sel_cases = val_dataset.filter(groups=[group])
        avg_results = model.channel_occlusion(sel_cases, val_seq)
        print(sorted(avg_results, key=lambda t: t[2], reverse=True))
        io_functions.save_json(avg_results, output / f"{group}_avg_std.json")

    # case_som = soms.get_labels([case.id]).iloc[0]
    hcls = val_dataset.filter(groups=["HCL"])
    from collections import defaultdict
    max_vals = defaultdict(lambda: defaultdict(list))
    mean_vals = defaultdict(lambda: defaultdict(list))
    for case in hcls:
        print(case)
        gradient = model.calculate_saliency(val_seq,
                                            case,
                                            case.group,
                                            maximization=False)
        for i, (tube, markers) in enumerate(model.config["tubes"].items()):
            tgrad = gradient[i]
            for j, marker in enumerate(markers["channels"]):
                mgrad = tgrad[:, :, j]
                gmax = np.max(mgrad)
                max_vals[tube][marker].append(gmax)
                gmean = np.mean(mgrad)
                mean_vals[tube][marker].append(gmean)
    max_markers = defaultdict(list)
    for tube, markers in model.config["tubes"].items():
        for marker in markers["channels"]:
            print("Max", tube, marker, np.mean(max_vals[tube][marker]))
            print("Mean", tube, marker, np.mean(mean_vals[tube][marker]))
            max_markers[tube].append((marker, np.mean(max_vals[tube][marker])))

    for tube in model.config["tubes"]:
        print("Tube", tube)
        print("\n".join(": ".join((t[0], str(t[1]))) for t in sorted(
            max_markers[tube], key=lambda t: t[1], reverse=True)))

    c_model = MLLDATA / "mll-sommaps/models/relunet_samplescaled_sommap_6class/model_0.h5"
    c_labels = MLLDATA / "mll-sommaps/output/relunet_samplescaled_sommap_6class/test_labels.json"
    c_preds = MLLDATA / "mll-sommaps/models/relunet_samplescaled_sommap_6class/predictions_0.csv"
    c_config = MLLDATA / "mll-sommaps/output/relunet_samplescaled_sommap_6class/config.json"
    c_cases = MLLDATA / "mll-flowdata/CLL-9F"
    c_sommaps = MLLDATA / "mll-sommaps/sample_maps/selected1_toroid_s32"
    c_misclass = MLLDATA / "mll-sommaps/misclassifications/"
    c_tube = [1, 2]

    # load datasets
    somdataset = sd.SOMDataset.from_path(c_sommaps)
    cases = cc.CaseCollection.from_path(c_cases, how="case_info.json")

    # filter datasets
    test_labels = flowutils.load_json(c_labels)

    filtered_cases = cases.filter(labels=test_labels)
    somdataset.data[1] = somdataset.data[1].loc[test_labels, :]

    # get mapping
    config = flowutils.load_json(c_config)
    groupinfo = mappings.GROUP_MAPS[config["c_groupmap"]]

    dataset = cd.CombinedDataset(filtered_cases, {
        dd.Dataset.from_str('SOM'): somdataset,
        dd.Dataset.from_str('FCS'): filtered_cases
    },
                                 group_names=groupinfo['groups'])

    # modify mapping
    dataset.set_mapping(groupinfo)

    xoutputs = [
        loaders.loader_builder(
            loaders.Map2DLoader.create_inferred,
            tube=1,
            sel_count="counts",
            pad_width=1,
        ),
        loaders.loader_builder(
            loaders.Map2DLoader.create_inferred,
            tube=2,
            sel_count="counts",
            pad_width=1,
        )
    ]

    dataset = loaders.DatasetSequence.from_data(dataset,
                                                xoutputs,
                                                batch_size=1,
                                                draw_method="sequential")

    predictions = pd.read_csv(c_preds, index_col=0)

    predictions = add_correct_magnitude(predictions)
    predictions = add_infiltration(predictions, cases)

    misclass_labels = ['507777582649cbed8dfb3fe552a6f34f8b6c28e3']

    for label in misclass_labels:
        label_path = pathlib.Path(f"{c_misclass}/{label}")
        if not label_path.exists():
            label_path.mkdir()

        case = cases.get_label(label)

        # get the actual and the predicted class
        corr_group = predictions.loc[case.id, "correct"]
        pred_group = predictions.loc[case.id, "pred"]
        classes = [corr_group, pred_group]

        gradients = plotting.calc_saliency(dataset,
                                           case,
                                           c_model,
                                           classes=classes)

        for tube in c_tube:

            heatmaps = plotting.draw_saliency_heatmap(case, gradients, classes,
                                                      tube)
            for idx, heatmap in enumerate(heatmaps):
                plotting.save_figure(
                    heatmap,
                    f"{c_misclass}/{label}/{classes[idx]}_tube_{tube}_saliency_heatmap.png"
                )

            scatterplots = plotting.plot_tube(case,
                                              tube,
                                              gradients[tube - 1],
                                              classes=classes,
                                              sommappath=c_sommaps)
            for idx, scatterplot in enumerate(scatterplots):
                plotting.save_figure(
                    scatterplot,
                    f"{c_misclass}/{label}/{classes[idx]}_tube_{tube}_scatterplots.png"
                )
Example #24
            "base_model_path": str(base_model_path / "model.h5"),
            "output_path": output_path / f"kfold_n{n}",
            "config": classifier.SOMClassifierConfig(**{
                "tubes": {tube: dataset.config[tube] for tube in tubes},
                "groups": groups,
                "pad_width": 2,
                "mapping": mapping,
                "cost_matrix": None,
                "train_epochs": 15,
            })
        }
        run_transfer(options, train_dataset, validate_dataset)


if __name__ == "__main__":
    OUTPUT = utils.URLPath("/data/flowcat-data/2021-01_kfold_n10_startified")
    LOGGER = utils.logs.setup_logging(OUTPUT / "logs.txt", "merged model with TL")
    experiments = {
        "mll5f": {
            "output_path": OUTPUT / "mll5f",
            "som_dataset_path": "/data/flowcat-data/2020_Nov_rerun/Merged_SOM/MLL5F",
            "panel": "MLL",
            "base_model_path": "/data/flowcat-data/2020_Nov_rerun/Merged_model/MLL9F",
            "k_number": 10,
            "rerun": False,
            "stratified": False,
        },
        "bonn": {
            "output_path": OUTPUT / "bonn",
            "som_dataset_path": "/data/flowcat-data/2020_Nov_rerun/Merged_SOM/Bonn/with_9F_ref",
            "panel": "BONN",
Example #25
        utils.logs.create_handler(utils.logs.print_stream()),
    ]


def setup_logging(logging_path, name):
    logging_path.parent.mkdir()

    logger = logging.getLogger(name)
    handlers = create_logging_handlers(logging_path)
    utils.logs.add_logger(logger, handlers)
    return logger


INPUT = {
    "data":
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    "meta":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"
    ),
}

OUTPUT = utils.URLPath("output/samples")

LOGPATH = utils.URLPath(f"logs/filter_samples_{utils.create_stamp()}.log")

LOGGER = setup_logging(LOGPATH, "filter_samples")

fc_seed.set_seed(42)
OUTPUT.mkdir()
train_dataset = io_functions.load_case_collection(INPUT["data"], INPUT["meta"])
Example #26

def print_usage():
    """print syntax of script invocation"""
    print("\nUsage:")
    print("python {0:} SOM_datapath outputpath panel(Erlangen, Bonn, MLL,"
          "or Berlin) basemodel_path\n".format(os.path.basename(sys.argv[0])))
    return


if __name__ == "__main__":
    if len(sys.argv) != 6:
        print_usage()
        raise Exception("Invalid arguments")

    SOM_DATASET = utils.URLPath(sys.argv[1])
    OUTPUT = utils.URLPath(sys.argv[2])
    PANEL = sys.argv[3]
    BASE_MODEL_PATH = utils.URLPath(sys.argv[4])
    EPOCHS = int(sys.argv[5])

    LOGGER = utils.logs.setup_logging(None, "merged model with TL")

    # set the groups according to the panel
    if panel == "MLL":
        groups = GROUPS

    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    else:
Example #27
def load_case_collection(data: str, meta: str = None):
    data = utils.URLPath(data)
    if meta is not None:
        meta = utils.URLPath(meta)

    return io_functions.load_case_collection(data, meta)
Example #28
 def __init__(self, path, *args, **kwargs):
     super().__init__(*args, **kwargs)
     path = utils.URLPath(path)
     self.data = case_dataset.CaseCollection.from_path(path)
Example #29
from collections import defaultdict
from flowcat import io_functions, utils, seed as fc_seed

INPUT = {
    "data":
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    "meta":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"
    ),
    "meta_test":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz"
    ),
}

train_dataset = io_functions.load_case_collection(INPUT["data"],
                                                  INPUT["meta_test"])
sorted_cases = sorted(train_dataset,
                      key=lambda c: c.infiltration
                      if c.infiltration > 0.0 else 1000)

perc01_count = 0
group_count = defaultdict(int)
for case in sorted_cases[:100]:
    print("Minimal infiltration sample:", case, case.infiltration)
    if case.infiltration == 0.1:
        perc01_count += 1
        group_count[case.group] += 1

print(perc01_count)
Example #30
"""
Acquire FCS information needed for Miflowcyt document.

Also roughly check whether we have strongly diverging data in our dataset.
"""
from flowcat import dataset as fc_dataset, io_functions, utils
import fcsparser


def section(text, level=4, deco="#"):
    deco_text = deco * level
    section_text = f"{deco_text} {text} {deco_text}"
    print(section_text)


train_dataset = io_functions.load_case_collection(utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"), utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"))
test_dataset = io_functions.load_case_collection(utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"), utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz"))

print("Loading all data used in paper analysis.")
dataset = train_dataset + test_dataset
print(dataset)

section("Get info for case 0")
case = dataset[0]
print(case)

sample = case.samples[0]
meta, data = fcsparser.parse(sample.complete_path)
for i in range(1, 13):
    name = f"$P{i}S"
    voltage = f"$P{i}V"
    print(name, meta.get(name, ""), voltage, meta.get(voltage, ""))