Example #1
def main():
    # dataset = io_functions.load_case_collection(
    #     utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    #     utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz")
    # )
    dataset = io_functions.load_case_collection(
        utils.URLPath("/data/flowcat-data/paper-cytometry/unused-data"))

    LOGGER.info("Anonymizing dataset: %s", dataset)

    OUTPUT = utils.URLPath(
        "/data/flowcat-data/paper-cytometry-resubmit/unused_data_anonymized")

    data_dir = OUTPUT / "data"
    data_dir.mkdir()

    for case in dataset:
        # if case.id != "ffc59330acb49e6fcf5e679dbabcd01e56991345":
        #     continue

        for sample in case.samples:
            old_path = sample.complete_path
            new_path = data_dir / sample.path

            LOGGER.info("Saving %s sample to %s", case.id, new_path)

            new_path.parent.mkdir()
            anon_move(str(old_path), str(new_path))
Example #2
    def test_concatenation(self):
        cases = [("a", "b", "a/b"), ("/c", "d", "/c/d"),
                 ("file:///a", "telnet", "file:///a/telnet")]
        for part_a, part_b, expected in cases:
            url_a = utils.URLPath(part_a)
            url_b = utils.URLPath(part_b)
            result = url_a / url_b
            self.assertEqual(str(result), expected)
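
The `/` operator joins path components while preserving the scheme and netloc of the left operand. A minimal sketch consistent with the three cases above, assuming URLPath stores scheme, netloc and path separately (hypothetical helper, not the flowcat source):

from urllib.parse import urlsplit

def urlpath_join(a: str, b: str) -> str:
    # Join path parts with "/", keeping the left side's scheme and netloc.
    scheme, netloc, path = urlsplit(a)[:3]
    joined = path.rstrip("/") + "/" + urlsplit(b)[2].lstrip("/")
    return f"{scheme}://{netloc}{joined}" if scheme else joined

assert urlpath_join("a", "b") == "a/b"
assert urlpath_join("/c", "d") == "/c/d"
assert urlpath_join("file:///a", "telnet") == "file:///a/telnet"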
Example #3
def main():
    output = utils.URLPath("output/4-flowsom-cmp/retrain_figures")
    output.mkdir()
    data = utils.URLPath("output/4-flowsom-cmp/retrain_tests_32_learning_rate")
    # data = utils.URLPath("output/4-flowsom-cmp/retrain_tests_32_radius")

    datasets = load_datasets(data)
    groups = mappings.GROUPS
    tube = "1"
    group = "CLL"

    joined_datasets = merged_data(datasets, group, tube)
    # plot_hexplot_datasets(joined_datasets, ("CD45-KrOr", "SS INT LIN"), output / "radius_cll_cd_45_ss.png")
    plot_hexplot_datasets(joined_datasets, ("CD20-PC7", "CD5-PacBlue"),
                          output / "learn_rate_cd20_cd5.png")
Example #4
def run_transfer(options, train_dataset, validate_dataset):
    config = options["config"]

    base_model = models.load_model(options["base_model_path"])

    tl_model = create_tl_model(base_model, config)

    model = SOMClassifier(config, tl_model)
    train = model.create_sequence(train_dataset, config.train_batch_size)

    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)
    else:
        validate = None

    model.train_generator(train, validate, epochs=config.train_epochs, class_weight=None)

    output = utils.URLPath(options["output_path"])

    if validate:
        pred_arr, pred_labels = model.predict_generator(validate)
        true_labels = validate.true_labels
        pred_df = pd.DataFrame(pred_arr, columns=validate.binarizer.classes_, index=validate.dataset.labels)
        io_functions.save_csv(pred_df, output / "preds.csv")
        io_functions.save_json({"true": list(true_labels), "pred": list(pred_labels)}, output / "preds_labels.json")
        generate_all_metrics(true_labels, pred_labels, config.mapping, output)

    model.save(output)
    model.save_information(output)

    keras.backend.clear_session()
    del model
Example #5
def sampleinfo_to_sample(sample_info: dict, case_id: str,
                         dataset_path: utils.URLPath) -> "Sample":
    """Create a tube sample from sample info dict."""
    assert "fcs" in sample_info and "path" in sample_info[
        "fcs"], "Path to sample_info is missing"
    assert "date" in sample_info, "Date is missing"
    path = utils.URLPath(sample_info["fcs"]["path"])
    date = utils.str_to_date(sample_info["date"])

    tube = str(sample_info.get("tube", "0"))
    material = Material.from_str(sample_info.get("material", ""))
    panel = sample_info.get("panel", "")

    markers = sample_info["fcs"].get("markers", None)
    count = int(sample_info["fcs"].get("event_count", 0)) or None

    sample_id = f"{case_id}_t{tube}_{material.name}_{sample_info['date']}"

    sample = FCSSample(id=sample_id,
                       case_id=case_id,
                       path=path,
                       dataset_path=dataset_path,
                       date=date,
                       tube=tube,
                       material=material,
                       panel=panel,
                       markers=markers,
                       count=count)
    return sample
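
A hypothetical invocation showing the expected `sample_info` schema; the dict values and paths are invented for illustration, and the date format is assumed to be ISO:

sample_info = {
    "fcs": {"path": "case1/tube1.lmd", "event_count": 50000},  # illustrative values
    "date": "2018-01-12",
    "tube": "1",
}
sample = sampleinfo_to_sample(sample_info, "case1", utils.URLPath("/data/fcs"))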
Example #6
def main(
        data: utils.URLPath = None,
        model: utils.URLPath = None,
        preds: utils.URLPath = None,
        output: utils.URLPath = None,
):
    data = utils.URLPath("/data/flowcat-data/paper-cytometry/som/unused")
    dataset = io_functions.load_case_collection(data, data + ".json.gz")
    # output = utils.URLPath("/data/flowcat-data/paper-cytometry/tsne")
    output = utils.URLPath("teststuff_unused_style")
    output.mkdir()

    # predictions = io_functions.load_json(utils.URLPath("/data/flowcat-data/paper-cytometry/tsne/prediction.json"))
    model = SOMClassifier.load(utils.URLPath("/data/flowcat-data/paper-cytometry/classifier"))

    som_tsne(dataset, model, output)
Example #7
def main(data: utils.URLPath, kfold_dir: utils.URLPath, output: utils.URLPath):
    # dataset = io_functions.load_case_collection(data, meta)
    # dataset.set_data_path(utils.URLPath(""))

    dataset = som_dataset.SOMDataset.from_path(data)
    models = []
    dirs = next(os.walk(kfold_dir))[1]

    for dirname in dirs:
        models.append(utils.URLPath(os.path.join(kfold_dir, dirname)))

    aucs = []
    curves = []
    for i, model in enumerate(models):
        print(model)
        model = SOMClassifier.load(model)
        validate = model.get_validation_data(dataset)
        grps = validate.group_count
        groups = model.config.groups

        if len(grps.keys()) != len(groups):
            continue

        val_seq = model.create_sequence(validate)

        trues = np.concatenate([val_seq[k][1] for k in range(len(val_seq))])
        preds = np.array([p for p in model.model.predict_generator(val_seq)])

        auc, curve = create_roc_results(trues, preds, output / f"roc_n{i}", model)
        aucs.append(auc)
        curves.append(curve)

    compute_mean_ROC(curves, output)
Example #8
    @classmethod
    def load(cls,
             path: str = None,
             ref_path: str = None,
             cls_path: str = None):
        """Load classifier from the given path, alternatively give a separate path for reference and classifier."""
        if path is not None:
            ref_path = utils.URLPath(path) / "reference"
            cls_path = utils.URLPath(path) / "classifier"
        elif ref_path is not None and cls_path is not None:
            ref_path = utils.URLPath(ref_path)
            cls_path = utils.URLPath(cls_path)
        else:
            raise ValueError(
                "Either path or ref_path and cls_path need to be set.")

        return cls(io_functions.load_casesom(ref_path),
                   SOMClassifier.load(cls_path), SOMSaliency.load(cls_path))
Example #9
def json_to_fcssample(samplejson: dict) -> "FCSSample":
    samplejson["date"] = utils.str_to_date(samplejson["date"])
    samplejson["path"] = utils.URLPath(samplejson["path"])
    if samplejson["material"]:
        samplejson["material"] = Material[samplejson["material"]]
    else:
        samplejson["material"] = None
    return FCSSample(**samplejson)
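
For illustration, a hypothetical serialized sample as this converter expects it; the keys mirror the FCSSample fields seen in sampleinfo_to_sample above and are assumptions, not verified against the class definition:

samplejson = {
    "id": "case1_t1", "case_id": "case1", "path": "case1/tube1.lmd",
    "dataset_path": utils.URLPath("/data/fcs"), "date": "2018-01-12",
    "tube": "1", "material": "", "panel": "", "markers": None, "count": None,
}
sample = json_to_fcssample(samplejson)  # empty material string becomes None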
Example #10
    def test_addition(self):
        cases = [
            ("testfile", "as", "testfileas"),
            ("/a/", "test", "/atest"),  # trailing slashes will get removed on creation
            ("/file", ".lmd", "/file.lmd"),
        ]
        for part_a, part_b, expected in cases:
            result = utils.URLPath(part_a) + part_b
            self.assertEqual(str(result), expected)
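
Addition is plain string concatenation on the path part; a minimal sketch under the assumption that trailing slashes are already stripped when a URLPath is constructed (hypothetical helper, not the flowcat source):

def urlpath_add(a: str, suffix: str) -> str:
    # Trailing slashes are assumed gone after construction, so "/a/" behaves like "/a".
    return a.rstrip("/") + suffix

assert urlpath_add("testfile", "as") == "testfileas"
assert urlpath_add("/a/", "test") == "/atest"
assert urlpath_add("/file", ".lmd") == "/file.lmd"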
Example #11
    def test_urls(self):
        cases = [
            ("a", "", ""),
            ("https://a", "https", "a"),
            ("https://dest.de/a", "https", "dest.de"),
        ]
        for url, scheme, netloc in cases:
            result = utils.URLPath(url)
            self.assertEqual(result._scheme, scheme)
            self.assertEqual(result._netloc, netloc)
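
The expected scheme/netloc split matches what urllib's standard parser yields, which suggests (but does not confirm) that URLPath delegates to it:

from urllib.parse import urlsplit

for url, scheme, netloc in [("a", "", ""), ("https://a", "https", "a"),
                            ("https://dest.de/a", "https", "dest.de")]:
    parts = urlsplit(url)
    assert parts.scheme == scheme and parts.netloc == netloc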
Example #12
def reconfigure_som_model(som_model: CaseSom, args: dict) -> CaseSom:
    """Reconfigure SOM by saving a copy and loading it again."""
    tmp_path = utils.URLPath("/tmp/flowcat/sommodel")

    io_functions.save_casesom(som_model, tmp_path)
    reconfigured_model = io_functions.load_casesom(tmp_path, **args)

    rmtree(str(tmp_path))

    return reconfigured_model
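
A hypothetical call site; the option name passed through to load_casesom is invented purely to illustrate the **args round-trip:

som_model = io_functions.load_casesom(utils.URLPath("output/som-model"))
# "max_epochs" is an assumed option name, for illustration only
retuned = reconfigure_som_model(som_model, {"max_epochs": 5})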
Example #13
    def test_wrapping(self):
        @cast_urlpath
        def testfun(a: utils.URLPath = None, b: str = None):
            return a, b

        res = testfun(utils.URLPath("a"), "b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun("a", "b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun("a", b="b")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun(a=utils.URLPath("b"), b="a")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)

        res = testfun(a="b", b="a")
        self.assertEqual(type(res[0]), utils.URLPath)
        self.assertNotEqual(type(res[1]), utils.URLPath)
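
A minimal sketch of a decorator that would satisfy this test, converting any argument annotated as utils.URLPath while leaving str-annotated ones alone (an assumption about the real flowcat implementation):

import functools
import inspect

from flowcat import utils

def cast_urlpath(fun):
    sig = inspect.signature(fun)

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        bound = sig.bind(*args, **kwargs)
        for name, value in bound.arguments.items():
            # Only coerce parameters whose annotation is utils.URLPath.
            if sig.parameters[name].annotation is utils.URLPath and value is not None:
                bound.arguments[name] = utils.URLPath(value)
        return fun(*bound.args, **bound.kwargs)

    return wrapper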
Example #14
def run_denovo(options, train_dataset, validate_dataset):
    config = options["config"]

    model = train_som_classifier(train_dataset, validate_dataset, config)

    output = utils.URLPath(options["output_path"])

    if validate_dataset:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)

        pred_arr, pred_labels = model.predict_generator(validate)
        true_labels = validate.true_labels
        pred_df = pd.DataFrame(pred_arr, columns=validate.binarizer.classes_, index=validate.dataset.labels)
        io_functions.save_csv(pred_df, output / "preds.csv")
        io_functions.save_json({"true": list(true_labels), "pred": list(pred_labels)}, output / "preds_labels.json")
        generate_all_metrics(true_labels, pred_labels, config.mapping, output)

    model.save(output)
    model.save_information(output)

    keras.backend.clear_session()
    del model
Example #15
"""
from dataclasses import dataclass
import pandas as pd
import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pyplot as plt

import seaborn as sns
import scipy.stats as sst

from flowcat import utils, io_functions

NAME = "result_analysis_removeedge"

RESULTS = {
    "path": utils.URLPath("output"),
    "names": ["classifier_ungated", "classifier_gated_removeedge"],
}

OUTPUT = utils.URLPath(f"output/{NAME}")

LOGGER = utils.setup_logging(utils.URLPath(f"logs/{NAME}_{utils.create_stamp()}"), NAME)

def get_result_dirs(path: utils.URLPath, names: list):
    """Get result directories for individual iterations from given path and names"""
    result_dirs = {
        name: Metrics(list(map(Result, path.glob(f"./{name}*")))) for name in names
    }
    return result_dirs

Example #16

import matplotlib
matplotlib.use("Agg")

from flowcat import io_functions, utils
from flowcat.plots import som as fc_somplot

LOGPATH = utils.URLPath("logs/visualize_datasets_{utils.create_stamp()}.log")

LOGGER = utils.logs.setup_logging(LOGPATH, "visualize_datasets")
OUTPUT = utils.URLPath("output/visualization/soms-ungated")

# OUTPUT.mkdir()
# 
# som_dataset = io_functions.load_case_collection(utils.URLPath("output/classifier_ungated/som"))
# 
# # testsample = som_dataset[0].samples[0]
# 
# for case in som_dataset.filter(groups=["CLL"]):
#     testsample = case.get_tube("1", kind="som")
#     LOGGER.info(testsample)
#     somdata = testsample.get_data()
#     fig = fc_somplot.plot_som_grid(somdata, channels=["SS INT LIN", "CD45-KrOr", None])
#     fig.savefig(str(OUTPUT / f"test_{case.id}.png"))

OUTPUT = utils.URLPath("output/visualization/soms-original")
som_dataset = io_functions.load_case_collection(
    utils.URLPath("/data/flowcat-data/paper-cytometry/som/train"),
    utils.URLPath("/data/flowcat-data/paper-cytometry/som/train.json.gz"))
OUTPUT.mkdir()
for case in som_dataset.filter(groups=["CLL"]):
    testsample = case.get_tube("1", kind="som")
    LOGGER.info(testsample)
    somdata = testsample.get_data()
Example #17

def print_usage():
    """print syntax of script invocation"""
    print("\nUsage:")
    print("python {0:} SOM_datapath outputpath panel(Erlangen, Bonn, MLL,"
          "or Berlin)\n".format(os.path.basename(sys.argv[0])))
    return


if __name__ == "__main__":
    if len(sys.argv) != 5:
        print_usage()
        raise Exception("Invalid arguments")

    SOM_DATASET = utils.URLPath(sys.argv[1])
    OUTPUT = utils.URLPath(sys.argv[2])
    PANEL = sys.argv[3]
    EPOCHS = int(sys.argv[4])

    LOGGER = utils.logs.setup_logging(None, "merged model")

    # set the groups according to the panel
    if panel == "MLL":
        groups = GROUPS

    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
Example #18
def main(data: utils.URLPath, model: utils.URLPath, output: utils.URLPath):
    dataset = io_functions.load_case_collection(data, data + ".json")
    dataset.set_data_path(utils.URLPath(""))

    model = SOMClassifier.load(model)
    validate = model.get_validation_data(dataset)
    val_seq = model.create_sequence(validate)

    trues = np.concatenate([val_seq[i][1] for i in range(len(val_seq))])
    preds = np.array([p for p in model.model.predict_generator(val_seq)])

    create_roc_results(trues, preds, output / "roc", model)
    create_threshold_results(trues, preds, output / "threshold", model)

    # tsne of result vectors
    embedding_path = output / "embedding-preds"
    embedding_path.mkdir()

    pred_labels = val_seq.true_labels
    groups = model.config["groups"]
    groups.remove("normal")
    groups = ["normal", *groups]
    all_groups = groups + ["AML", "MM", "HCLv"]
    colors = sns.cubehelix_palette(len(all_groups), rot=4, dark=0.30)
    perplexity = 50

    # tsne of intermediate layers
    intermediate_model = keras.Model(
        inputs=model.model.input,
        outputs=model.model.get_layer("concatenate_1").output)
    intermed_preds = np.array(
        [p for p in intermediate_model.predict_generator(val_seq)])

    # unknown data
    udata = utils.URLPath("output/unknown-cohorts-processing/som/som")
    udataset = io_functions.load_case_collection(udata, udata + ".json")
    udataset.set_data_path(utils.URLPath(""))
    un_seq = model.create_sequence(udataset)
    intermed_upreds = np.array(
        [p for p in intermediate_model.predict_generator(un_seq)])

    all_intermed = np.concatenate((intermed_preds, intermed_upreds))
    all_labels = pred_labels + un_seq.true_labels

    umap_inter_all = UMAP(n_neighbors=30).fit_transform(all_intermed)
    plot_embedded(umap_inter_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / "umap_intermediate_all.png"), dpi=300)

    tsne_inter_all = manifold.TSNE(perplexity=perplexity).fit_transform(all_intermed)
    plot_embedded(tsne_inter_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"tsne_intermediate_all_p{perplexity}.png"), dpi=300)

    # create som tsne for known and unknown data
    all_cases = validate.cases + udataset.cases

    case_data = []
    for case in all_cases:
        somdata = np.concatenate([
            case.get_tube(tube, kind="som").get_data().data
            for tube in model.config["tubes"]
        ], axis=2).flatten()
        case_data.append(somdata)
    case_data = np.array(case_data)

    umap_som_all = UMAP(n_neighbors=30).fit_transform(case_data)
    plot_embedded(umap_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / "umap_som_all.png"), dpi=300)

    tsne_som_all = manifold.TSNE(perplexity=perplexity).fit_transform(case_data)
    plot_embedded(tsne_som_all, all_labels, all_groups, colors=colors).savefig(
        str(embedding_path / f"tsne_som_all_p{perplexity}.png"), dpi=300)

    # plot legend
    fig = plt.figure()
    patches = [
        mpl.patches.Patch(color=color, label=group)
        for group, color in zip(all_groups, colors)
    ]
    fig.legend(patches, all_groups, loc='center', frameon=False)
    fig.savefig(str(embedding_path / "legend.png"), dpi=300)
Example #19
import math

import numpy as np


def create_class_weight(labels_dict, mu=0.15):
    total = np.sum(list(labels_dict.values()))
    keys = labels_dict.keys()
    class_weight = dict()

    for key in keys:
        score = math.log(mu * total / float(labels_dict[key]))
        class_weight[key] = score if score > 1.0 else 1.0

    return class_weight
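
# A worked example of the formula weight = max(1, log(mu * total / count)):
# with mu=0.15 and counts {"CLL": 800, "HCL": 40, "normal": 160}, total is
# 1000, so the rare HCL class gets log(0.15 * 1000 / 40) ~= 1.32 while the
# common classes fall below the clamp and stay at 1.0.
weights = create_class_weight({"CLL": 800, "HCL": 40, "normal": 160})
assert weights["CLL"] == 1.0
assert round(weights["HCL"], 2) == 1.32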


SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL5F")
OUTPUT = utils.URLPath(
    "/data/flowcat-data/2020-04_merged_train/TL/class_weights/mu_30/model_30")


def create_class_weight(labels_dict, mu=0.30):
    total = np.sum(list(labels_dict.values()))
    keys = labels_dict.keys()
    class_weight = dict()

    for key in keys:
        score = math.log(mu * total / float(labels_dict[key]))
        class_weight[key] = score if score > 1.0 else 1.0

    return class_weight
Example #20
    def save(self, path: str):
        """Save the current model into the given path."""
        path = utils.URLPath(path)
        path.mkdir()
        io_functions.save_casesom(self.reference, path / "reference")
        self.classifier.save(path / "classifier")
Example #21
def json_to_somsample(samplejson: dict) -> "SOMSample":
    samplejson["date"] = utils.str_to_date(samplejson["date"])
    samplejson["path"] = utils.URLPath(samplejson["path"])
    samplejson["dims"] = tuple(samplejson["dims"])
    return SOMSample(**samplejson)
Example #22
   model : base - 9F all CLL and normal samples
           target - 5F - increasing sample size (start with very few samples)

2) groups - only rare subtypes ( no CLL, MBl, normal)
   model : base - 9F all rare subtypes samples
           target - 5F - increasing sample size (start with very few samples)
"""

from flowcat import classifier, utils, io_functions
from flowcat.constants import DEFAULT_CLASSIFIER_CONFIG, GROUPS, DEFAULT_CLASSIFIER_ARGS
from flowcat import flowcat_api as fc_api
from flowcat.classifier import som_dataset
from flowcat.classifier.models import create_model_multi_input

#MARKERS = io_functions.load_json(utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/markers.json"))
SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F")
OUTPUT = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/Exp1")

LOGGER = utils.logs.setup_logging(None, "classify")

groups = ["MCL", "PL", "LPL", "MZL", "FL", "HCL"]
tubes = ("1")

mapping = None
dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset, split_ratio=0.9, groups=groups, mapping=mapping, balance=None)

config = classifier.SOMClassifierConfig(
    **{
        "tubes": {tube: dataset.config[tube]
Example #23
def main(data: utils.URLPath, meta: utils.URLPath, reference: utils.URLPath,
         model: utils.URLPath):
    data, meta, soms, model = map(utils.URLPath, [
        "/data/flowcat-data/mll-flowdata/decCLL-9F",
        "output/0-final-dataset/train.json.gz",
        "output/som-fix-test/soms-test/som_r4_1",
        "output/0-final/classifier-minmax-new",
    ])
    dataset = io_functions.load_case_collection(data, meta)
    soms = som_dataset.SOMDataset.from_path(soms)
    model = SaliencySOMClassifier.load(model)
    val_dataset = model.get_validation_data(dataset)
    val_seq = model.create_sequence(soms)

    # printing out weights and biases, unsure whether they actually contain
    # information
    # in theory we could extend that to attempt to describe them as gates
    tube = "3"
    weights, biases = model.model.layers[int(tube) + 2].get_weights()
    for j, chname in enumerate(model.config["tubes"][tube]["channels"]):
        ch_mean_weight = np.mean(weights[:, :, j, :])
        print(j, chname, ch_mean_weight)

    for i in range(weights.shape[-1]):
        mean_weight = np.mean(weights[:, :, :, i])
        print(i, mean_weight, biases[i])
        for j, chname in enumerate(model.config["tubes"]["1"]["channels"]):
            print(i, j, chname)
            print(weights[:, :, j, i])

    # zero out specific columns and see how that impacts performance
    output = utils.URLPath("output/0-final/model-analysis/occlusion")
    for group in model.config["groups"]:
        print(group)
        sel_cases = val_dataset.filter(groups=[group])
        avg_results = model.channel_occlusion(sel_cases, val_seq)
        print(sorted(avg_results, key=lambda t: t[2], reverse=True))
        io_functions.save_json(avg_results, output / f"{group}_avg_std.json")

    # case_som = soms.get_labels([case.id]).iloc[0]
    hcls = val_dataset.filter(groups=["HCL"])
    from collections import defaultdict
    max_vals = defaultdict(lambda: defaultdict(list))
    mean_vals = defaultdict(lambda: defaultdict(list))
    for case in hcls:
        print(case)
        gradient = model.calculate_saliency(val_seq,
                                            case,
                                            case.group,
                                            maximization=False)
        for i, (tube, markers) in enumerate(model.config["tubes"].items()):
            tgrad = gradient[i]
            for j, marker in enumerate(markers["channels"]):
                mgrad = tgrad[:, :, j]
                gmax = np.max(mgrad)
                max_vals[tube][marker].append(gmax)
                gmean = np.mean(mgrad)
                mean_vals[tube][marker].append(gmean)
    max_markers = defaultdict(list)
    for tube, markers in model.config["tubes"].items():
        for marker in markers["channels"]:
            print("Max", tube, marker, np.mean(max_vals[tube][marker]))
            print("Mean", tube, marker, np.mean(mean_vals[tube][marker]))
            max_markers[tube].append((marker, np.mean(max_vals[tube][marker])))

    for tube in model.config["tubes"]:
        print("Tube", tube)
        print("\n".join(": ".join((t[0], str(t[1]))) for t in sorted(
            max_markers[tube], key=lambda t: t[1], reverse=True)))

    c_model = MLLDATA / "mll-sommaps/models/relunet_samplescaled_sommap_6class/model_0.h5"
    c_labels = MLLDATA / "mll-sommaps/output/relunet_samplescaled_sommap_6class/test_labels.json"
    c_preds = MLLDATA / "mll-sommaps/models/relunet_samplescaled_sommap_6class/predictions_0.csv"
    c_config = MLLDATA / "mll-sommaps/output/relunet_samplescaled_sommap_6class/config.json"
    c_cases = MLLDATA / "mll-flowdata/CLL-9F"
    c_sommaps = MLLDATA / "mll-sommaps/sample_maps/selected1_toroid_s32"
    c_misclass = MLLDATA / "mll-sommaps/misclassifications/"
    c_tube = [1, 2]

    # load datasets
    somdataset = sd.SOMDataset.from_path(c_sommaps)
    cases = cc.CaseCollection.from_path(c_cases, how="case_info.json")

    # filter datasets
    test_labels = flowutils.load_json(c_labels)

    filtered_cases = cases.filter(labels=test_labels)
    somdataset.data[1] = somdataset.data[1].loc[test_labels, :]

    # get mapping
    config = flowutils.load_json(c_config)
    groupinfo = mappings.GROUP_MAPS[config["c_groupmap"]]

    dataset = cd.CombinedDataset(
        filtered_cases, {
            dd.Dataset.from_str('SOM'): somdataset,
            dd.Dataset.from_str('FCS'): filtered_cases
        },
        group_names=groupinfo['groups'])

    # modify mapping
    dataset.set_mapping(groupinfo)

    xoutputs = [
        loaders.loader_builder(
            loaders.Map2DLoader.create_inferred,
            tube=1,
            sel_count="counts",
            pad_width=1,
        ),
        loaders.loader_builder(
            loaders.Map2DLoader.create_inferred,
            tube=2,
            sel_count="counts",
            pad_width=1,
        )
    ]

    dataset = loaders.DatasetSequence.from_data(dataset,
                                                xoutputs,
                                                batch_size=1,
                                                draw_method="sequential")

    predictions = pd.read_csv(c_preds, index_col=0)

    predictions = add_correct_magnitude(predictions)
    predictions = add_infiltration(predictions, cases)

    misclass_labels = ['507777582649cbed8dfb3fe552a6f34f8b6c28e3']

    for label in misclass_labels:
        label_path = pathlib.Path(f"{c_misclass}/{label}")
        if not label_path.exists():
            label_path.mkdir()

        case = cases.get_label(label)

        # get the actual and the predicted class
        corr_group = predictions.loc[case.id, "correct"]
        pred_group = predictions.loc[case.id, "pred"]
        classes = [corr_group, pred_group]

        gradients = plotting.calc_saliency(dataset,
                                           case,
                                           c_model,
                                           classes=classes)

        for tube in c_tube:

            heatmaps = plotting.draw_saliency_heatmap(case, gradients, classes,
                                                      tube)
            for idx, heatmap in enumerate(heatmaps):
                plotting.save_figure(
                    heatmap,
                    f"{c_misclass}/{label}/{classes[idx]}_tube_{tube}_saliency_heatmap.png"
                )

            scatterplots = plotting.plot_tube(case,
                                              tube,
                                              gradients[tube - 1],
                                              classes=classes,
                                              sommappath=c_sommaps)
            for idx, scatterplot in enumerate(scatterplots):
                plotting.save_figure(
                    scatterplot,
                    f"{c_misclass}/{label}/{classes[idx]}_tube_{tube}_scatterplots.png"
                )
Example #24
            "base_model_path": str(base_model_path / "model.h5"),
            "output_path": output_path / f"kfold_n{n}",
            "config": classifier.SOMClassifierConfig(**{
                "tubes": {tube: dataset.config[tube] for tube in tubes},
                "groups": groups,
                "pad_width": 2,
                "mapping": mapping,
                "cost_matrix": None,
                "train_epochs": 15,
            })
        }
        run_transfer(options, train_dataset, validate_dataset)


if __name__ == "__main__":
    OUTPUT = utils.URLPath("/data/flowcat-data/2021-01_kfold_n10_startified")
    LOGGER = utils.logs.setup_logging(OUTPUT / "logs.txt", "merged model with TL")
    experiments = {
        "mll5f": {
            "output_path": OUTPUT / "mll5f",
            "som_dataset_path": "/data/flowcat-data/2020_Nov_rerun/Merged_SOM/MLL5F",
            "panel": "MLL",
            "base_model_path": "/data/flowcat-data/2020_Nov_rerun/Merged_model/MLL9F",
            "k_number": 10,
            "rerun": False,
            "stratified": False,
        },
        "bonn": {
            "output_path": OUTPUT / "bonn",
            "som_dataset_path": "/data/flowcat-data/2020_Nov_rerun/Merged_SOM/Bonn/with_9F_ref",
            "panel": "BONN",
Example #25
        utils.logs.create_handler(utils.logs.print_stream()),
    ]


def setup_logging(logging_path, name):
    logging_path.parent.mkdir()

    logger = logging.getLogger(name)
    handlers = create_logging_handlers(logging_path)
    utils.logs.add_logger(logger, handlers)
    return logger


INPUT = {
    "data":
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    "meta":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"
    ),
}

OUTPUT = utils.URLPath("output/samples")

LOGPATH = utils.URLPath(f"logs/filter_samples_{utils.create_stamp()}.log")

LOGGER = setup_logging(LOGPATH, "filter_samples")

fc_seed.set_seed(42)
OUTPUT.mkdir()
train_dataset = io_functions.load_case_collection(INPUT["data"], INPUT["meta"])
Example #26

def print_usage():
    """print syntax of script invocation"""
    print("\nUsage:")
    print("python {0:} SOM_datapath outputpath panel(Erlangen, Bonn, MLL,"
          "or Berlin) basemodel_path\n".format(os.path.basename(sys.argv[0])))
    return


if __name__ == "__main__":
    if len(sys.argv) != 6:
        print_usage()
        raise Exception("Invalid arguments")

    SOM_DATASET = utils.URLPath(sys.argv[1])
    OUTPUT = utils.URLPath(sys.argv[2])
    PANEL = sys.argv[3]
    BASE_MODEL_PATH = utils.URLPath(sys.argv[4])
    EPOCHS = int(sys.argv[5])

    LOGGER = utils.logs.setup_logging(None, "merged model with TL")

    # set the groups according to the panel
    if panel == "MLL":
        groups = GROUPS

    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    else:
Example #27
def load_case_collection(data: str, meta: str = None):
    data = utils.URLPath(data)
    if meta is not None:
        meta = utils.URLPath(meta)

    return io_functions.load_case_collection(data, meta)
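
# Hypothetical invocation; the paths mirror the dataset locations used
# elsewhere in these examples:
dataset = load_case_collection(
    "/data/flowcat-data/mll-flowdata/decCLL-9F",
    "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz")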
Example #28
    def __init__(self, path, *args, **kwargs):
        super().__init__(*args, **kwargs)
        path = utils.URLPath(path)
        self.data = case_dataset.CaseCollection.from_path(path)

Example #29
from collections import defaultdict
from flowcat import io_functions, utils, seed as fc_seed

INPUT = {
    "data":
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    "meta":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"
    ),
    "meta_test":
    utils.URLPath(
        "/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz"
    ),
}

train_dataset = io_functions.load_case_collection(INPUT["data"],
                                                  INPUT["meta_test"])
sorted_cases = sorted(train_dataset,
                      key=lambda c: c.infiltration
                      if c.infiltration > 0.0 else 1000)

perc01_count = 0
group_count = defaultdict(int)
for case in sorted_cases[:100]:
    print("Minimal infiltration sample:", case, case.infiltration)
    if case.infiltration == 0.1:
        perc01_count += 1
        group_count[case.group] += 1

print(perc01_count)
Example #30
"""
Acquire FCS information needed for Miflowcyt document.

Also roughly check whether we have strongly diverging data in our dataset.
"""
from flowcat import dataset as fc_dataset, io_functions, utils
import fcsparser


def section(text, level=4, deco="#"):
    deco_text = deco * level
    section_text = f"{deco_text} {text} {deco_text}"
    print(section_text)


train_dataset = io_functions.load_case_collection(
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/train.json.gz"))
test_dataset = io_functions.load_case_collection(
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F"),
    utils.URLPath("/data/flowcat-data/mll-flowdata/decCLL-9F.2019-10-29.meta/test.json.gz"))

print("Loading all data used in paper analysis.")
dataset = train_dataset + test_dataset
print(dataset)

section("Get info for case 0")
case = dataset[0]
print(case)

sample = case.samples[0]
meta, data = fcsparser.parse(sample.complete_path)
for i in range(1, 13):
    name = f"$P{i}S"
    voltage = f"$P{i}V"