Example 1
def main(data: utils.URLPath, kfold_dir: utils.URLPath, output: utils.URLPath):
    # dataset = io_functions.load_case_collection(data, meta)
    # dataset.set_data_path(utils.URLPath(""))

    dataset = som_dataset.SOMDataset.from_path(data)
    models = []
    # immediate subdirectories of kfold_dir, one model directory per fold
    dirs = next(os.walk(kfold_dir))[1]

    for fold_dir in dirs:
        models.append(utils.URLPath(os.path.join(kfold_dir, fold_dir)))

    aucs = []
    curves = []
    for i, model in enumerate(models):
        print(model)
        model = SOMClassifier.load(model)
        validate = model.get_validation_data(dataset)
        grps = validate.group_count
        groups = model.config.groups

        # skip folds whose validation split does not contain every configured group
        if len(grps) != len(groups):
            continue

        val_seq = model.create_sequence(validate)

        trues = np.concatenate([val_seq[k][1] for k in range(len(val_seq))])
        preds = np.array([p for p in model.model.predict_generator(val_seq)])

        auc, curve = create_roc_results(trues, preds, output / f"roc_n{i}", model)
        aucs.append(auc)
        curves.append(curve)

    compute_mean_ROC(curves, output)
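The helpers create_roc_results and compute_mean_ROC are not shown here. A minimal sketch of the averaging step, assuming each entry in curves is an (fpr, tpr) pair returned per fold:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc

def mean_roc_sketch(curves, output):
    """Hypothetical helper: average per-fold ROC curves on a common FPR grid."""
    grid = np.linspace(0, 1, 100)
    # interpolate every fold's TPR onto the shared FPR grid
    tprs = [np.interp(grid, fpr, tpr) for fpr, tpr in curves]
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[0], mean_tpr[-1] = 0.0, 1.0
    fig, ax = plt.subplots()
    ax.plot(grid, mean_tpr, label=f"mean ROC (AUC = {auc(grid, mean_tpr):.2f})")
    ax.plot([0, 1], [0, 1], linestyle="--", color="grey")
    ax.set_xlabel("False positive rate")
    ax.set_ylabel("True positive rate")
    ax.legend()
    fig.savefig(str(output / "mean_roc.png"), dpi=300)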
Example 2
def main(
        data: utils.URLPath = None,
        model: utils.URLPath = None,
        preds: utils.URLPath = None,
        output: utils.URLPath = None,
):
    # the CLI arguments above are overridden with hardcoded paths
    data = utils.URLPath("/data/flowcat-data/paper-cytometry/som/unused")
    dataset = io_functions.load_case_collection(data, data + ".json.gz")
    # output = utils.URLPath("/data/flowcat-data/paper-cytometry/tsne")
    output = utils.URLPath("teststuff_unused_style")
    output.mkdir()

    # predictions = io_functions.load_json(utils.URLPath("/data/flowcat-data/paper-cytometry/tsne/prediction.json"))
    model = SOMClassifier.load(utils.URLPath("/data/flowcat-data/paper-cytometry/classifier"))

    som_tsne(dataset, model, output)
Example 3
    @classmethod
    def load(cls,
             path: str = None,
             ref_path: str = None,
             cls_path: str = None):
        """Load the classifier from a single bundle path, or from separate reference and classifier paths."""
        if path is not None:
            ref_path = utils.URLPath(path) / "reference"
            cls_path = utils.URLPath(path) / "classifier"
        elif ref_path is not None and cls_path is not None:
            ref_path = utils.URLPath(ref_path)
            cls_path = utils.URLPath(cls_path)
        else:
            raise ValueError(
                "Either path or ref_path and cls_path need to be set.")

        return cls(io_functions.load_casesom(ref_path),
                   SOMClassifier.load(cls_path), SOMSaliency.load(cls_path))
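The owning class is not shown in this snippet; calling it ModelBundle purely for illustration, the two supported call patterns would be:

# single bundle directory containing "reference" and "classifier" subdirectories
bundle = ModelBundle.load(path="output/classifier-run")

# or point to the two parts separately
bundle = ModelBundle.load(ref_path="output/reference",
                          cls_path="output/classifier")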
Example 4
def train_som_classifier(
    train_dataset: "CaseCollection",
    validate_dataset: "CaseCollection",
    config: SOMClassifierConfig = None,
    class_weights=None,
    model_fun: "Callable" = create_model_multi_input,
) -> "SOMClassifier":
    """Configure the dataset based on config and train a given model."""
    model = SOMClassifier(config)
    model.create_model(model_fun)

    train = model.create_sequence(train_dataset, config.train_batch_size)

    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset,
                                         config.valid_batch_size)
    else:
        validate = None

    model.train_generator(train,
                          validate,
                          epochs=config.train_epochs,
                          class_weight=class_weights)
    return model
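A hedged usage sketch, reusing the SOMClassifierConfig arguments that appear in Example 7 below; dataset, train_dataset, validate_dataset and output are placeholders:

config = SOMClassifierConfig(
    tubes={"1": dataset.config["1"]},
    groups=["CLL", "normal"],
    pad_width=2,
    mapping=None,
    cost_matrix=None,
    train_epochs=15,
)
model = train_som_classifier(train_dataset, validate_dataset, config)
model.save(output)
model.save_information(output)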
Example 5
def run_transfer(options, train_dataset, validate_dataset):
    config = options["config"]

    base_model = models.load_model(options["base_model_path"])

    tl_model = create_tl_model(base_model, config)

    model = SOMClassifier(config, tl_model)
    train = model.create_sequence(train_dataset, config.train_batch_size)

    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset, config.valid_batch_size)
    else:
        validate = None

    model.train_generator(train, validate, epochs=config.train_epochs, class_weight=None)

    output = utils.URLPath(options["output_path"])

    if validate is not None:
        pred_arr, pred_labels = model.predict_generator(validate)
        true_labels = validate.true_labels
        pred_df = pd.DataFrame(pred_arr, columns=validate.binarizer.classes_, index=validate.dataset.labels)
        io_functions.save_csv(pred_df, output / "preds.csv")
        io_functions.save_json({"true": list(true_labels), "pred": list(pred_labels)}, output / "preds_labels.json")
        generate_all_metrics(true_labels, pred_labels, config.mapping, output)

    model.save(output)
    model.save_information(output)

    keras.backend.clear_session()
    del model
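run_transfer clears the Keras session before returning, so it can be called repeatedly in a single process without accumulating stale graph state. A sketch, assuming all_options is a placeholder list of option dicts:

for options in all_options:
    # each call builds, trains, evaluates and saves one transfer-learned model,
    # then frees the TensorFlow graph via keras.backend.clear_session()
    run_transfer(options, train_dataset, validate_dataset)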
Example 6
def main(data: utils.URLPath, model: utils.URLPath, output: utils.URLPath):
    dataset = io_functions.load_case_collection(data, data + ".json")
    dataset.set_data_path(utils.URLPath(""))

    model = SOMClassifier.load(model)
    validate = model.get_validation_data(dataset)
    val_seq = model.create_sequence(validate)

    trues = np.concatenate([val_seq[i][1] for i in range(len(val_seq))])
    preds = np.array([p for p in model.model.predict_generator(val_seq)])

    create_roc_results(trues, preds, output / "roc", model)
    create_threshold_results(trues, preds, output / "threshold", model)

    # tsne of result vectors
    embedding_path = output / "embedding-preds"
    embedding_path.mkdir()

    # true labels of the validation cases, used to colour the embeddings
    val_labels = val_seq.true_labels
    # move "normal" to the front of the group ordering
    groups = model.config["groups"]
    groups.remove("normal")
    groups = ["normal", *groups]
    all_groups = groups + ["AML", "MM", "HCLv"]
    colors = sns.cubehelix_palette(len(all_groups), rot=4, dark=0.30)
    perplexity = 50

    # tsne of intermediate layers
    intermediate_model = keras.Model(
        inputs=model.model.input,
        outputs=model.model.get_layer("concatenate_1").output)
    intermed_preds = np.array(
        [p for p in intermediate_model.predict_generator(val_seq)])

    # unknown data
    udata = utils.URLPath("output/unknown-cohorts-processing/som/som")
    udataset = io_functions.load_case_collection(udata, udata + ".json")
    udataset.set_data_path(utils.URLPath(""))
    un_seq = model.create_sequence(udataset)
    intermed_upreds = np.array(
        [p for p in intermediate_model.predict_generator(un_seq)])

    all_intermed = np.concatenate((intermed_preds, intermed_upreds))
    all_labels = val_labels + un_seq.true_labels

    umap_inter_all = UMAP(n_neighbors=30).fit_transform(all_intermed)
    fig = plot_embedded(umap_inter_all, all_labels, all_groups, colors=colors)
    fig.savefig(str(embedding_path / "umap_intermediate_all.png"), dpi=300)

    tsne_inter_all = manifold.TSNE(perplexity=perplexity).fit_transform(all_intermed)
    fig = plot_embedded(tsne_inter_all, all_labels, all_groups, colors=colors)
    fig.savefig(str(embedding_path / f"tsne_intermediate_all_p{perplexity}.png"), dpi=300)

    # create som tsne for known and unknown data
    all_cases = validate.cases + udataset.cases

    case_data = []
    for case in all_cases:
        somdata = np.concatenate(
            [case.get_tube(tube, kind="som").get_data().data
             for tube in model.config["tubes"]],
            axis=2).flatten()
        case_data.append(somdata)
    case_data = np.array(case_data)

    umap_som_all = UMAP(n_neighbors=30).fit_transform(case_data)
    fig = plot_embedded(umap_som_all, all_labels, all_groups, colors=colors)
    fig.savefig(str(embedding_path / "umap_som_all.png"), dpi=300)

    tsne_som_all = manifold.TSNE(perplexity=perplexity).fit_transform(case_data)
    fig = plot_embedded(tsne_som_all, all_labels, all_groups, colors=colors)
    fig.savefig(str(embedding_path / f"tsne_som_all_p{perplexity}.png"), dpi=300)

    # plot legend
    fig = plt.figure()
    patches = [
        mpl.patches.Patch(color=color, label=group)
        for group, color in zip(all_groups, colors)
    ]
    fig.legend(patches, all_groups, loc='center', frameon=False)
    fig.savefig(str(embedding_path / "legend.png"), dpi=300)
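plot_embedded is not defined in this snippet. A minimal sketch of what such a helper might look like, assuming it scatters a 2D embedding with one colour per group and returns the figure:

import matplotlib.pyplot as plt
import numpy as np

def plot_embedded(points, labels, groups, colors):
    """Hypothetical helper: scatter a 2D embedding, coloured by group."""
    labels = np.asarray(labels)
    color_map = dict(zip(groups, colors))
    fig, ax = plt.subplots()
    for group in groups:
        xy = points[labels == group]
        ax.scatter(xy[:, 0], xy[:, 1], s=8, color=color_map[group], label=group)
    ax.set_xticks([])
    ax.set_yticks([])
    return fig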
Example 7
def main(args):
    dataset = som_dataset.SOMDataset.from_path(args.input)
    val = args.val
    train = args.train
    OUTPUT = args.output
    PANEL = args.panel
    basemodel = args.basemodel
    bal = args.bal

    # set the groups according to the panel
    if PANEL == "MLL":
        groups = GROUPS
    elif PANEL == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    tubes = ("1")
    mapping = None

    balance = dict((key, bal) for key in groups)

    config = classifier.SOMClassifierConfig(
        tubes={tube: dataset.config[tube] for tube in tubes},
        groups=groups,
        pad_width=2,
        mapping=mapping,
        cost_matrix=None,
        train_epochs=15,
    )
    val = io_functions.load_json(val)
    validate_dataset = dataset.filter(labels=val)

    labels = io_functions.load_json(train)
    train_dataset = dataset.filter(labels=labels)

    train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
        train_dataset,
        split_ratio=0.9,
        groups=groups,
        mapping=mapping,
        balance=balance,
        val_dataset=validate_dataset)

    print(train_dataset.group_count)
    print(validate_dataset.group_count)

    # load base model and get weights
    base_model = models.load_model(str(basemodel / "model.h5"))
    weights = base_model.get_weights()

    # create model
    model = create_model(config.inputs, config.output)

    model.set_weights(weights)

    # freeze the two dense layers (layer names may differ between datasets)
    model.get_layer('dense_1').trainable = False
    model.get_layer('dense_2').trainable = False

    model.compile(loss=config.get_loss(modeldir=None),
                  optimizer="adam",
                  metrics=["accuracy"])

    # wrap the compiled Keras model in a SOMClassifier
    model = SOMClassifier(config, model)

    train = model.create_sequence(train_dataset, config.train_batch_size)

    if validate_dataset is not None:
        validate = model.create_sequence(validate_dataset,
                                         config.valid_batch_size)
    else:
        validate = None

    model.train_generator(train,
                          validate,
                          epochs=config.train_epochs,
                          class_weight=None)

    model.save(OUTPUT)
    model.save_information(OUTPUT)
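Keras applies trainable flags when the model is compiled, so the two dense layers above must be (and are) frozen before compile is called. A quick sanity check one could run just before wrapping the raw Keras model in SOMClassifier:

assert not model.get_layer("dense_1").trainable
assert not model.get_layer("dense_2").trainable
print(f"trainable weight tensors: {len(model.trainable_weights)}, "
      f"frozen: {len(model.non_trainable_weights)}")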