Esempio n. 1
0
def train(data: utils.URLPath, output: utils.URLPath):
    """Fit a SOM classifier on the dataset at ``data`` and store it in ``output``.

    The SOM dataset is loaded from ``data``, turned into a training and a
    validation dataset by ``prepare_classifier_train_dataset`` and handed to
    ``train_som_classifier``. The resulting model and its accompanying
    information are both saved to ``output``.

    Args:
        data: Location the SOM dataset is loaded from.
        output: Destination passed to ``model.save`` and
            ``model.save_information``.
    """
    selected_groups = GROUPS
    used_tubes = ("1", "2", "3")
    # Values handed to the dataset preparation as ``balance``, keyed by group.
    group_balance = {
        "CLL": 4000,
        "MBL": 2000,
        "MCL": 1000,
        "PL": 1000,
        "LPL": 1000,
        "MZL": 1000,
        "FL": 1000,
        "HCL": 1000,
        "normal": 6000,
    }
    group_mapping = None

    som_data = som_dataset.SOMDataset.from_path(data)
    train_part, validate_part = prepare_classifier_train_dataset(
        som_data,
        groups=selected_groups,
        mapping=group_mapping,
        balance=group_balance,
    )

    # One config entry per tube, copied from the loaded dataset's config.
    settings = {
        "tubes": {tube_id: som_data.config[tube_id] for tube_id in used_tubes},
        "groups": selected_groups,
        "pad_width": 2,
        "mapping": group_mapping,
        "cost_matrix": None,
    }
    som_config = classifier.SOMClassifierConfig(**settings)

    trained = train_som_classifier(train_part, validate_part, som_config)
    trained.save(output)
    trained.save_information(output)
Esempio n. 2
0
def main(args):
    """Train a SOM classifier from explicit train/validation label lists.

    Args:
        args: Object exposing the attributes read below:
            ``input`` (location the SOM dataset is loaded from),
            ``train`` and ``val`` (JSON files whose content is used as the
            ``labels`` filter for the training and validation subsets),
            ``output`` (where the model and its information are saved),
            ``panel`` (selects the list of groups) and
            ``bal`` (balance value assigned to every group).
    """
    dataset = som_dataset.SOMDataset.from_path(args.input)
    val_path = args.val
    train_path = args.train
    output = args.output
    panel = args.panel
    bal = args.bal

    # Set the groups according to the panel; any panel other than MLL or
    # ERLANGEN falls back to the shortest group list.
    if panel == "MLL":
        groups = GROUPS
    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    # The trailing comma makes this a real one-element tuple; ``("1")`` is
    # just the string "1" and only iterates correctly for single-character ids.
    tubes = ("1",)
    mapping = None

    # Every group receives the same balance value.
    balance = {group: bal for group in groups}

    config = classifier.SOMClassifierConfig(
        **{
            "tubes": {tube: dataset.config[tube] for tube in tubes},
            "groups": groups,
            "pad_width": 2,
            "mapping": mapping,
            "cost_matrix": None,
            "train_epochs": 20,
        })

    # Restrict the dataset to the labels listed in the two JSON files.
    val_labels = io_functions.load_json(val_path)
    validate_dataset = dataset.filter(labels=val_labels)

    train_labels = io_functions.load_json(train_path)
    train_dataset = dataset.filter(labels=train_labels)

    # NOTE(review): split_ratio is passed together with an explicit
    # val_dataset; how the helper combines the two is not visible here.
    train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
        train_dataset,
        split_ratio=0.9,
        groups=groups,
        mapping=mapping,
        balance=balance,
        val_dataset=validate_dataset)

    print(train_dataset.group_count)
    print(validate_dataset.group_count)

    model = fc_api.train_som_classifier(train_dataset, validate_dataset,
                                        config)

    model.save(output)
    model.save_information(output)
Esempio n. 3
0
def run_denovo(options, train_dataset, validate_dataset):
    """Train a SOM classifier and, if validation data is given, evaluate it.

    Args:
        options: Mapping read for the keys ``"config"`` (classifier config,
            also providing ``valid_batch_size`` and ``mapping``) and
            ``"output_path"`` (directory-like location for all outputs).
        train_dataset: Dataset passed to ``train_som_classifier`` for training.
        validate_dataset: Dataset used for validation; when falsy the
            prediction and metrics step is skipped.

    When validation data is present, ``preds.csv`` and ``preds_labels.json``
    are written below the output path and ``generate_all_metrics`` is called.
    The model and its information are saved in either case.
    """
    som_config = options["config"]

    som_model = train_som_classifier(train_dataset, validate_dataset, som_config)

    out_dir = utils.URLPath(options["output_path"])

    if validate_dataset:
        val_sequence = som_model.create_sequence(
            validate_dataset, som_config.valid_batch_size)

        probabilities, predicted = som_model.predict_generator(val_sequence)
        expected = val_sequence.true_labels

        # One row per validation label, one column per binarizer class.
        prob_frame = pd.DataFrame(
            probabilities,
            columns=val_sequence.binarizer.classes_,
            index=val_sequence.dataset.labels,
        )
        io_functions.save_csv(prob_frame, out_dir / "preds.csv")

        label_payload = {"true": list(expected), "pred": list(predicted)}
        io_functions.save_json(label_payload, out_dir / "preds_labels.json")

        generate_all_metrics(expected, predicted, som_config.mapping, out_dir)

    som_model.save(out_dir)
    som_model.save_information(out_dir)

    # Reset the Keras backend session and drop the local model reference.
    keras.backend.clear_session()
    del som_model
Esempio n. 4
0
# NOTE: ``groups``, ``tubes``, ``SOM_DATASET`` and ``OUTPUT`` are not assigned
# in this fragment; they must already be defined when it runs.
# Disabled two-tube variant kept for reference:
#tubes = ("1", "2")

mapping = None
dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)

# Prepare train/validation datasets without any balancing.
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset,
    split_ratio=0.3,
    groups=groups,
    mapping=mapping,
    balance=None,
)
labels_dict = train_dataset.group_count

# One config entry per tube, copied from the loaded dataset's config.
config = classifier.SOMClassifierConfig(
    tubes={tube: dataset.config[tube] for tube in tubes},
    groups=groups,
    pad_width=2,
    mapping=mapping,
    cost_matrix=None,
)

# Weights are computed from the training group counts, then re-keyed by the
# position of each group in ``groups``; groups without a weight get 1.0.
# Disabled alternative kept for reference:
#class_weight = utils.classification_utils.calculate_group_weights(labels_dict)
class_weight = create_class_weight(labels_dict)
class_weight = dict(
    (position, class_weight.get(group, 1.0))
    for position, group in enumerate(groups)
)
print(class_weight)

model = fc_api.train_som_classifier(
    train_dataset,
    validate_dataset,
    config,
    class_weights=class_weight,
)

model.save(OUTPUT)
model.save_information(OUTPUT)
Esempio n. 5
0
from flowcat.classifier import som_dataset
from flowcat.classifier.models import create_model_multi_input

#MARKERS = io_functions.load_json(utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/markers.json"))
# Location of the SOM dataset to load and the directory the trained model is
# saved to.
SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F")
OUTPUT = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/Exp1")

LOGGER = utils.logs.setup_logging(None, "classify")

groups = ["MCL", "PL", "LPL", "MZL", "FL", "HCL"]
# The trailing comma makes this a real one-element tuple; ``("1")`` is just
# the string "1" and only iterates correctly for single-character tube ids.
tubes = ("1",)

mapping = None
dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)

# Prepare train/validation datasets without any balancing.
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset, split_ratio=0.9, groups=groups, mapping=mapping, balance=None)

# One config entry per tube, copied from the loaded dataset's config.
config = classifier.SOMClassifierConfig(
    **{
        "tubes": {tube: dataset.config[tube]
                  for tube in tubes},
        "groups": groups,
        "pad_width": 2,
        "mapping": mapping,
        "cost_matrix": None,
    })
model = fc_api.train_som_classifier(train_dataset, validate_dataset, config)

# Persist both the model and its accompanying information to OUTPUT.
model.save(OUTPUT)
model.save_information(OUTPUT)