def train(data: utils.URLPath, output: utils.URLPath):
    """Train a SOM classifier on the dataset at ``data`` and save it to ``output``.

    The classifier is configured for tubes "1", "2" and "3" and for the
    groups listed in the module-level ``GROUPS``. No group mapping and no
    cost matrix are set.

    Args:
        data: Location handed to ``SOMDataset.from_path``.
        output: Destination handed to ``model.save`` and
            ``model.save_information``.
    """
    class_groups = GROUPS
    tube_ids = ("1", "2", "3")
    # Per-group counts passed as ``balance`` when preparing the datasets.
    samples_per_group = dict(
        CLL=4000,
        MBL=2000,
        MCL=1000,
        PL=1000,
        LPL=1000,
        MZL=1000,
        FL=1000,
        HCL=1000,
        normal=6000,
    )
    group_mapping = None

    som_data = som_dataset.SOMDataset.from_path(data)
    training_part, validation_part = prepare_classifier_train_dataset(
        som_data,
        groups=class_groups,
        mapping=group_mapping,
        balance=samples_per_group,
    )

    # Carry over the per-tube configuration of the loaded dataset.
    tube_configs = {}
    for tube_id in tube_ids:
        tube_configs[tube_id] = som_data.config[tube_id]

    classifier_config = classifier.SOMClassifierConfig(
        tubes=tube_configs,
        groups=class_groups,
        pad_width=2,
        mapping=group_mapping,
        cost_matrix=None,
    )

    trained_model = train_som_classifier(
        training_part, validation_part, classifier_config)
    trained_model.save(output)
    trained_model.save_information(output)
def main(args):
    """Train a single-tube SOM classifier from explicit train/validation labels.

    Args:
        args: Object exposing the attributes

            * ``input``: location handed to ``SOMDataset.from_path``,
            * ``train`` / ``val``: JSON files read with
              ``io_functions.load_json``; their content is used as the
              ``labels`` filter for the training / validation dataset,
            * ``output``: destination for the saved model and its information,
            * ``panel``: ``"MLL"``, ``"ERLANGEN"`` or anything else (fallback
              group list),
            * ``bal``: balance value applied to every group.
    """
    dataset = som_dataset.SOMDataset.from_path(args.input)
    val_labels_path = args.val
    train_labels_path = args.train
    output = args.output
    panel = args.panel
    per_group_balance = args.bal

    # Set the groups according to the panel.
    if panel == "MLL":
        groups = GROUPS
    elif panel == "ERLANGEN":
        groups = ["CLL", "MBL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]
    else:
        groups = ["CLL", "MCL", "LPL", "MZL", "FL", "HCL", "normal"]

    # The trailing comma makes this a real one-element tuple; ("1") is just
    # the string "1" and only iterates correctly for single-character ids.
    tubes = ("1",)
    mapping = None
    balance = {group: per_group_balance for group in groups}

    config = classifier.SOMClassifierConfig(
        **{
            "tubes": {tube: dataset.config[tube] for tube in tubes},
            "groups": groups,
            "pad_width": 2,
            "mapping": mapping,
            "cost_matrix": None,
            "train_epochs": 20,
        })

    # Restrict the full dataset to the predefined validation / training labels.
    val_labels = io_functions.load_json(val_labels_path)
    validate_dataset = dataset.filter(labels=val_labels)

    train_labels = io_functions.load_json(train_labels_path)
    train_dataset = dataset.filter(labels=train_labels)

    train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
        train_dataset,
        split_ratio=0.9,
        groups=groups,
        mapping=mapping,
        balance=balance,
        val_dataset=validate_dataset)

    print(train_dataset.group_count)
    print(validate_dataset.group_count)

    model = fc_api.train_som_classifier(train_dataset, validate_dataset, config)

    model.save(output)
    model.save_information(output)
def run_denovo(options, train_dataset, validate_dataset):
    """Train a classifier from scratch, optionally evaluate it, and save it.

    Args:
        options: Mapping providing ``"config"`` (classifier configuration)
            and ``"output_path"`` (destination directory).
        train_dataset: Dataset passed to ``train_som_classifier`` for training.
        validate_dataset: Dataset passed to ``train_som_classifier`` for
            validation. When truthy, predictions on it are written to
            ``preds.csv`` and ``preds_labels.json`` below the output path and
            ``generate_all_metrics`` is run.
    """
    cfg = options["config"]
    trained = train_som_classifier(train_dataset, validate_dataset, cfg)
    out_dir = utils.URLPath(options["output_path"])

    if validate_dataset:
        sequence = trained.create_sequence(validate_dataset, cfg.valid_batch_size)
        scores, predicted = trained.predict_generator(sequence)
        expected = sequence.true_labels

        # One row per validation label, one column per class.
        score_table = pd.DataFrame(
            scores,
            columns=sequence.binarizer.classes_,
            index=sequence.dataset.labels,
        )
        io_functions.save_csv(score_table, out_dir / "preds.csv")

        label_record = {"true": list(expected), "pred": list(predicted)}
        io_functions.save_json(label_record, out_dir / "preds_labels.json")

        generate_all_metrics(expected, predicted, cfg.mapping, out_dir)

    trained.save(out_dir)
    trained.save_information(out_dir)

    # Reset the Keras session and drop the local model reference.
    keras.backend.clear_session()
    del trained
# Alternative tube selection kept for reference:
#tubes = ("1", "2")
mapping = None

dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset,
    split_ratio=0.3,
    groups=groups,
    mapping=mapping,
    balance=None,
)

# Group counts of the training split; used as input for the class weights.
labels_dict = train_dataset.group_count

config = classifier.SOMClassifierConfig(
    tubes=dict((tube, dataset.config[tube]) for tube in tubes),
    groups=groups,
    pad_width=2,
    mapping=mapping,
    cost_matrix=None,
)

class_weight = create_class_weight(labels_dict)
# Alternative weighting kept for reference:
#class_weight = utils.classification_utils.calculate_group_weights(labels_dict)

# Re-key the weights by the position of each group in ``groups``; groups
# without a computed weight fall back to 1.0.
class_weight = dict(
    (index, class_weight.get(group, 1.0))
    for index, group in enumerate(groups)
)
print(class_weight)

model = fc_api.train_som_classifier(
    train_dataset,
    validate_dataset,
    config,
    class_weights=class_weight,
)
model.save(OUTPUT)
model.save_information(OUTPUT)
from flowcat.classifier import som_dataset
from flowcat.classifier.models import create_model_multi_input

# Script: train a SOM classifier on tube "1" of the MLL9F dataset for the
# groups listed below and store the model in OUTPUT.

#MARKERS = io_functions.load_json(utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/markers.json"))
SOM_DATASET = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F")
OUTPUT = utils.URLPath("/data/flowcat-data/2020-04_merged_train/MLL9F/Exp1")

LOGGER = utils.logs.setup_logging(None, "classify")

groups = ["MCL", "PL", "LPL", "MZL", "FL", "HCL"]
# The trailing comma makes this a real one-element tuple; ("1") is just the
# string "1" and only iterates correctly for single-character tube ids.
tubes = ("1",)
mapping = None

dataset = som_dataset.SOMDataset.from_path(SOM_DATASET)
train_dataset, validate_dataset = fc_api.prepare_classifier_train_dataset(
    dataset,
    split_ratio=0.9,
    groups=groups,
    mapping=mapping,
    balance=None)

config = classifier.SOMClassifierConfig(
    **{
        # Reuse the per-tube configuration of the loaded dataset.
        "tubes": {tube: dataset.config[tube] for tube in tubes},
        "groups": groups,
        "pad_width": 2,
        "mapping": mapping,
        "cost_matrix": None,
    })

model = fc_api.train_som_classifier(train_dataset, validate_dataset, config)

model.save(OUTPUT)
model.save_information(OUTPUT)