Example #1
def run_eval(args):
    logging.set_verbosity(logging.WARNING)

    args = utils.dict_to_namedtuple(args)

    config = hparams_config.get_efficientdet_config(args.model_name)
    config.override(args.hparams, allow_new_keys=True)
    config.image_size = utils.parse_image_size(config.image_size)

    params = dict(config.as_dict(), seed=None)

    logging.info(params)

    utils.setup_gpus()

    dataset = utils.get_dataset(args, 1, False, params, None)

    model = efficientdet_net.EfficientDetNet(params=params)
    model.compile()

    if args.weights:
        image_size = params["image_size"]
        model.predict(np.zeros((1, image_size[0], image_size[1], 3)))
        model.load_weights(args.weights)

    model.evaluate(dataset, steps=args.eval_steps)
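
The examples in this listing all call a setup_gpus helper (here utils.setup_gpus) before touching the GPU. The helper itself is not shown anywhere in the listing; a minimal sketch of what such a function typically does is enabling memory growth so TensorFlow does not reserve all GPU memory up front. This is an assumption about the helper, not its actual implementation:

import tensorflow as tf

def setup_gpus() -> None:
    """Hypothetical sketch: enable memory growth on all visible GPUs."""
    for gpu in tf.config.list_physical_devices("GPU"):
        try:
            # Allocate GPU memory incrementally instead of reserving it all.
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as err:
            # Memory growth must be set before the GPUs are initialized.
            print(err)
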
Example #2
def run_augment_data(args: argparse.Namespace) -> None:
    """MAKEDOC: What is augment_data doing?"""
    logg = logging.getLogger(f"c.{__name__}.run_augment_data")
    logg.debug("Starting run_augment_data")

    # magic to fix the GPUs
    setup_gpus()

    augmentation_type = args.augmentation_type
    words_type = args.words_type
    force_augment = args.force_augment

    if augmentation_type == "2345":
        aug_type_list = ["aug02", "aug03", "aug04", "aug05"]
    elif augmentation_type == "6789":
        aug_type_list = ["aug06", "aug07", "aug08", "aug09"]
    elif augmentation_type == "10123":
        aug_type_list = ["aug10", "aug11", "aug12", "aug13"]
    elif augmentation_type == "14567":
        aug_type_list = ["aug14", "aug15", "aug16", "aug17"]
    elif augmentation_type == "auA1234":
        aug_type_list = ["auA01", "auA02", "auA03", "auA04"]
    elif augmentation_type == "auA5678":
        aug_type_list = ["auA05", "auA06", "auA07", "auA08"]
    else:
        aug_type_list = [augmentation_type]

    for at in aug_type_list:
        do_augmentation(at, words_type, force_augment)
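
The if/elif chain above expands an augmentation_type shorthand into a list of augmentation tags. As a sketch, the same mapping can be written as a dictionary lookup that keeps the original single-element fallback:

AUG_GROUPS = {
    "2345": ["aug02", "aug03", "aug04", "aug05"],
    "6789": ["aug06", "aug07", "aug08", "aug09"],
    "10123": ["aug10", "aug11", "aug12", "aug13"],
    "14567": ["aug14", "aug15", "aug16", "aug17"],
    "auA1234": ["auA01", "auA02", "auA03", "auA04"],
    "auA5678": ["auA05", "auA06", "auA07", "auA08"],
}

def expand_augmentation_type(augmentation_type: str) -> list:
    # Unknown shorthands fall back to a single-element list, as above.
    return AUG_GROUPS.get(augmentation_type, [augmentation_type])
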
Example #3
def run_demo(args: argparse.Namespace) -> None:
    """TODO: What is demo doing?"""
    logg = logging.getLogger(f"c.{__name__}.run_demo")
    logg.debug("Starting run_demo")

    arch_type = args.arch_type
    train_words_type = args.train_words_type

    # magic to fix the GPUs
    setup_gpus()

    device = None

    device_info = sd.query_devices(device=device, kind="input")
    logg.debug(f"device_info: {device_info}")

    if train_words_type.endswith("LS"):
        window = 500
    else:
        window = 1000

    # window = 200
    # interval = 1000
    # interval = 30
    interval = args.interval
    # interval = 100
    # interval = 500
    # blocksize = 0
    samplerate_input = device_info["default_samplerate"]
    channels = [1]
    # block_duration = 50

    the_demo = Demo(
        device,
        window,
        interval,
        samplerate_input,
        channels,
        arch_type=arch_type,
        train_words_type=train_words_type,
    )

    the_demo.run()
Example #4
    def __init__(self,
                 model_name,
                 data_path,
                 explain=False,
                 save_predictions=False,
                 **kwargs):
        utils.setup_gpus()

        self.model_path = os.path.join('./trained_models', model_name,
                                       'frozen')

        print('Loading model from: {}'.format(self.model_path))

        self.model_name = model_name
        self.data = DataLoader(data_path, training=False).test_dataset()
        self.model = tf.keras.models.load_model(self.model_path)
        self.explain = explain
        self.outdir = os.path.join('./trained_models', model_name, 'results')
        self.class_names = ['normal', 'pneumonia', 'COVID-19']
        self.save_predictions = save_predictions
        utils.mdir(self.outdir)
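
For context, loading such a frozen model outside the class and running a single prediction could look like the sketch below; the model name and input shape are assumptions, since real inputs come from the project's DataLoader:

import os

import numpy as np
import tensorflow as tf

model_path = os.path.join("./trained_models", "some_model", "frozen")  # hypothetical name
model = tf.keras.models.load_model(model_path)

dummy = np.zeros((1, 224, 224, 3), dtype=np.float32)  # assumed input size
probs = model.predict(dummy)
print(probs.shape)  # one probability per class: normal / pneumonia / COVID-19
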
Example #5
def do_stream_evaluation(
    architecture_type,
    which_dataset,
    train_words_type,
    sentence_wav_paths,
    sentence_norm_tra,
    good_sentences,
) -> None:
    r"""MAKEDOC: what is do_stream_evaluation doing?"""
    logg = logging.getLogger(f"c.{__name__}.do_stream_evaluation")
    # logg.setLevel("INFO")
    logg.debug("Start do_stream_evaluation")

    wav_IDs = list(sentence_wav_paths.keys())
    logg.debug(f"len(wav_IDs): {len(wav_IDs)}")

    good_count = 0
    bad_count = 0

    # magic to fix the GPUs
    setup_gpus()

    # load the model
    model, model_name = load_trained_model(architecture_type, which_dataset,
                                           train_words_type)

    # all_y_pred: ty.Dict[str, ty.List[float]] = {}

    ypred_folder = Path("plot_stream") / "y_pred" / model_name
    if not ypred_folder.exists():
        ypred_folder.mkdir(parents=True, exist_ok=True)

    for sentence_index in good_sentences:

        # get info for one sentence
        wav_ID = wav_IDs[sentence_index]
        logg.debug(
            f"\nsentence_index {sentence_index} / {len(good_sentences)-1}")
        orig_wav_path = sentence_wav_paths[wav_ID]
        logg.debug(f"sentence_wav_paths[{wav_ID}]: {orig_wav_path}")
        norm_tra = sentence_norm_tra[wav_ID]
        logg.debug(f"sentence_norm_tra[{wav_ID}]: {norm_tra}")

        # build the output path
        pred_name = f"{model_name}"
        pred_name += f"_{wav_ID}"
        pred_name += ".npy"
        logg.debug(f"pred_name SINGLE: {pred_name}")
        pred_path = ypred_folder / pred_name

        if pred_path.exists():
            logg.info(f"Already predicted {pred_path}")
            continue

        y_pred = evaluate_stream(
            model,
            which_dataset,
            train_words_type,
            architecture_type,
            model_name,
            orig_wav_path,
            norm_tra,
            wav_ID,
        )
        logg.debug(f"y_pred.shape: {y_pred.shape}")

        np.save(pred_path, y_pred)

        # all_y_pred[wav_ID] = y_pred.tolist()

        # wasgood = input()
        # if wasgood == "y":
        #     good_count += 1
        # else:
        #     bad_count += 1

    logg.debug(f"good_count: {good_count}")
    logg.debug(f"bad_count: {bad_count}")
    logg.debug(f"total: {bad_count+good_count}")
Example #6
def run_training(args):
    logging.set_verbosity(logging.WARNING)

    args = utils.dict_to_namedtuple(args)

    config = hparams_config.get_efficientdet_config(args.model_name)
    config.override(args.hparams, allow_new_keys=True)
    config.image_size = utils.parse_image_size(config.image_size)

    params = dict(
        config.as_dict(),
        seed=args.seed,
        batch_size=args.batch_size,
    )

    logging.info(params)

    if args.ckpt_dir:
        ckpt_dir = args.ckpt_dir
        if not tf.io.gfile.exists(ckpt_dir):
            tf.io.gfile.makedirs(ckpt_dir)
        config_file = os.path.join(ckpt_dir, "config.yaml")
        if not tf.io.gfile.exists(config_file):
            tf.io.gfile.GFile(config_file, "w").write(str(config))

    if params["seed"]:
        seed = params["seed"]
        os.environ["PYTHONHASHSEED"] = str(seed)
        tf.random.set_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        os.environ["TF_DETERMINISTIC_OPS"] = "1"
        os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

    utils.setup_gpus()

    num_devices = 1
    physical_devices = tf.config.list_physical_devices("GPU")
    multi_gpu = args.multi_gpu
    if (multi_gpu is not None and len(multi_gpu) != 1
            and len(physical_devices) > 1):
        devices = ([f"GPU:{gpu}" for gpu in multi_gpu]
                   if len(multi_gpu) != 0 else None)
        strategy = tf.distribute.MirroredStrategy(devices)
        num_devices = len(devices) if devices else len(physical_devices)
    else:
        strategy = tf.distribute.get_strategy()

    train_dataset = utils.get_dataset(
        args,
        args.batch_size * num_devices,
        True,
        params,
        strategy if num_devices > 1 else None,
    )

    if args.eval_after_training or args.eval_during_training:
        eval_dataset = utils.get_dataset(
            args,
            num_devices,
            False,
            params,
            strategy if num_devices > 1 else None,
        )
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = (
            tf.data.experimental.AutoShardPolicy.DATA)
        eval_dataset = eval_dataset.with_options(options)

    with strategy.scope():
        model = efficientdet_net.EfficientDetNet(params=params)

        global_batch_size = args.batch_size * strategy.num_replicas_in_sync
        model.compile(optimizer=optimizers.get_optimizer(
            params, args.epochs, global_batch_size, args.train_steps))

        initial_epoch = args.initial_epoch
        if args.start_weights:
            image_size = params["image_size"]
            model.predict(np.zeros((1, image_size[0], image_size[1], 3)))
            model.load_weights(args.start_weights)
            fname = args.start_weights.split("/")[-1]
            ckpt_pattern = rf"{args.model_name}\.(\d\d+)\.h5"
            match = re.match(ckpt_pattern, fname)
            if match:
                initial_epoch = int(match.group(1).lstrip("0"))

        callbacks = []

        if args.ckpt_dir:
            ckpt_dir = args.ckpt_dir
            if not tf.io.gfile.exists(ckpt_dir):
                tf.io.gfile.makedirs(ckpt_dir)
            callbacks.append(
                tf.keras.callbacks.ModelCheckpoint(
                    filepath=os.path.join(
                        ckpt_dir, "".join([args.model_name,
                                           ".{epoch:02d}.h5"])),
                    save_weights_only=True,
                ))

        if args.log_dir:
            log_dir = args.log_dir
            if not tf.io.gfile.exists(log_dir):
                tf.io.gfile.makedirs(log_dir)
            callbacks.append(
                tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                               update_freq="epoch"))

        model.fit(
            train_dataset,
            epochs=args.epochs,
            steps_per_epoch=args.train_steps,
            initial_epoch=initial_epoch,
            callbacks=callbacks,
            validation_data=eval_dataset
            if args.eval_during_training else None,
            validation_steps=args.eval_steps,
            validation_freq=args.eval_freq,
        )

        if args.eval_after_training:
            print("Evaluation after training:")
            model.evaluate(eval_dataset, steps=args.eval_steps)

        model.save_weights(args.output_filename)
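
The strategy selection in run_training follows the standard tf.distribute pattern: create a MirroredStrategy when several GPUs should be used, build and compile the model inside strategy.scope(), and scale the batch size by the replica count. A minimal self-contained sketch of that pattern (toy model, not the EfficientDet code):

import tensorflow as tf

if len(tf.config.list_physical_devices("GPU")) > 1:
    strategy = tf.distribute.MirroredStrategy()
else:
    strategy = tf.distribute.get_strategy()  # default single-device strategy

with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
    model.compile(optimizer="adam", loss="mse")

# The effective batch size is the per-replica batch times the replica count.
global_batch_size = 32 * strategy.num_replicas_in_sync
print("replicas:", strategy.num_replicas_in_sync, "global batch:", global_batch_size)
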
Example #7
def find_best_lr(hypa: ty.Dict[str, str]) -> None:
    """MAKEDOC: what is find_best_lr doing?"""
    logg = logging.getLogger(f"c.{__name__}.find_best_lr")
    # logg.setLevel("INFO")
    logg.debug("Start find_best_lr")

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # no need for validation
    x = np.concatenate((data["training"], data["validation"]))
    y = np.concatenate((labels["training"], labels["validation"]))

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_attention(hypa, num_labels, input_shape)

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)
    # model.summary()

    start_lr = 1e-9
    end_lr = 1e1

    batch_size_types = {"01": 32, "02": 16}
    batch_size = batch_size_types[hypa["batch_size_type"]]

    epoch_num_types = {"01": 15, "02": 30, "03": 2}
    epoch_num = epoch_num_types[hypa["epoch_num_type"]]

    optimizer_types = {"a1": Adam(), "r1": RMSprop()}
    opt = optimizer_types[hypa["optimizer_type"]]

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # find the best values
    lrf = LearningRateFinder(model)
    lrf.find((x, y), start_lr, end_lr, epochs=epoch_num, batchSize=batch_size)

    model_name = build_attention_name(hypa, False)

    fig_title = "LR_sweep"
    fig_title += f"_bs{batch_size}"
    fig_title += f"_en{epoch_num}"
    fig_title += f"__{model_name}"
    fig, ax = plt.subplots(figsize=(8, 8))

    # get the plot
    lrf.plot_loss(ax=ax, title=fig_title)

    # save the plot
    plot_fol = Path("plot_results") / "att" / "find_best_lr"
    if not plot_fol.exists():
        plot_fol.mkdir(parents=True, exist_ok=True)
    fig_name = fig_title + ".{}"
    fig.savefig(plot_fol / fig_name.format("png"))
    fig.savefig(plot_fol / fig_name.format("pdf"))

    # TODO: save the loss history

    plt.show()
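
LearningRateFinder is a project-local helper. The underlying LR range test can be sketched as a plain Keras callback that grows the learning rate exponentially from start_lr to end_lr and records the loss per batch; this is an assumption about how the helper works, not its actual code:

import tensorflow as tf

class LRRangeTest(tf.keras.callbacks.Callback):
    def __init__(self, start_lr: float, end_lr: float, num_steps: int) -> None:
        super().__init__()
        # Multiplicative step so that start_lr * factor**num_steps == end_lr.
        self.factor = (end_lr / start_lr) ** (1.0 / num_steps)
        self.start_lr = start_lr
        self.lrs: list = []
        self.losses: list = []

    def on_train_begin(self, logs=None):
        tf.keras.backend.set_value(self.model.optimizer.lr, self.start_lr)

    def on_train_batch_end(self, batch, logs=None):
        lr = float(tf.keras.backend.get_value(self.model.optimizer.lr))
        self.lrs.append(lr)
        self.losses.append(logs["loss"])
        tf.keras.backend.set_value(self.model.optimizer.lr, lr * self.factor)

Plotting losses against lrs on a log x-axis then gives the same LR sweep curve that find_best_lr saves above.
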
Example #8
def train_attention(hypa: ty.Dict[str, str], force_retrain: bool,
                    use_validation: bool) -> None:
    """MAKEDOC: what is train_attention doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_attention")
    # logg.setLevel("INFO")
    logg.debug("Start train_attention")

    # build the model name
    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "attention"
    if not model_folder.exists():
        model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "attention" / model_name
    if not info_folder.exists():
        info_folder.mkdir(parents=True, exist_ok=True)

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_attention(hypa, num_labels, input_shape)

    batch_size_types = {"01": 32, "02": 16}
    batch_size = batch_size_types[hypa["batch_size_type"]]

    epoch_num_types = {"01": 15, "02": 30, "03": 2, "04": 4}
    epoch_num = epoch_num_types[hypa["epoch_num_type"]]

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "exp_decay_step_01",
        "04": "exp_decay_smooth_01",
        "05": "clr_triangular2_01",
        "06": "clr_triangular2_02",
        "07": "clr_triangular2_03",
        "08": "clr_triangular2_04",
        "09": "clr_triangular2_05",
        "10": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values
    if lr_value.startswith("fixed"):
        if lr_value == "fixed01":
            lr = 1e-3
        elif lr_value == "fixed02":
            lr = 1e-4
    else:
        lr = 1e-3

    optimizer_types = {
        "a1": Adam(learning_rate=lr),
        "r1": RMSprop(learning_rate=lr)
    }
    opt = optimizer_types[hypa["optimizer_type"]]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    if lr_value.startswith("exp_decay"):
        if lr_value == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_02":
            exp_decay_part = partial(exp_decay_smooth,
                                     epochs_drop=5,
                                     initial_lrate=1e-2)
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # setup cyclic learning rate
    if lr_value.startswith("clr_triangular2"):
        base_lr = 1e-5
        max_lr = 1e-3

        # training iteration per epoch = num samples // batch size
        # step size suggested = 2~8 * iterations
        if lr_value == "clr_triangular2_01":
            step_factor = 8
            step_size = step_factor * x.shape[0] // batch_size

        elif lr_value == "clr_triangular2_02":
            step_factor = 2
            step_size = step_factor * x.shape[0] // batch_size

        # target_cycles = the number of cycles we want in those epochs
        # it_per_epoch = num_samples // batch_size
        # total_iterations = it_per_epoch * epoch_num
        # step_size = total_iterations // target_cycles
        elif lr_value == "clr_triangular2_03":
            # the number of cycles we want in those epochs
            target_cycles = 4
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_04":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_05":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)
            # set bigger starting value
            max_lr = 1e-2

        logg.debug(f"x.shape[0]: {x.shape[0]}")
        logg.debug(f"CLR is using step_size: {step_size}")

        mode = "triangular2"
        cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode)
        callbacks.append(cyclic_lr)

    # setup early stopping
    if learning_rate_type in ["01", "02", "03", "04"]:
        metric_to_monitor = "val_loss" if use_validation else "loss"
        early_stop = EarlyStopping(
            monitor=metric_to_monitor,
            patience=4,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks.append(early_stop)

    # model_checkpoint = ModelCheckpoint(
    #     model_name,
    #     monitor="val_loss",
    #     save_best_only=True,
    # )

    # a dict to recreate this training
    # FIXME this should be right before fit and have epoch_num/batch_size/lr info
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["version"] = "001"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=epoch_num,
        batch_size=batch_size,
        callbacks=callbacks,
    )

    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "002"

    # eval performance on the various metrics
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # if cyclic_lr was used save the history
    if lr_value.startswith("clr_triangular2"):
        logg.debug(f"cyclic_lr.history.keys(): {cyclic_lr.history.keys()}")
        clr_recap = {}
        for metric_name, values in cyclic_lr.history.items():
            clr_recap[metric_name] = list(float(v) for v in values)
        clr_recap_path = info_folder / "clr_recap.json"
        clr_recap_path.write_text(json.dumps(clr_recap, indent=4))

    # save the trained model
    model.save(model_path)

    placeholder_path.write_text(f"Trained. F-score: {fscore}")
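
The step_size arithmetic for the cyclic learning rate (see the comments inside train_attention above) is easier to follow with concrete numbers; the values below are purely illustrative:

# Illustrative numbers: 10000 training samples, batch size 32, 30 epochs.
num_samples = 10000
batch_size = 32
epoch_num = 30

it_per_epoch = num_samples // batch_size       # 312 iterations per epoch
total_iterations = it_per_epoch * epoch_num    # 9360 iterations in total

# Variant A: step_size as a multiple (2~8) of the iterations per epoch.
step_size_a = 8 * it_per_epoch                 # 2496

# Variant B: fix the number of full cycles; each cycle spans 2 * step_size.
target_cycles = 4
step_size_b = total_iterations // (target_cycles * 2)  # 1170
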
Example #9

def standardize_sample(img):
    mean = np.mean(img)
    n = len(img.ravel())
    adjusted_stddev = max(np.std(img), 1.0 / np.sqrt(n))
    return (img - mean) / adjusted_stddev


def show(img):
    import matplotlib.pyplot as plt
    plt.imshow(img, cmap='gray')
    plt.show()


if __name__ == '__main__':
    utils.setup_gpus()
    dme = glob.glob(
        '/media/miguel/ALICIUM/Miguel/DOWNLOADS/ZhangLabData/CellData/OCT/test/DME/*'
    )
    data = prep_eval_data(dme)
    img = data[10]
    modelname = '20201011_vanilla_cnn_batch64'
    model_path = os.path.join('./trained_models', modelname, 'frozen')
    model = tf.keras.models.load_model(model_path)
    explainer = LIME(model, areas=7, perturbations=700)
    ex, mask, super_pix = explainer.fit_linear_model(img, label=2)
    super_pix = skimage.segmentation.mark_boundaries(img, super_pix)
    full_image = np.concatenate((ex, super_pix), axis=1)
    show(full_image)
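
standardize_sample above mirrors tf.image.per_image_standardization: zero mean, with the standard deviation clamped from below by 1/sqrt(N). A quick sanity check of that equivalence, assuming a float image:

import numpy as np
import tensorflow as tf

img = np.random.rand(64, 64).astype(np.float32)
ours = standardize_sample(img)
# per_image_standardization expects a [height, width, channels] tensor.
tfs = tf.image.per_image_standardization(img[..., np.newaxis]).numpy()[..., 0]
print(np.allclose(ours, tfs, atol=1e-5))  # expected: True
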
Example #10
    def __init__(self,
                 modelname,
                 data_path,
                 architecture,
                 hyperparams,
                 img_size=224,
                 **kwargs):
        """
        :param modelname: name of the model, used for the output folders
        :param data_path: path to the data records
        :param architecture: which model architecture to build
        :param hyperparams: overrides for the default training params
        :param img_size: input image size
        :param kwargs: extra arguments forwarded to the architecture builder
        """
        utils.setup_gpus()
        self.modelname = modelname
        self.model_path = os.path.join('./trained_models', modelname)
        self.data_path = data_path
        self.params = {
            'batch_size': 32,
            'learning_rate': 0.001,
            'schedule': False,
            'optimizer': 'ADAM',
            'test_iter': 100,
            'epochs': 50,
            'max_class_samples': 8514  # number of pneumonia cases in the data
        }
        self.params.update(hyperparams)
        self.img_size = img_size
        self.log_dir, self.ckpt_dir, self.train_writer, self.test_writer = self.create_dirs()

        self.steps_epoch = np.ceil(2.0 * self.params['max_class_samples'] /
                                   self.params['batch_size'])
        self.epochs = self.params['epochs']
        self.epoch_counter = tf.Variable(initial_value=0,
                                         trainable=False,
                                         dtype=tf.int64)
        self.step = tf.Variable(initial_value=0,
                                trainable=False,
                                dtype=tf.int64)

        self.architecture_params = dict(**kwargs)
        self.model = self.build_model(architecture, **self.architecture_params)

        self.train_data = DataLoader(self.data_path, training=True)
        self.test_data = DataLoader(self.data_path, training=False)

        self.lr, self.opt = self.optimizer()
        self.loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

        self.train_loss, self.test_loss, self.train_acc, self.test_acc = self.build_metrics()

        tracked_objects = dict(model=self.model,
                               optimizer=self.opt,
                               current_epoch=self.epoch_counter,
                               step=self.step)
        self.ckpt = Checkpoint(tracked_objects, self.ckpt_dir, max_to_keep=3)
        try:
            self.ckpt.restore().assert_existing_objects_matched()
            print('Loading pre-trained model')
        except Exception as e:
            print(e)
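
Checkpoint here is a project wrapper; the TensorFlow pattern it presumably wraps is tf.train.Checkpoint plus tf.train.CheckpointManager. A minimal sketch under that assumption:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
optimizer = tf.keras.optimizers.Adam()
step = tf.Variable(0, trainable=False, dtype=tf.int64)

ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer, step=step)
manager = tf.train.CheckpointManager(ckpt, "./ckpts", max_to_keep=3)

# Restore the latest checkpoint if one exists; otherwise train from scratch.
status = ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    status.assert_existing_objects_matched()
    print("Loading pre-trained model")
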
Example #11
def evaluate_model_area(model_name: str, test_words_type: str) -> None:
    r"""MAKEDOC: what is evaluate_model_area doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_area")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_model_area")

    # magic to fix the GPUs
    setup_gpus()

    # # VAN_opa1_lr05_bs32_en15_dsaug07_wLTall
    # hypa = {
    #     "batch_size_type": "32",
    #     "dataset_name": "aug07",
    #     "epoch_num_type": "15",
    #     "learning_rate_type": "03",
    #     "net_type": "VAN",
    #     "optimizer_type": "a1",
    #     # "words_type": "LTall",
    #     "words_type": train_words_type,
    # }
    # # use_validation = True
    # use_validation = False
    # dataset_name = hypa["dataset_name"]

    # get the model name
    # model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    dataset_re = re.compile("_ds(.*?)_")
    match = dataset_re.search(model_name)
    if match is not None:
        logg.debug(f"match[1]: {match[1]}")
        dataset_name = match[1]

    train_words_type_re = re.compile("_w(.*?)[_.]")
    match = train_words_type_re.search(model_name)
    if match is not None:
        logg.debug(f"match[1]: {match[1]}")
        train_words_type = match[1]

    # load the model
    model_folder = Path("trained_models") / "area"
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)
    # model.summary()

    train_words = words_types[train_words_type]
    logg.debug(f"train_words: {train_words}")
    test_words = words_types[test_words_type]
    logg.debug(f"test_words: {test_words}")

    # input data
    processed_path = Path("data_proc") / f"{dataset_name}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"list(data.keys()): {list(data.keys())}")
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the words you trained on
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])
    # model.evaluate(data["validation"], labels["validation"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    # y_pred = model.predict(data["validation"])
    # cm = pred_hot_2_cm(labels["validation"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore, train_words)

    fig_name = f"{model_name}_test{test_words_type}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm"
    if not cm_folder.exists():
        cm_folder.mkdir(parents=True, exist_ok=True)

    plot_cm_path = cm_folder / fig_name.format("png")
    fig.savefig(plot_cm_path)
    plot_cm_path = cm_folder / fig_name.format("pdf")
    fig.savefig(plot_cm_path)

    plt.show()
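
The two regexes above recover the dataset and training-words tags that build_area_name presumably embeds in the model name. A quick illustration on a made-up name (both the name and its trailing _noval tag are hypothetical):

import re

model_name = "VAN_opa1_lr05_bs32_en15_dsaug07_wLTall_noval"  # hypothetical

dataset_re = re.compile("_ds(.*?)_")
words_re = re.compile("_w(.*?)[_.]")

print(dataset_re.search(model_name)[1])  # aug07
print(words_re.search(model_name)[1])    # LTall
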
Example #12
def train_model(hypa, force_retrain):
    """MAKEDOC: What is train_model doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_model")
    # logg.debug("Starting train_model")

    # get the words
    words = words_types[hypa["words"]]

    # name the model
    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "cnn"
    if not model_folder.exists():
        model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    # logg.debug(f"model_path: {model_path}")

    placeholder_path = model_folder / f"{model_name}.txt"
    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "cnn" / model_name
    if not info_folder.exists():
        info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    # input data
    processed_path = Path("data_proc") / f"{hypa['dataset']}"
    data, labels = load_processed(processed_path, words)

    # from hypa extract model param
    model_param = {}
    model_param["num_labels"] = len(words)
    model_param["input_shape"] = data["training"][0].shape
    model_param["base_filters"] = hypa["base_filters"]
    model_param["base_dense_width"] = hypa["base_dense_width"]

    # translate types to actual values

    kernel_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(5, 1), (3, 3), (3, 3)],
        "03": [(1, 5), (3, 3), (3, 3)],
    }
    model_param["kernel_sizes"] = kernel_size_types[hypa["kernel_size_type"]]

    pool_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(2, 1), (2, 2), (2, 2)],
        "03": [(1, 2), (2, 2), (2, 2)],
    }
    model_param["pool_sizes"] = pool_size_types[hypa["pool_size_type"]]

    dropout_types = {"01": [0.03, 0.01], "02": [0.3, 0.1]}
    model_param["dropouts"] = dropout_types[hypa["dropout_type"]]

    # a dict to recreate this training
    recap = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["model_name"] = model_name
    recap["version"] = "002"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "fixed03",
        "e1": "exp_decay_keras_01",
        "04": "exp_decay_step_01",
        "05": "exp_decay_smooth_01",
        "06": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values
    if lr_value.startswith("fixed"):
        if lr_value == "fixed01":
            lr = 1e-2
        elif lr_value == "fixed02":
            lr = 1e-3
        elif lr_value == "fixed03":
            lr = 1e-4
    else:
        lr = 1e-3

    if lr_value == "exp_decay_keras_01":
        lr = ExponentialDecay(0.1, decay_steps=100000, decay_rate=0.96, staircase=True)

    optimizer_types = {
        "a1": Adam(learning_rate=lr),
        "r1": RMSprop(learning_rate=lr),
    }
    opt = optimizer_types[hypa["optimizer_type"]]

    # create the model
    model = CNNmodel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    if lr_value.startswith("exp_decay"):
        if lr_value == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_02":
            exp_decay_part = partial(
                exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2
            )
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # # setup early stopping
    # early_stop = EarlyStopping(
    #     # monitor="val_categorical_accuracy",
    #     monitor="val_loss",
    #     patience=4,
    #     verbose=1,
    #     restore_best_weights=True,
    # )
    # callbacks.append(early_stop)

    # get training parameters
    BATCH_SIZE = hypa["batch_size"]
    SHUFFLE_BUFFER_SIZE = BATCH_SIZE
    EPOCH_NUM = hypa["epoch_num"]

    # load the datasets
    datasets = {}
    for which in ["training", "validation", "testing"]:
        # logg.debug(f"data[{which}].shape: {data[which].shape}")
        datasets[which] = Dataset.from_tensor_slices((data[which], labels[which]))
        # logg.debug(f"datasets[{which}]: {datasets[which]}")
        datasets[which] = datasets[which].shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
        # logg.debug(f"datasets[{which}]: {datasets[which]}")

    # train the model
    results = model.fit(
        data["training"],
        labels["training"],
        # validation_data=datasets["validation"],
        validation_data=(data["validation"], labels["validation"]),
        batch_size=BATCH_SIZE,
        epochs=EPOCH_NUM,
        verbose=1,
        callbacks=callbacks,
    )

    # save the trained model
    model.save(model_path)

    results_recap = {}
    results_recap["model_name"] = model_name

    # version of the results saved
    results_recap["results_recap_version"] = "002"

    # quickly evaluate the results
    # logg.debug(f"\nmodel.metrics_names: {model.metrics_names}")
    # for which in ["training", "validation", "testing"]:
    #     model_eval = model.evaluate(datasets[which])
    #     logg.debug(f"{which}: model_eval: {model_eval}")

    # save the evaluation results
    logg.debug("Evaluate on test data:")
    # eval_testing = model.evaluate(datasets["testing"])
    # results_recap[model.metrics_names[0]] = eval_testing[0]
    # results_recap[model.metrics_names[1]] = eval_testing[1]
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    # y_pred = model.predict(datasets["testing"])
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_recap["history"] = {
        "loss": results.history["loss"],
        "val_loss": results.history["val_loss"],
        "categorical_accuracy": results.history["categorical_accuracy"],
        "val_categorical_accuracy": results.history["val_categorical_accuracy"],
    }

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    y_pred_dataset = model.predict(datasets["testing"])
    cm_dataset = pred_hot_2_cm(labels["testing"], y_pred_dataset, words)
    fscore_dataset = analyze_confusion(cm_dataset, words)
    logg.debug(f"fscore_dataset: {fscore_dataset} fscore {fscore}")
    # for i, (ys, yd) in enumerate(zip(y_pred, y_pred_dataset)):
    #     pred_split = np.argmax(ys)
    #     pred_dataset = np.argmax(yd)
    #     logg.debug(f"i: {i} pred_split: {pred_split} pred_dataset: {pred_dataset}")

    # plt.show()

    placeholder_path.write_text(f"Trained. F-score: {fscore}")

    return "done_training"
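
Note that train_model builds tf.data pipelines in datasets but then fits on the raw arrays. For reference, a self-contained sketch of fitting directly on a shuffled, batched Dataset (toy data, not the project's):

import numpy as np
import tensorflow as tf

x = np.random.rand(100, 28, 28, 1).astype(np.float32)
y = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, size=100)]

BATCH_SIZE = 32
ds = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(BATCH_SIZE).batch(BATCH_SIZE)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")
# A batched dataset carries its own batch size, so fit takes no batch_size here.
model.fit(ds, epochs=1)
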
Example #13
def train_area(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
) -> None:
    """MAKEDOC: what is train_area doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_area")
    # logg.setLevel("INFO")
    logg.debug("Start train_area")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    if not model_info_folder.exists():
        model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    # get the words
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    ##########################################################
    #   Setup model
    ##########################################################

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_area(hypa, num_labels, input_shape)

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type.startswith("SI"):
        if net_type == "SIM":
            sim_type = "1"
        elif net_type == "SI2":
            sim_type = "2"
        model = SimpleNet.build(sim_type=sim_type, **model_param)

    num_samples = x.shape[0]
    logg.debug(f"num_samples: {num_samples}")

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_area(hypa, use_validation, model_path,
                                             num_samples)

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # recap
    recap: ty.Dict[str, ty.Any] = {}
    recap["model_name"] = model_name
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["batch_size"] = training_param["batch_size"]
    recap["epochs"] = training_param["epochs"]
    recap["lr_name"] = training_param["lr_name"]
    recap["version"] = "002"

    # logg.debug(f"recap: {recap}")
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    # https://stackoverflow.com/a/45546663/2237151
    model_summary_path = model_info_folder / "model_summary.txt"
    with model_summary_path.open("w") as msf:
        model.summary(line_length=150, print_fn=lambda x: msf.write(x + "\n"))

    ##########################################################
    #   Fit model
    ##########################################################

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=training_param["epochs"],
        batch_size=training_param["batch_size"],
        callbacks=training_param["callbacks"],
    )

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    # results_recap
    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "001"

    # evaluate performance
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    results_recap["cm"] = cm.tolist()

    # fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
Example #14
def train_img(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
) -> None:
    """MAKEDOC: what is train_img doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_img")
    # logg.setLevel("INFO")
    logg.debug("Start train_img")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_img_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    if not model_info_folder.exists():
        model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    label_type = hypa["words_type"]
    label_list = get_label_list(label_type)
    num_labels = len(label_list)

    dataset_raw_folder = Path.home() / "datasets" / "imagenet" / "imagenet_images"
    dataset_proc_base_folder = Path.home() / "datasets" / "imagenet"

    # get the partition of the data
    partition, ids2labels = prepare_partitions(label_list, dataset_raw_folder)

    num_samples = len(partition["training"])

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_img(hypa, use_validation, model_path,
                                            num_samples)

    preprocess_type = hypa["dataset_name"]
    dataset_proc_folder = dataset_proc_base_folder / preprocess_type

    val_generator: ty.Optional[ImageNetGenerator] = None
    if use_validation:
        val_generator = ImageNetGenerator(
            partition["validation"],
            ids2labels,
            label_list,
            dataset_proc_folder=dataset_proc_folder,
            dataset_raw_folder=dataset_raw_folder,
            preprocess_type=preprocess_type,
            save_processed=True,
            batch_size=training_param["batch_size"],
            shuffle=True,
        )
        logg.debug("Using validation data")
    else:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    training_generator = ImageNetGenerator(
        partition["training"],
        ids2labels,
        label_list,
        dataset_proc_folder=dataset_proc_folder,
        dataset_raw_folder=dataset_raw_folder,
        preprocess_type=preprocess_type,
        save_processed=True,
        batch_size=training_param["batch_size"],
        shuffle=True,
    )

    testing_generator = ImageNetGenerator(
        partition["testing"],
        ids2labels,
        label_list,
        dataset_proc_folder=dataset_proc_folder,
        dataset_raw_folder=dataset_raw_folder,
        preprocess_type=preprocess_type,
        save_processed=True,
        batch_size=1,
        shuffle=False,
    )

    ##########################################################
    #   Setup model
    ##########################################################

    input_shape = training_generator.get_img_shape()

    # from hypa extract model param
    model_param = get_model_param_img(hypa, num_labels, input_shape)

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type.startswith("SI"):
        if net_type == "SIM":
            sim_type = "1"
        elif net_type == "SI2":
            sim_type = "2"
        model = SimpleNet.build(sim_type=sim_type, **model_param)

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # recap
    recap: ty.Dict[str, ty.Any] = {}
    recap["model_name"] = model_name
    recap["words"] = label_list
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["batch_size"] = training_param["batch_size"]
    recap["epochs"] = training_param["epochs"]
    recap["lr_name"] = training_param["lr_name"]
    recap["version"] = "002"

    # logg.debug(f"recap: {recap}")
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    # https://stackoverflow.com/a/45546663/2237151
    model_summary_path = model_info_folder / "model_summary.txt"
    with model_summary_path.open("w") as msf:
        model.summary(line_length=150, print_fn=lambda x: msf.write(x + "\n"))

    ##########################################################
    #   Fit model
    ##########################################################

    results = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epochs"],
        batch_size=training_param["batch_size"],
        callbacks=training_param["callbacks"],
    )

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    # results_recap
    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "001"

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    cm = confusion_matrix(y_true, y_pred_labels)
    results_recap["cm"] = cm.tolist()

    # fscore
    fscore = analyze_confusion(cm, label_list)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, label_list, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
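
ImageNetGenerator is project code; the interface it exposes to model.fit is the standard tf.keras.utils.Sequence protocol. A skeletal sketch of that protocol (shapes and loading logic are placeholders, not the real generator):

import numpy as np
import tensorflow as tf

class MinimalGenerator(tf.keras.utils.Sequence):
    def __init__(self, ids, batch_size: int = 32) -> None:
        self.ids = ids
        self.batch_size = batch_size

    def __len__(self) -> int:
        # Number of batches per epoch.
        return int(np.ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        batch_ids = self.ids[index * self.batch_size:(index + 1) * self.batch_size]
        # Placeholder loading: the real generator reads and preprocesses images.
        x = np.zeros((len(batch_ids), 64, 64, 1), dtype=np.float32)
        y = np.zeros((len(batch_ids), 10), dtype=np.float32)
        return x, y
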
Example #15
def evaluate_model_cm(model_name: str, test_words_type: str) -> float:
    r"""MAKEDOC: what is evaluate_model_cm doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_cm")
    # logg.setLevel("INFO")
    # logg.debug("\nStart evaluate_model_cm")

    # magic to fix the GPUs
    setup_gpus()

    logg.debug(f"\nmodel_name: {model_name}")

    dataset_re = re.compile("_ds(.*?)_")
    match = dataset_re.search(model_name)
    if match is not None:
        logg.debug(f"match[1]: {match[1]}")
        dataset_name = match[1]

    train_words_type_re = re.compile("_w(.*?)[_.]")
    match = train_words_type_re.search(model_name)
    if match is not None:
        logg.debug(f"match[1]: {match[1]}")
        train_words_type = match[1]

    arch_type = model_name[:3]

    if arch_type == "ATT":
        train_type_tag = "attention"
    else:
        train_type_tag = "area"

    # load the model
    model_folder = Path("trained_models") / train_type_tag
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)
    # model.summary()

    train_words = words_types[train_words_type]
    logg.debug(f"train_words: {train_words}")
    test_words = words_types[test_words_type]
    logg.debug(f"test_words: {test_words}")

    # input data must exist
    if dataset_name.startswith("mel"):
        preprocess_spec(dataset_name, test_words_type)
    elif dataset_name.startswith("aug"):
        do_augmentation(dataset_name, test_words_type)

    # input data
    processed_path = Path("data_proc") / f"{dataset_name}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"list(data.keys()): {list(data.keys())}")
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the words you trained on
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])
    # model.evaluate(data["validation"], labels["validation"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    # y_pred = model.predict(data["validation"])
    # cm = pred_hot_2_cm(labels["validation"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore, train_words)

    fig_name = f"{model_name}_test{test_words_type}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm_all01"
    if not cm_folder.exists():
        cm_folder.mkdir(parents=True, exist_ok=True)

    plot_cm_path = cm_folder / fig_name.format("png")
    fig.savefig(plot_cm_path)
    plot_cm_path = cm_folder / fig_name.format("pdf")
    fig.savefig(plot_cm_path)

    # plt.show()
    return fscore
Example #16
def evaluate_attention_weights(train_words_type: str,
                               rec_words_type: str,
                               do_new_record: bool = False) -> None:
    """MAKEDOC: what is evaluate_attention_weights doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_attention_weights")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_attention_weights")

    # magic to fix the GPUs
    setup_gpus()

    # ATT_ct02_dr02_ks02_lu01_as01_qt01_dw01_opa1_lr01_bs01_en01_dsmel04_wk1
    # hypa: ty.Dict[str, str] = {}
    # hypa["conv_size_type"] = "02"
    # hypa["dropout_type"] = "02"
    # hypa["kernel_size_type"] = "02"
    # hypa["lstm_units_type"] = "01"
    # hypa["query_style_type"] = "01"
    # hypa["dense_width_type"] = "01"
    # hypa["optimizer_type"] = "a1"
    # hypa["learning_rate_type"] = "01"
    # hypa["batch_size_type"] = "01"
    # hypa["epoch_num_type"] = "01"
    # dataset_name = "mel04"
    # hypa["dataset_name"] = dataset_name
    # hypa["words_type"] = train_words_type
    # use_validation = True

    # ATT_ct02_dr01_ks01_lu01_qt05_dw01_opa1_lr03_bs02_en02_dsaug07_wLTnum
    hypa = {
        "batch_size_type": "02",
        "conv_size_type": "02",
        "dataset_name": "aug07",
        "dense_width_type": "01",
        "dropout_type": "01",
        "epoch_num_type": "02",
        "kernel_size_type": "01",
        "learning_rate_type": "03",
        "lstm_units_type": "01",
        "optimizer_type": "a1",
        "query_style_type": "05",
        "words_type": "LTnum",
    }
    use_validation = True

    dataset_name = hypa["dataset_name"]

    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load the model
    model_folder = Path("trained_models") / "attention"
    model_path = model_folder / f"{model_name}.h5"

    # model = tf.keras.models.load_model(model_path)
    # https://github.com/keras-team/keras/issues/5088#issuecomment-401498334
    model = tf.keras.models.load_model(
        model_path, custom_objects={"backend": tf.keras.backend})
    model.summary()
    logg.debug(f"ascii_model(model): {ascii_model(model)}")

    att_weight_model = tf.keras.models.Model(
        inputs=model.input,
        outputs=[
            model.get_layer("output").output,
            model.get_layer("att_softmax").output,
            model.get_layer("bidirectional_1").output,
        ],
    )
    att_weight_model.summary()
    # logg.debug(f"att_weight_model.outputs: {att_weight_model.outputs}")

    # get the training words
    train_words = words_types[train_words_type]
    # logg.debug(f"train_words: {train_words}")
    perm_pred = compute_permutation(train_words)

    if rec_words_type == "train":
        rec_words = train_words[-3:]
        # rec_words = train_words[:]
        logg.debug(f"Using rec_words: {rec_words}")
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # record new audios
    if do_new_record:

        # where to save the audios
        audio_folder = Path("recorded_audio")
        if not audio_folder.exists():
            audio_folder.mkdir(parents=True, exist_ok=True)

        # record the audios and save them in audio_folder
        audio_path_fmt = "{}_02.wav"
        audios = record_audios(rec_words,
                               audio_folder,
                               audio_path_fmt,
                               timeout=0)

        # compute the spectrograms and build the dataset of correct shape
        img_specs = []
        spec_dict = get_spec_dict()
        spec_kwargs = spec_dict[dataset_name]
        p2d_kwargs = {"ref": np.max}
        for word in rec_words:
            # get the name
            audio_path = audio_folder / audio_path_fmt.format(word)

            # convert it to mel
            log_spec = wav2mel(audio_path, spec_kwargs, p2d_kwargs)
            img_spec = log_spec.reshape((*log_spec.shape, 1))
            # logg.debug(f"img_spec.shape: {img_spec.shape}")
            # img_spec.shape: (128, 32, 1)

            img_specs.append(img_spec)

        # the data needs to look like this data['testing'].shape: (735, 128, 32, 1)
        rec_data = np.stack(img_specs)
        # logg.debug(f"rec_data.shape: {rec_data.shape}")

    # load data if you do not want to record new audios
    else:

        # input data
        processed_folder = Path("data_proc")
        processed_path = processed_folder / f"{dataset_name}"

        # which word in the dataset to plot
        word_id = 2

        # the loaded spectrograms
        rec_data_l: ty.List[np.ndarray] = []

        for i, word in enumerate(rec_words):
            data, labels = load_processed(processed_path, [word])

            # get one of the spectrograms
            word_data = data["testing"][word_id]
            rec_data_l.append(word_data)

        # turn the list into np array
        rec_data = np.stack(rec_data_l)

    # get prediction and attention weights
    pred, att_weights, LSTM_out = att_weight_model.predict(rec_data)
    # logg.debug(f"att_weights.shape: {att_weights.shape}")
    # logg.debug(f"att_weights[0].shape: {att_weights[0].shape}")

    # if we recorded fresh audios we also have the waveform to plot
    ax_add = 1 if do_new_record else 0

    # plot the wave, spectrogram, weights and predictions in each column
    plot_size = 5
    fw = plot_size * num_rec_words
    nrows = 3 + ax_add
    # nrows = 4 + ax_add
    fh = plot_size * nrows * 0.7
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=num_rec_words,
                             figsize=(fw, fh),
                             sharey="row")
    fig.suptitle(f"Attention weights and predictions for {rec_words}",
                 fontsize=20)

    for i, word in enumerate(rec_words):
        word_spec = rec_data[i][:, :, 0]
        # logg.debug(f"word_spec.shape: {word_spec.shape}")

        # plot the waveform
        if do_new_record:
            plot_waveform(audios[i], axes[0][i])

        # plot the spectrogram
        title = f"Spectrogram for {word}"
        plot_spec(word_spec, axes[0 + ax_add][i], title=title)

        # plot the weights
        word_att_weights = att_weights[i]
        # plot_att_weights(word_att_weights, axes[1 + ax_add][i], title)

        word_att_weights_img = np.expand_dims(word_att_weights, axis=-1).T
        axes[1 + ax_add][i].imshow(word_att_weights_img,
                                   origin="lower",
                                   aspect="auto")
        title = f"Attention weights for {word}"
        axes[1 + ax_add][i].set_title(title)

        # plot the predictions
        word_pred = pred[i]
        # permute the prediction from sorted to the order you have
        word_pred = word_pred[perm_pred]
        pred_index = np.argmax(word_pred)
        title = f"Predictions for {word}"
        plot_pred(word_pred, train_words, axes[2 + ax_add][i], title,
                  pred_index)

        # axes[3 + ax_add][i].imshow(LSTM_out[i], origin="lower")

    # fig.tight_layout()
    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    fig_name = f"{model_name}"
    fig_name += f"_{train_words_type}"
    fig_name += f"_{rec_words_type}_img"
    if do_new_record:
        fig_name += "_new.{}"
    else:
        fig_name += "_data.{}"

    plot_folder = Path("plot_results")
    results_path = plot_folder / fig_name.format("png")
    fig.savefig(results_path)
    results_path = plot_folder / fig_name.format("pdf")
    fig.savefig(results_path)

    if num_rec_words <= 6:
        plt.show()
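
A minimal driver sketch (not part of the original example): "LTnum" matches the hard-coded hypa["words_type"], and "train" takes the branch that reuses the last three training words.

# Hypothetical usage; the module name and entry point are assumptions.
if __name__ == "__main__":
    evaluate_attention_weights(
        train_words_type="LTnum",  # same key as hypa["words_type"] above
        rec_words_type="train",    # reuse the last three training words
        do_new_record=False,       # load saved spectrograms instead of recording
    )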
Example #17
0
def main():
    hostname = socket.gethostname()
    setup_logging(os.path.join(args.results_dir, 'log_{}.txt'.format(hostname)))
    logging.info("running arguments: %s", args)

    best_gpu = setup_gpus()
    torch.cuda.set_device(best_gpu)
    torch.backends.cudnn.benchmark = True

    train_transform = get_transform(args.dataset, 'train')
    train_data = get_dataset(args.dataset, args.train_split, train_transform)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_transform = get_transform(args.dataset, 'val')
    val_data = get_dataset(args.dataset, 'val', val_transform)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    bit_width_list = list(map(int, args.bit_width_list.split(',')))
    bit_width_list.sort()
    model = models.__dict__[args.model](bit_width_list, train_data.num_classes).cuda()

    lr_decay = list(map(int, args.lr_decay.split(',')))
    optimizer = get_optimizer_config(model, args.optimizer, args.lr, args.weight_decay)
    lr_scheduler = None
    best_prec1 = None
    if args.resume and args.resume != 'None':
        if os.path.isdir(args.resume):
            args.resume = os.path.join(args.resume, 'model_best.pth.tar')
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume, map_location='cuda:{}'.format(best_gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler = get_lr_scheduler(args.optimizer, optimizer, lr_decay, checkpoint['epoch'])
            logging.info("loaded resume checkpoint '%s' (epoch %s)", args.resume, checkpoint['epoch'])
        else:
            raise ValueError('Resume checkpoint path error!')
    elif args.pretrain and args.pretrain != 'None':
        if os.path.isdir(args.pretrain):
            args.pretrain = os.path.join(args.pretrain, 'model_best.pth.tar')
        if os.path.isfile(args.pretrain):
            checkpoint = torch.load(args.pretrain, map_location='cuda:{}'.format(best_gpu))
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            logging.info("loaded pretrain checkpoint '%s' (epoch %s)", args.pretrain, checkpoint['epoch'])
        else:
            raise ValueError('Pretrained model path error!')
    if lr_scheduler is None:
        lr_scheduler = get_lr_scheduler(args.optimizer, optimizer, lr_decay)
    num_parameters = sum(p.nelement() for p in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    criterion = nn.CrossEntropyLoss().cuda()
    criterion_soft = CrossEntropyLossSoft().cuda()
    sum_writer = SummaryWriter(args.results_dir + '/summary')

    for epoch in range(args.start_epoch, args.epochs):
        model.train()
        train_loss, train_prec1, train_prec5 = forward(train_loader, model, criterion, criterion_soft, epoch, True,
                                                       optimizer, sum_writer)
        model.eval()
        val_loss, val_prec1, val_prec5 = forward(val_loader, model, criterion, criterion_soft, epoch, False)

        if isinstance(lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            lr_scheduler.step(val_loss)
        else:
            lr_scheduler.step()

        if best_prec1 is None:
            is_best = True
            best_prec1 = val_prec1[-1]
        else:
            is_best = val_prec1[-1] > best_prec1
            best_prec1 = max(val_prec1[-1], best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict()
            },
            is_best,
            path=args.results_dir + '/ckpt')

        if sum_writer is not None:
            sum_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=epoch)
            for bw, tl, tp1, tp5, vl, vp1, vp5 in zip(bit_width_list, train_loss, train_prec1, train_prec5, val_loss,
                                                      val_prec1, val_prec5):
                sum_writer.add_scalar('train_loss_{}'.format(bw), tl, global_step=epoch)
                sum_writer.add_scalar('train_prec_1_{}'.format(bw), tp1, global_step=epoch)
                sum_writer.add_scalar('train_prec_5_{}'.format(bw), tp5, global_step=epoch)
                sum_writer.add_scalar('val_loss_{}'.format(bw), vl, global_step=epoch)
                sum_writer.add_scalar('val_prec_1_{}'.format(bw), vp1, global_step=epoch)
                sum_writer.add_scalar('val_prec_5_{}'.format(bw), vp5, global_step=epoch)
        logging.info('Epoch {}: \ntrain loss {:.2f}, train prec1 {:.2f}, train prec5 {:.2f}\n'
                     '  val loss {:.2f},   val prec1 {:.2f},   val prec5 {:.2f}'.format(
                         epoch, train_loss[-1], train_prec1[-1], train_prec5[-1], val_loss[-1], val_prec1[-1],
                         val_prec5[-1]))
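
main() reads a module-level args namespace built elsewhere by argparse. A hedged sketch of that setup follows; every flag name is inferred from the attributes main() accesses, and none of the defaults come from the original source.

import argparse

# Hypothetical parser; flag names and defaults are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument('--results-dir', default='./results')
parser.add_argument('--dataset', default='cifar10')
parser.add_argument('--train-split', default='train')
parser.add_argument('--model', default='resnet18')
parser.add_argument('--bit-width-list', default='4,8,32')
parser.add_argument('--batch-size', type=int, default=128)
parser.add_argument('--workers', type=int, default=4)
parser.add_argument('--optimizer', default='sgd')
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--lr-decay', default='30,60,90')
parser.add_argument('--weight-decay', type=float, default=1e-4)
parser.add_argument('--resume', default='None')
parser.add_argument('--pretrain', default='None')
parser.add_argument('--start-epoch', type=int, default=0)
parser.add_argument('--epochs', type=int, default=120)
args = parser.parse_args()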
Example #18
0
def find_best_lr(hypa: ty.Dict[str, str]) -> None:
    """MAKEDOC: what is find_best_lr doing?"""
    logg = logging.getLogger(f"c.{__name__}.find_best_lr")
    # logg.setLevel("INFO")
    logg.debug("Start find_best_lr")

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # no validation just find the LR
    use_validation = False

    # name the model
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_area(hypa, num_labels, input_shape)

    # no need for validation
    x = np.concatenate((data["training"], data["validation"]))
    y = np.concatenate((labels["training"], labels["validation"]))

    # magic to fix the GPUs
    setup_gpus()

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type.startswith("SI"):
        if net_type == "SIM":
            sim_type = "1"
        elif net_type == "SI2":
            sim_type = "2"
        model = SimpleNet.build(sim_type=sim_type, **model_param)

    num_samples = x.shape[0]
    logg.debug(f"num_samples: {num_samples}")

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_area(hypa,
                                             use_validation,
                                             model_path=None,
                                             num_samples=num_samples)

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # boundary values
    start_lr = 1e-9
    end_lr = 1e1

    # find the best values
    lrf = LearningRateFinder(model)
    lrf.find(
        (x, y),
        start_lr,
        end_lr,
        epochs=training_param["epochs"],
        batchSize=training_param["batch_size"],
    )

    fig_title = "LR_sweep"
    fig_title += f"__{model_name}"

    fig, ax = plt.subplots(figsize=(8, 8))

    # get the plot
    lrf.plot_loss(ax=ax, title=fig_title)

    # save the plot
    plot_fol = Path("plot_results") / "area" / "find_best_lr"
    if not plot_fol.exists():
        plot_fol.mkdir(parents=True, exist_ok=True)
    fig_name = fig_title + ".{}"
    fig.savefig(plot_fol / fig_name.format("png"))
    fig.savefig(plot_fol / fig_name.format("pdf"))

    recap_loss = {}
    recap_loss["lrs"] = [float(lr) for lr in lrf.lrs[:]]
    recap_loss["losses"] = [float(loss) for loss in lrf.losses[:]]
    loss_path = plot_fol / f"loss_{fig_title}.json"
    loss_path.write_text(json.dumps(recap_loss, indent=4))

    plt.show()
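
A sketch of how find_best_lr might be called. Only net_type, dataset_name, and words_type are read directly by the function; the remaining keys (and all values) are assumptions about what the helper builders expect.

# Hypothetical hyperparameter choice for the LR sweep.
hypa = {
    "net_type": "AAN",
    "dataset_name": "mel04",
    "words_type": "f1",
    "batch_size_type": "01",
    "epoch_num_type": "01",
    "learning_rate_type": "01",
    "optimizer_type": "a1",
}
find_best_lr(hypa)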
Example #19
0
def train_transfer(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
    tensorboard_logs_folder: Path,
) -> None:
    """MAKEDOC: what is train_transfer doing?

    https://www.tensorflow.org/guide/keras/transfer_learning/#build_a_model
    """
    logg = logging.getLogger(f"c.{__name__}.train_transfer")
    # logg.setLevel("INFO")
    logg.debug("Start train_transfer")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_transfer_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    if not model_info_folder.exists():
        model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    # grab a few hypas
    words_type = hypa["words_type"]
    datasets_type = hypa["datasets_type"]

    # get the partition of the data
    partition, ids2labels = prepare_partitions(words_type)

    # get the word list
    words = words_types[words_type]
    num_labels = len(words)

    # get the dataset name list
    datasets_types, datasets_shapes = get_datasets_types()
    dataset_names = datasets_types[datasets_type]
    dataset_shape = datasets_shapes[datasets_type]

    # the shape of each sample
    input_shape = (*dataset_shape, 3)

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_transfer(hypa, use_validation,
                                                 tensorboard_logs_folder,
                                                 model_path)

    # load datasets
    processed_folder = Path("data_split")
    data_split_paths = [processed_folder / f"{dn}" for dn in dataset_names]
    # data, labels = load_triple(data_paths, words)

    # assemble the gen_param for the generators
    gen_param = {
        "dim": dataset_shape,
        "batch_size": training_param["batch_sizes"][0],
        "shuffle": True,
        "label_names": words,
        "data_split_paths": data_split_paths,
    }

    # maybe concatenate the validation and training lists
    val_generator: ty.Optional[AudioGenerator] = None
    if use_validation:
        val_generator = AudioGenerator(partition["validation"], ids2labels,
                                       **gen_param)
        logg.debug("Using validation data")
    else:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    # create the training generator with the modified (maybe) list of IDs
    training_generator = AudioGenerator(partition["training"], ids2labels,
                                        **gen_param)
    logg.debug(f"len(training_generator): {len(training_generator)}")

    # always create the test generator
    # do not shuffle the test data
    gen_param["shuffle"] = False
    # do not batch it, no loss of stray data at the end
    gen_param["batch_size"] = 1
    testing_generator = AudioGenerator(partition["testing"], ids2labels,
                                       **gen_param)

    ##########################################################
    #   Setup model
    ##########################################################

    # from hypa extract model param
    model_param = get_model_param_transfer(hypa, num_labels, input_shape)

    # get mean and var to normalize the data
    data_mean, data_variance = get_generator_mean_var_cached(
        training_generator, words_type, datasets_type, processed_folder)

    # get the model
    model, base_model = TRAmodel(data_mean=data_mean,
                                 data_variance=data_variance,
                                 **model_param)
    model.summary()

    # a dict to recreate this training
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["batch_sizes"] = training_param["batch_sizes"]
    recap["epoch_num"] = training_param["epoch_num"]
    recap["version"] = "003"

    # logg.debug(f"recap: {recap}")
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    ##########################################################
    #   Compile and fit model the first time
    ##########################################################

    model.compile(
        optimizer=training_param["opt"][0],
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][0],
    )

    results_freeze = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][0],
        callbacks=training_param["callbacks"][0],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    # results_freeze_recap
    results_freeze_recap: ty.Dict[str, ty.Any] = {}
    results_freeze_recap["model_name"] = model_name
    results_freeze_recap["results_recap_version"] = "001"

    # save the histories
    results_freeze_recap["history_train"] = {
        mn: results_freeze.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_freeze_recap["history_val"] = {
            f"val_{mn}": results_freeze.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_freeze_recap.json"
    res_recap_path.write_text(json.dumps(results_freeze_recap, indent=4))

    ##########################################################
    #   Compile and fit model the second time
    ##########################################################

    # Unfreeze the base_model. Note that it keeps running in inference mode
    # since we passed `training=False` when calling it. This means that
    # the batchnorm layers will not update their batch statistics.
    # This prevents the batchnorm layers from undoing all the training
    # we've done so far.
    base_model.trainable = True
    model.summary()

    model.compile(
        optimizer=training_param["opt"][1],  # Low learning rate
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][1],
    )

    results_full = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][1],
        callbacks=training_param["callbacks"][1],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_full_recap: ty.Dict[str, ty.Any] = {}
    results_full_recap["model_name"] = model_name
    results_full_recap["results_recap_version"] = "001"

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_full_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    # cm = pred_hot_2_cm(y_true, y_pred, words)
    cm = confusion_matrix(y_true, y_pred_labels)
    results_full_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_full_recap["fscore"] = fscore

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_full_recap["history_train"] = {
        mn: results_full.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_full_recap["history_val"] = {
            f"val_{mn}": results_full.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_full_recap.json"
    res_recap_path.write_text(json.dumps(results_full_recap, indent=4))

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
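
A hedged invocation sketch for train_transfer. The hypa keys mirror those passed to build_transfer_name in the evaluate_audio_transfer example later on this page; the values and the folder layout are assumptions.

from pathlib import Path

# Hypothetical call; paths and hypa values are assumptions.
hypa = {
    "dense_width_type": "03",
    "dropout_type": "01",
    "batch_size_type": "02",
    "epoch_num_type": "01",
    "learning_rate_type": "01",
    "optimizer_type": "a1",
    "datasets_type": "01",
    "words_type": "f1",
}
train_transfer(
    hypa,
    force_retrain=False,
    use_validation=True,
    trained_folder=Path("trained_models") / "transfer",
    root_info_folder=Path("info") / "transfer",
    tensorboard_logs_folder=Path("tensorboard_logs"),
)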
Example #20
0
def visualize_datasets(word_index):
    """MAKEDOC: what is visualize_datasets doing?"""
    logg = logging.getLogger(f"c.{__name__}.visualize_datasets")
    logg.debug("Start visualize_datasets")

    # magic to fix the GPUs
    setup_gpus()

    # show different datasets

    # datasets = [ "mfcc01", "mfcc02", "mfcc03", "mfcc04", "mfcc05", "mfcc06", "mfcc07", "mfcc08"]
    # datasets = [ "mel01", "mel02", "mel03", "mel04", "mel05", "mel06", "mel07", "mel08",
    #     "mel09", "mel10", "mel11", "mel12", "mel13", "mel14", "mel15", "melc1", "melc2",
    #     "melc3", "melc4", "mela1", "meL04", "meLa1", "auL18", "aug18", ]
    # datasets = [ "mel01", "mel04", "mel06", "melc1" ]
    # datasets = ["mel09", "mel10", "mel11", "melc1"]
    datasets = ["mel04", "mel04a", "mel04b", "melc1"]

    # words = words_types["f1"]
    # a_word = words[0]
    # a_word = "loudest_one"
    a_word = "happy"
    # a_word = "_other_ltts_loud"

    # datasets = []
    # datasets.extend(["meL04", "meLa1", "meLa2", "meLa3", "meLa4"])
    # datasets.extend(["auL06", "auL07", "auL08", "auL09"])
    # datasets.extend(["auL18", "auL19", "auL20", "auL21"])
    # a_word = "loudest_two"

    # datasets = []
    # datasets.extend(["mel04", "mela1"])
    # datasets.extend(["aug14", "aug15"])
    # a_word = "forward"
    # datasets.extend(["aug14", "aug07"])
    # a_word = "one"
    # a_word = "_other_ltts"

    # which word in the dataset to plot
    iw = word_index

    processed_folder = Path("data_proc")

    # fig, axes = plt.subplots(4, 5, figsize=(12, 15))
    nrows, ncols = find_rowcol(len(datasets))
    base_figsize = 5
    figsize = (ncols * base_figsize * 1.5, nrows * base_figsize)
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
    if nrows * ncols > 1:
        axes_flat = axes.flat
    else:
        axes_flat = [axes]

    fig.suptitle(f"Various spectrograms for {a_word}", fontsize=20)
    for i, ax in enumerate(axes_flat[: len(datasets)]):

        # the current dataset being plotted
        dataset_name = datasets[i]
        processed_path = processed_folder / f"{dataset_name}"
        word_path = processed_path / f"{a_word}_training.npy"
        logg.debug(f"word_path: {word_path}")

        # FIXME: this fallback is fragile, it regenerates missing datasets on the fly
        if not word_path.exists():
            if dataset_name.startswith("me"):
                preprocess_spec(dataset_name, f"_{a_word}")
            elif dataset_name.startswith("au"):
                do_augmentation(dataset_name, f"_{a_word}")

        word_data = np.load(word_path, allow_pickle=True)
        logg.debug(f"{dataset_name} {a_word} shape: {word_data[iw].shape}")
        title = f"{dataset_name}: shape {word_data[iw].shape}"
        plot_spec(word_data[iw], ax, title=title)
    fig.tight_layout()

    plot_folder = Path("plot_models")
    dt_names = "_".join(datasets)
    fig.savefig(plot_folder / f"{a_word}_{dt_names}_specs.pdf")
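
A one-line usage sketch (hypothetical): plot spectrogram index 2 of each dataset, the same index used as word_id elsewhere on this page.

visualize_datasets(word_index=2)
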
def evaluate_attention_weights(train_words_type: str) -> None:
    """MAKEDOC: what is evaluate_attention_weights doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_attention_weights")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_attention_weights")

    # magic to fix the GPUs
    setup_gpus()

    # VAN_opa1_lr05_bs32_en15_dsaug07_wLTall
    hypa = {
        "batch_size_type": "32",
        "dataset_name": "aug07",
        "epoch_num_type": "15",
        "learning_rate_type": "05",
        "net_type": "VAN",
        "optimizer_type": "a1",
        "words_type": "LTall",
    }
    use_validation = True
    dataset_name = hypa["dataset_name"]
    batch_size = int(hypa["batch_size_type"])

    # get the model name
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load the model
    model_folder = Path("trained_models") / "area"
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)

    # get the output layer because you forgot to name it
    name_output_layer = model.layers[-1].name
    logg.debug(f"name_output_layer: {name_output_layer}")

    # build a model on top of that to get the weights
    att_weight_model = tf_models.Model(
        inputs=model.input,
        outputs=[
            model.get_layer(name_output_layer).output,
            model.get_layer("area_values").output,
        ],
    )
    att_weight_model.summary()

    # get the training words
    train_words = words_types[train_words_type]
    perm_pred = compute_permutation(train_words)
    logg.debug(f"perm_pred: {perm_pred}")
    sorted_train_words = sorted(train_words)
    logg.debug(f"sorted(train_words): {sorted(train_words)}")

    # load data if you do not want to record new audios
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{dataset_name}"
    logg.debug(f"processed_path: {processed_path}")

    # # evaluate on all data because im confused
    # data, labels = load_processed(processed_path, train_words)
    # logg.debug(f"data['testing'].shape: {data['testing'].shape}")
    # logg.debug(f"labels['testing'].shape: {labels['testing'].shape}")
    # eval_testing = model.evaluate(data["testing"], labels["testing"])
    # for metrics_name, value in zip(model.metrics_names, eval_testing):
    #     logg.debug(f"{metrics_name}: {value}")

    # which word in the dataset to plot
    # word_id = 5
    # word_id = 7
    word_id = 12

    # the loaded spectrograms
    rec_data_l: ty.List[np.ndarray] = []

    # for now we do not record new words
    rec_words = train_words[30:32]
    num_rec_words = len(rec_words)

    logg.debug(f"processed_path: {processed_path}")
    for word in rec_words:
        logg.debug(f"\nword: {word}")

        data, labels = load_processed(processed_path, [word])
        logg.debug(f"data['testing'].shape: {data['testing'].shape}")
        logg.debug(f"labels['testing'].shape: {labels['testing'].shape}")

        # eval_testing = model.evaluate(data["testing"], labels["testing"])
        # for metrics_name, value in zip(model.metrics_names, eval_testing):
        #     logg.debug(f"{metrics_name}: {value}")

        # get one of the spectrograms
        word_data = data["testing"][word_id]
        rec_data_l.append(word_data)

        pred, att_weights = att_weight_model.predict(data["testing"])
        logg.debug(f"pred.shape: {pred.shape}")
        logg.debug(f"pred[0].shape: {pred[0].shape}")

        pred_am_all = np.argmax(pred, axis=1)
        logg.debug(f"pred_am_all: {pred_am_all}")

        pred_index = np.argmax(pred[0])
        pred_word = sorted_train_words[pred_index]
        logg.debug(f"sorted pred_word: {pred_word} pred_index {pred_index}")

        # test EVERY SINGLE spectrogram
        spec_num = data["testing"].shape[0]
        for wid in range(spec_num):

            # get the word
            word_data = data["testing"][wid]
            logg.debug(f"word_data.shape: {word_data.shape}")
            batch_word_data = np.expand_dims(word_data, axis=0)
            logg.debug(f"batch_word_data.shape: {batch_word_data.shape}")

            shape_batch = (batch_size, *word_data.shape)
            logg.debug(f"shape_batch: {shape_batch}")

            batch_word_data_big = np.zeros(shape_batch, dtype=np.float32)
            for bi in range(batch_size):
                batch_word_data_big[bi, :, :, :] = batch_word_data
            # batch_word_data_big[0, :, :, :] = batch_word_data

            # predict it
            # pred, att_weights = att_weight_model.predict(batch_word_data)
            pred, att_weights = att_weight_model.predict(batch_word_data_big)

            # show all prediction
            # pred_am = np.argmax(pred, axis=1)
            # logg.debug(f"pred_am: {pred_am}")

            # focus on first prediction
            word_pred = pred[0]
            pred_index = np.argmax(word_pred)
            pred_word = sorted_train_words[pred_index]

            recap = ""
            if pred_word == word:
                recap += "correct "
            else:
                recap += "  wrong "
                pred_am = np.argmax(pred, axis=1)
                logg.debug(f"pred_am: {pred_am}")

            recap += f"sorted pred_word: {pred_word} pred_index {pred_index}"
            recap += f" word_pred.shape {word_pred.shape}"
            recap += f" pred_am_all[wid] {pred_am_all[wid]}"

            # pred_f = ", ".join([f"{p:.3f}" for p in pred[0]])
            # recap += f" pred_f: {pred_f}"

            logg.debug(recap)

            # break

    # turn the list into np array
    rec_data = np.stack(rec_data_l)
    logg.debug(f"\nrec_data.shape: {rec_data.shape}")

    # get prediction and attention weights
    pred, att_weights = att_weight_model.predict(rec_data)
    logg.debug(f"att_weights.shape: {att_weights.shape}")
    logg.debug(f"att_weights[0].shape: {att_weights[0].shape}")

    plot_size = 5
    fw = plot_size * num_rec_words
    nrows = 2
    fh = plot_size * nrows
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=num_rec_words,
                             figsize=(fw, fh))
    fig.suptitle("Attention weights computed with VerticalAreaNet",
                 fontsize=20)

    for i, word in enumerate(rec_words):
        logg.debug(f"recword: {word}")

        # show the spectrogram
        word_spec = rec_data[i][:, :, 0]
        # logg.debug(f"word_spec: {word_spec}")
        axes[0][i].set_title(f"Spectrogram for {word}", fontsize=20)
        axes[0][i].imshow(word_spec, origin="lower")

        axes[1][i].set_title(f"Attention weights for {word}", fontsize=20)
        att_w = att_weights[i][:, :, 0]
        axes[1][i].imshow(att_w, origin="lower")
        logg.debug(f"att_w.max(): {att_w.max()}")

        # axes[0][i].imshow(
        #     att_w, origin="lower", extent=img.get_extent(), cmap="gray", alpha=0.4
        # )

        # weighted = word_spec * att_w
        # axes[2][i].imshow(weighted, origin="lower")

        word_pred = pred[i]
        pred_index = np.argmax(word_pred)
        pred_word = sorted_train_words[pred_index]
        logg.debug(f"sorted pred_word: {pred_word} pred_index {pred_index}")

        # # plot the predictions
        word_pred = pred[i]
        # logg.debug(f"word_pred: {word_pred}")
        # # permute the prediction from sorted to the order you have
        word_pred = word_pred[perm_pred]
        # logg.debug(f"word_pred permuted: {word_pred}")
        pred_index = np.argmax(word_pred)
        pred_word = train_words[pred_index]
        logg.debug(f"pred_word: {pred_word} pred_index {pred_index}")
        # title = f"Predictions for {word}"
        # plot_pred(word_pred, train_words, axes[2][i], title, pred_index)

    fig.tight_layout()

    fig_name = f"{model_name}"
    fig_name += "_0002.{}"
    plot_folder = Path("plot_results")
    results_path = plot_folder / fig_name.format("pdf")
    fig.savefig(results_path)

    plt.show()
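
A hypothetical call for this VerticalAreaNet variant; "LTall" matches the hard-coded hypa["words_type"] above.

evaluate_attention_weights(train_words_type="LTall")
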
def evaluate_model_cnn(which_dataset: str, train_words_type: str,
                       test_words_type: str) -> None:
    """MAKEDOC: what is evaluate_model_cnn doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_cnn")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_model_cnn")

    # magic to fix the GPUs
    setup_gpus()

    # setup the parameters
    # hypa: ty.Dict[str, ty.Union[str, int]] = {}
    # hypa["base_dense_width"] = 32
    # hypa["base_filters"] = 20
    # hypa["batch_size"] = 32
    # hypa["dropout_type"] = "01"
    # # hypa["epoch_num"] = 16
    # hypa["epoch_num"] = 15
    # hypa["kernel_size_type"] = "02"
    # # hypa["pool_size_type"] = "02"
    # hypa["pool_size_type"] = "01"
    # # hypa["learning_rate_type"] = "02"
    # hypa["learning_rate_type"] = "04"
    # hypa["optimizer_type"] = "a1"
    # hypa["dataset"] = which_dataset
    # hypa["words"] = train_words_type

    # hypa: ty.Dict[str, ty.Union[str, int]] = {}
    # hypa["base_dense_width"] = 32
    # hypa["base_filters"] = 32
    # hypa["batch_size"] = 32
    # hypa["dropout_type"] = "02"
    # hypa["epoch_num"] = 15
    # hypa["kernel_size_type"] = "02"
    # hypa["pool_size_type"] = "01"
    # hypa["learning_rate_type"] = "04"
    # hypa["optimizer_type"] = "a1"
    # hypa["dataset"] = which_dataset
    # hypa["words"] = train_words_type

    hypa: ty.Dict[str, ty.Union[str, int]] = {
        "base_dense_width": 32,
        "base_filters": 32,
        "batch_size": 32,
        # "dataset": "aug07",
        "dropout_type": "01",
        "epoch_num": 15,
        "kernel_size_type": "02",
        "learning_rate_type": "04",
        "optimizer_type": "a1",
        "pool_size_type": "01",
        # "words": "all",
    }

    hypa["dataset"] = which_dataset
    hypa["words"] = train_words_type

    # get the words
    # train_words = words_types[train_words_type]
    test_words = words_types[test_words_type]

    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    model_folder = Path("trained_models") / "cnn"
    model_path = model_folder / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        raise FileNotFoundError(f"Model not found at: {model_path}")

    model = tf.keras.models.load_model(model_path)
    model.summary()

    # input data
    processed_path = Path("data_proc") / f"{which_dataset}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the words you trained on
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])
    # model.evaluate(data["validation"], labels["validation"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    # y_pred = model.predict(data["validation"])
    # cm = pred_hot_2_cm(labels["validation"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore)

    plt.show()
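
A hedged usage sketch; the dataset and word-set keys are assumptions drawn from values that appear elsewhere on this page.

evaluate_model_cnn(which_dataset="mel04", train_words_type="f1", test_words_type="f1")
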
def evaluate_audio_cnn(args):
    """MAKEDOC: what is evaluate_audio_cnn doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_audio_cnn")
    logg.debug("Start evaluate_audio_cnn")

    # magic to fix the GPUs
    setup_gpus()

    # need to know on which dataset the model was trained to compute specs
    dataset_name = "mel01"

    # words that the dataset was trained on
    train_words_type = args.train_words_type
    train_words = words_types[train_words_type]

    # permutation from the sorted order to the requested word order
    perm_pred = compute_permutation(train_words)

    rec_words_type = args.rec_words_type
    if rec_words_type == "train":
        rec_words = train_words
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # where to save the audios
    audio_folder = Path("recorded_audio")
    if not audio_folder.exists():
        audio_folder.mkdir(parents=True, exist_ok=True)

    # record the audios and save them in audio_folder
    audio_path_fmt = "{}_02.wav"
    audios = record_audios(rec_words, audio_folder, audio_path_fmt, timeout=0)

    # compute the spectrograms and build the dataset of correct shape
    img_specs = []
    spec_dict = get_spec_dict()
    spec_kwargs = spec_dict[dataset_name]
    p2d_kwargs = {"ref": np.max}
    for word in rec_words:
        # get the name
        audio_path = audio_folder / audio_path_fmt.format(word)

        # convert it to mel
        log_spec = wav2mel(audio_path, spec_kwargs, p2d_kwargs)
        img_spec = log_spec.reshape((*log_spec.shape, 1))
        logg.debug(f"img_spec.shape: {img_spec.shape}"
                   )  # img_spec.shape: (128, 32, 1)

        img_specs.append(img_spec)

    # the data needs to look like this data['testing'].shape: (735, 128, 32, 1)
    # data = log_spec.reshape((1, *log_spec.shape, 1))
    data = np.stack(img_specs)
    logg.debug(f"data.shape: {data.shape}")

    hypa: ty.Dict[str, ty.Union[str, int]] = {}
    hypa["base_dense_width"] = 32
    hypa["base_filters"] = 20
    hypa["batch_size"] = 32
    hypa["dropout_type"] = "01"
    hypa["epoch_num"] = 16
    hypa["kernel_size_type"] = "02"
    hypa["pool_size_type"] = "02"
    hypa["learning_rate_type"] = "02"
    hypa["optimizer_type"] = "a1"
    hypa["dataset"] = dataset_name
    hypa["words"] = train_words_type

    # get the words
    train_words = words_types[train_words_type]

    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # model_folder = Path("trained_models") / "cnn"
    model_folder = Path("saved_models")
    model_path = model_folder / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        raise FileNotFoundError(f"Model not found at: {model_path}")

    model = tf.keras.models.load_model(model_path)
    model.summary()

    pred = model.predict(data)
    # logg.debug(f"pred: {pred}")

    # plot the thing
    plot_size = 5
    fw = plot_size * 3
    fh = plot_size * num_rec_words
    fig, axes = plt.subplots(nrows=num_rec_words, ncols=3, figsize=(fw, fh))
    fig.suptitle("Recorded audios", fontsize=18)

    for i, word in enumerate(rec_words):
        plot_waveform(audios[i], axes[i][0])
        spec = img_specs[i][:, :, 0]
        plot_spec(spec, axes[i][1])
        plot_pred(
            pred[i][perm_pred],
            train_words,
            axes[i][2],
            f"Prediction for {rec_words[i]}",
            train_words.index(word),
        )

    # https://stackoverflow.com/q/8248467
    # https://stackoverflow.com/q/2418125
    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    fig_name = f"{model_name}_{train_words_type}_{rec_words_type}.png"
    results_path = audio_folder / fig_name
    fig.savefig(results_path)

    if num_rec_words <= 6:
        plt.show()
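
evaluate_audio_cnn expects an argparse-style namespace; a minimal hypothetical driver:

import argparse

# Hypothetical arguments; the word-set keys are assumptions.
args = argparse.Namespace(train_words_type="f1", rec_words_type="train")
evaluate_audio_cnn(args)
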
def evaluate_audio_transfer(train_words_type: str, rec_words_type: str) -> None:
    """MAKEDOC: what is evaluate_audio_transfer doing?"""
    logg = logging.getLogger(f"c.{__name__}.evaluate_audio_transfer")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_audio_transfer")

    # magic to fix the GPUs
    setup_gpus()

    datasets_type = "01"
    datasets_types = {
        "01": ["mel05", "mel09", "mel10"],
        "02": ["mel05", "mel10", "mfcc07"],
        "03": ["mfcc06", "mfcc07", "mfcc08"],
        "04": ["mel05", "mfcc06", "melc1"],
        "05": ["melc1", "melc2", "melc4"],
    }
    dataset_names = datasets_types[datasets_type]

    # we do not support composed datasets for now
    for dn in dataset_names:
        if dn.startswith("melc"):
            logg.error(f"not supported: {dataset_names}")
            return

    # words that the dataset was trained on
    train_words = words_types[train_words_type]

    # the model predicts sorted words
    perm_pred = compute_permutation(train_words)

    if rec_words_type == "train":
        rec_words = train_words
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # where to save the audios
    audio_folder = Path("recorded_audio")
    if not audio_folder.exists():
        audio_folder.mkdir(parents=True, exist_ok=True)

    # record the audios and save them in audio_folder
    audio_path_fmt = "{}_02.wav"
    audios = record_audios(rec_words, audio_folder, audio_path_fmt, timeout=0)

    # compute the spectrograms and build the dataset of correct shape
    specs_3ch: ty.List[np.ndarray] = []
    # params for the mel conversion
    p2d_kwargs = {"ref": np.max}
    spec_dict = get_spec_dict()
    for word in rec_words:
        # get the name
        audio_path = audio_folder / audio_path_fmt.format(word)

        # convert it to mel for each type of dataset
        specs: ty.List[np.ndarray] = []
        for dataset_name in dataset_names:
            spec_kwargs = spec_dict[dataset_name]
            log_spec = wav2mel(audio_path, spec_kwargs, p2d_kwargs)
            specs.append(log_spec)
        img_spec = np.stack(specs, axis=2)
        # logg.debug(f"img_spec.shape: {img_spec.shape}")  # (128, 128, 3)

        specs_3ch.append(img_spec)

    data = np.stack(specs_3ch)
    logg.debug(f"data.shape: {data.shape}")

    hypa: ty.Dict[str, str] = {}
    hypa["dense_width_type"] = "03"
    hypa["dropout_type"] = "01"
    hypa["batch_size_type"] = "02"
    hypa["epoch_num_type"] = "01"
    hypa["learning_rate_type"] = "01"
    hypa["optimizer_type"] = "a1"
    hypa["datasets_type"] = datasets_type
    hypa["words_type"] = train_words_type
    use_validation = False

    # hypa: Dict[str, str] = {}
    # hypa["dense_width_type"] = "02"
    # hypa["dropout_type"] = "01"
    # hypa["batch_size_type"] = "01"
    # hypa["epoch_num_type"] = "01"
    # hypa["learning_rate_type"] = "01"
    # hypa["optimizer_type"] = "a1"
    # hypa["datasets_type"] = datasets_type
    # hypa["words_type"] = train_words_type
    # use_validation = True

    # get the model name
    model_name = build_transfer_name(hypa, use_validation)

    # load the model
    # model_folder = Path("trained_models") / "transfer"
    model_folder = Path("saved_models")
    model_path = model_folder / f"{model_name}.h5"
    model = tf.keras.models.load_model(model_path)

    # predict!
    pred = model.predict(data)

    # plot everything
    plot_size = 5
    fw = plot_size * 5
    fh = plot_size * num_rec_words
    fig, axes = plt.subplots(nrows=num_rec_words, ncols=5, figsize=(fw, fh))
    fig.suptitle("Recorded audios", fontsize=18)

    for i, word in enumerate(rec_words):
        plot_waveform(audios[i], axes[i][0])
        img_spec = specs_3ch[i]
        plot_spec(img_spec[:, :, 0], axes[i][1])
        plot_spec(img_spec[:, :, 1], axes[i][2])
        plot_spec(img_spec[:, :, 2], axes[i][3])
        plot_pred(
            pred[i][perm_pred],
            train_words,
            axes[i][4],
            f"Prediction for {rec_words[i]}",
            train_words.index(word),
        )

    # https://stackoverflow.com/q/8248467
    # https://stackoverflow.com/q/2418125
    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    fig_name = f"{model_name}_{train_words_type}_{rec_words_type}.png"
    results_path = audio_folder / fig_name
    fig.savefig(results_path)

    if num_rec_words <= 6:
        plt.show()
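
A hypothetical closing call: record the training words again and classify them with the transfer model (word-set key assumed).

evaluate_audio_transfer(train_words_type="f1", rec_words_type="train")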