def run_eval(args):
    """Evaluate an EfficientDet model on the evaluation dataset.

    Args:
        args: Run options; converted with ``utils.dict_to_namedtuple``.
            The fields read here are ``model_name``, ``hparams``,
            ``weights`` and ``eval_steps``; the converted object is also
            handed to ``utils.get_dataset``.
    """
    logging.set_verbosity(logging.WARNING)
    opts = utils.dict_to_namedtuple(args)

    # Model configuration: named defaults overridden by user hparams.
    config = hparams_config.get_efficientdet_config(opts.model_name)
    config.override(opts.hparams, allow_new_keys=True)
    config.image_size = utils.parse_image_size(config.image_size)

    params = dict(config.as_dict(), seed=None)
    logging.info(params)

    utils.setup_gpus()
    eval_data = utils.get_dataset(opts, 1, False, params, None)

    model = efficientdet_net.EfficientDetNet(params=params)
    model.compile()

    if opts.weights:
        # One forward pass on a blank image before loading the weights
        # (presumably so the model variables exist -- TODO confirm).
        size = params["image_size"]
        blank_batch = np.zeros((1, size[0], size[1], 3))
        model.predict(blank_batch)
        model.load_weights(opts.weights)

    model.evaluate(eval_data, steps=opts.eval_steps)
def run_augment_data(args: argparse.Namespace) -> None:
    """Run ``do_augmentation`` for one augmentation type or a named group.

    ``args.augmentation_type`` is either one of the group shorthands listed
    below, which expands to four augmentation types, or any other value,
    which is used as a single augmentation type as-is.

    Args:
        args: Parsed options; ``augmentation_type``, ``words_type`` and
            ``force_augment`` are read.
    """
    logg = logging.getLogger(f"c.{__name__}.run_augment_data")
    logg.debug("Starting run_augment_data")

    # magic to fix the GPUs
    setup_gpus()

    requested = args.augmentation_type
    words_type = args.words_type
    force_augment = args.force_augment

    # group shorthand -> augmentation types it stands for
    aug_groups = {
        "2345": ["aug02", "aug03", "aug04", "aug05"],
        "6789": ["aug06", "aug07", "aug08", "aug09"],
        "10123": ["aug10", "aug11", "aug12", "aug13"],
        "14567": ["aug14", "aug15", "aug16", "aug17"],
        "auA1234": ["auA01", "auA02", "auA03", "auA04"],
        "auA5678": ["auA05", "auA06", "auA07", "auA08"],
    }
    aug_type_list = aug_groups.get(requested, [requested])

    for aug_type in aug_type_list:
        do_augmentation(aug_type, words_type, force_augment)
def run_demo(args: argparse.Namespace) -> None:
    """Build a ``Demo`` on the default audio input device and run it.

    Args:
        args: Parsed options; ``arch_type``, ``train_words_type`` and
            ``interval`` are read.
    """
    logg = logging.getLogger(f"c.{__name__}.run_demo")
    logg.debug("Starting run_demo")

    arch_type = args.arch_type
    train_words_type = args.train_words_type

    # magic to fix the GPUs
    setup_gpus()

    # device=None lets the audio backend pick the default input device
    device = None
    device_info = sd.query_devices(device=device, kind="input")
    logg.debug(f"device_info: {device_info}")

    # words types ending in "LS" use a window half as long
    window = 500 if train_words_type.endswith("LS") else 1000

    interval = args.interval
    samplerate_input = device_info["default_samplerate"]
    channels = [1]

    the_demo = Demo(
        device,
        window,
        interval,
        samplerate_input,
        channels,
        arch_type=arch_type,
        train_words_type=train_words_type,
    )
    the_demo.run()
def __init__(self, model_name, data_path, explain=False, save_predictions=False, **kwargs):
    """Load a frozen model and its test data for evaluation.

    :param model_name: name of the run; the model is loaded from
        ``./trained_models/<model_name>/frozen`` and results go to
        ``./trained_models/<model_name>/results``
    :param data_path: path handed to ``DataLoader`` to get the test dataset
    :param explain: stored as ``self.explain``
    :param save_predictions: stored as ``self.save_predictions``
    :param kwargs: accepted but not used here
    """
    utils.setup_gpus()

    run_dir = os.path.join('./trained_models', model_name)
    self.model_path = os.path.join(run_dir, 'frozen')
    print(f'Loading model from: {self.model_path}')
    self.model_name = model_name

    test_loader = DataLoader(data_path, training=False)
    self.data = test_loader.test_dataset()
    self.model = tf.keras.models.load_model(self.model_path)

    self.explain = explain
    self.outdir = os.path.join(run_dir, 'results')
    self.class_names = ['normal', 'pneumonia', 'COVID-19']
    self.save_predictions = save_predictions

    # make sure the results folder is there
    utils.mdir(self.outdir)
def do_stream_evaluation(
    architecture_type,
    which_dataset,
    train_words_type,
    sentence_wav_paths,
    sentence_norm_tra,
    good_sentences,
) -> None:
    r"""Predict on the selected sentences with a trained model, caching to disk.

    Each prediction is saved as ``<model_name>_<wav_ID>.npy`` inside
    ``plot_stream/y_pred/<model_name>``. Sentences whose prediction file is
    already there are skipped.

    Args:
        architecture_type: Forwarded to ``load_trained_model`` and
            ``evaluate_stream``.
        which_dataset: Forwarded to ``load_trained_model`` and
            ``evaluate_stream``.
        train_words_type: Forwarded to ``load_trained_model`` and
            ``evaluate_stream``.
        sentence_wav_paths: Mapping from wav ID to the path of the wav file.
        sentence_norm_tra: Mapping from wav ID to ``norm_tra`` (presumably
            the normalized transcript -- TODO confirm).
        good_sentences: Positions, in the key order of
            ``sentence_wav_paths``, of the sentences to evaluate.
    """
    logg = logging.getLogger(f"c.{__name__}.do_stream_evaluation")
    # logg.setLevel("INFO")
    logg.debug("Start do_stream_evaluation")

    all_wav_ids = list(sentence_wav_paths)
    logg.debug(f"len(wav_IDs): {len(all_wav_ids)}")

    # counters for a manual good/bad review; nothing updates them here
    good_count = 0
    bad_count = 0

    # magic to fix the GPUs
    setup_gpus()

    # load the model
    model, model_name = load_trained_model(architecture_type, which_dataset, train_words_type)

    # one folder of cached predictions per model
    ypred_folder = Path("plot_stream") / "y_pred" / model_name
    if not ypred_folder.exists():
        ypred_folder.mkdir(parents=True, exist_ok=True)

    for sentence_index in good_sentences:
        # gather the info for this sentence
        wav_ID = all_wav_ids[sentence_index]
        logg.debug(f"\nsentence_index {sentence_index} / {len(good_sentences)-1}")
        orig_wav_path = sentence_wav_paths[wav_ID]
        logg.debug(f"sentence_wav_paths[{wav_ID}]: {orig_wav_path}")
        norm_tra = sentence_norm_tra[wav_ID]
        logg.debug(f"sentence_norm_tra[{wav_ID}]: {norm_tra}")

        # where the prediction for this sentence is cached
        pred_name = f"{model_name}_{wav_ID}.npy"
        logg.debug(f"pred_name SINGLE: {pred_name}")
        pred_path = ypred_folder / pred_name

        if pred_path.exists():
            logg.info(f"Already predicted {pred_path}")
            continue

        y_pred = evaluate_stream(
            model,
            which_dataset,
            train_words_type,
            architecture_type,
            model_name,
            orig_wav_path,
            norm_tra,
            wav_ID,
        )
        logg.debug(f"y_pred.shape: {y_pred.shape}")
        np.save(pred_path, y_pred)

    logg.debug(f"good_count: {good_count}")
    logg.debug(f"bad_count: {bad_count}")
    logg.debug(f"total: {bad_count+good_count}")
def run_training(args):
    """Train an EfficientDet model and save its final weights.

    The steps are: build the config and ``params``, optionally dump the
    config to ``<ckpt_dir>/config.yaml``, optionally seed every RNG, pick a
    distribution strategy, build the datasets, then build, compile, fit,
    optionally evaluate, and save the model.

    Args:
        args: Run options; converted with ``utils.dict_to_namedtuple``.
            The fields read here are ``model_name``, ``hparams``, ``seed``,
            ``batch_size``, ``ckpt_dir``, ``multi_gpu``,
            ``eval_after_training``, ``eval_during_training``, ``epochs``,
            ``train_steps``, ``initial_epoch``, ``start_weights``,
            ``log_dir``, ``eval_steps``, ``eval_freq`` and
            ``output_filename``. The converted object is also handed to
            ``utils.get_dataset``.
    """
    logging.set_verbosity(logging.WARNING)
    args = utils.dict_to_namedtuple(args)

    config = hparams_config.get_efficientdet_config(args.model_name)
    config.override(args.hparams, allow_new_keys=True)
    config.image_size = utils.parse_image_size(config.image_size)

    params = dict(
        config.as_dict(),
        seed=args.seed,
        batch_size=args.batch_size,
    )
    logging.info(params)

    if args.ckpt_dir:
        ckpt_dir = args.ckpt_dir
        if not tf.io.gfile.exists(ckpt_dir):
            tf.io.gfile.makedirs(ckpt_dir)
        # Keep the config next to the checkpoints; never overwrite one
        # that is already there.
        config_file = os.path.join(ckpt_dir, "config.yaml")
        if not tf.io.gfile.exists(config_file):
            # The context manager closes (and flushes) the file.
            with tf.io.gfile.GFile(config_file, "w") as config_fd:
                config_fd.write(str(config))

    # A falsy seed (None or 0) leaves every RNG unseeded.
    if params["seed"]:
        seed = params["seed"]
        os.environ["PYTHONHASHSEED"] = str(seed)
        tf.random.set_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        os.environ["TF_DETERMINISTIC_OPS"] = "1"
        os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

    utils.setup_gpus()

    # Mirror across GPUs only when more than one GPU is visible and the
    # user did not pin the run to exactly one of them.
    num_devices = 1
    physical_devices = tf.config.list_physical_devices("GPU")
    multi_gpu = args.multi_gpu
    if (multi_gpu is not None and len(multi_gpu) != 1
            and len(physical_devices) > 1):
        # An empty list means "use all GPUs": MirroredStrategy gets None.
        devices = [f"GPU:{gpu}" for gpu in multi_gpu] if len(multi_gpu) != 0 else None
        strategy = tf.distribute.MirroredStrategy(devices)
        num_devices = len(devices) if devices else len(physical_devices)
    else:
        strategy = tf.distribute.get_strategy()

    train_dataset = utils.get_dataset(
        args,
        args.batch_size * num_devices,
        True,
        params,
        strategy if num_devices > 1 else None,
    )

    # eval_dataset only exists when some evaluation was requested; every
    # later use is guarded by the same flags.
    if args.eval_after_training or args.eval_during_training:
        eval_dataset = utils.get_dataset(
            args,
            num_devices,
            False,
            params,
            strategy if num_devices > 1 else None,
        )
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = (
            tf.data.experimental.AutoShardPolicy.DATA)
        eval_dataset = eval_dataset.with_options(options)

    # NOTE(review): everything below runs inside the strategy scope; the
    # original scope extent should be confirmed against the upstream file.
    with strategy.scope():
        model = efficientdet_net.EfficientDetNet(params=params)

        global_batch_size = args.batch_size * strategy.num_replicas_in_sync
        model.compile(optimizer=optimizers.get_optimizer(
            params, args.epochs, global_batch_size, args.train_steps))

        initial_epoch = args.initial_epoch
        if args.start_weights:
            # One forward pass on a blank image before loading the weights
            # (presumably so the model variables exist -- TODO confirm).
            image_size = params["image_size"]
            model.predict(np.zeros((1, image_size[0], image_size[1], 3)))
            model.load_weights(args.start_weights)

            # Resume the epoch counter from "<model_name>.<epoch>.h5".
            # Raw string + re.escape: the model name is matched literally
            # and the pattern has no invalid string escapes.
            fname = args.start_weights.split("/")[-1]
            ckpt_pattern = rf"{re.escape(args.model_name)}\.(\d\d+)\.h5"
            match = re.match(ckpt_pattern, fname)
            if match:
                # int() copes with leading zeros, including "00".
                initial_epoch = int(match.group(1))

        callbacks = []

        if args.ckpt_dir:
            ckpt_dir = args.ckpt_dir
            if not tf.io.gfile.exists(ckpt_dir):
                tf.io.gfile.makedirs(ckpt_dir)
            callbacks.append(
                tf.keras.callbacks.ModelCheckpoint(
                    filepath=os.path.join(
                        ckpt_dir,
                        "".join([args.model_name, ".{epoch:02d}.h5"])),
                    save_weights_only=True,
                ))

        if args.log_dir:
            log_dir = args.log_dir
            if not tf.io.gfile.exists(log_dir):
                tf.io.gfile.makedirs(log_dir)
            callbacks.append(
                tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                               update_freq="epoch"))

        model.fit(
            train_dataset,
            epochs=args.epochs,
            steps_per_epoch=args.train_steps,
            initial_epoch=initial_epoch,
            callbacks=callbacks,
            validation_data=eval_dataset if args.eval_during_training else None,
            validation_steps=args.eval_steps,
            validation_freq=args.eval_freq,
        )

        if args.eval_after_training:
            print("Evaluation after training:")
            model.evaluate(eval_dataset, steps=args.eval_steps)

        model.save_weights(args.output_filename)
def find_best_lr(hypa: ty.Dict[str, str]) -> None:
    """Sweep the learning rate of an attention model and plot the loss.

    Training and validation data are merged, a ``LearningRateFinder`` is run
    from 1e-9 to 1e1, and the loss plot is saved as png and pdf in
    ``plot_results/att/find_best_lr`` before being shown.

    Args:
        hypa: Hyper-parameter selectors. The keys read here are
            ``words_type``, ``dataset_name``, ``batch_size_type``,
            ``epoch_num_type`` and ``optimizer_type``; ``hypa`` is also
            forwarded to ``get_model_param_attention`` and
            ``build_attention_name``.
    """
    logg = logging.getLogger(f"c.{__name__}.find_best_lr")
    # logg.setLevel("INFO")
    logg.debug("Start find_best_lr")

    # the words to recognise
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # the preprocessed dataset
    processed_path = Path("data_proc") / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # a sweep needs no validation split, so train on both
    x = np.concatenate((data["training"], data["validation"]))
    y = np.concatenate((labels["training"], labels["validation"]))

    # model parameters come from hypa and the shape of one sample
    sample_shape = data["training"][0].shape
    model_param = get_model_param_attention(hypa, num_labels, sample_shape)

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)

    # range of the sweep
    start_lr = 1e-9
    end_lr = 1e1

    # translate the type selectors into actual values
    batch_size = {"01": 32, "02": 16}[hypa["batch_size_type"]]
    epoch_num = {"01": 15, "02": 30, "03": 2}[hypa["epoch_num_type"]]
    optimizer_types = {"a1": Adam(), "r1": RMSprop()}
    opt = optimizer_types[hypa["optimizer_type"]]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
        ],
    )

    # run the sweep
    lrf = LearningRateFinder(model)
    lrf.find((x, y), start_lr, end_lr, epochs=epoch_num, batchSize=batch_size)

    model_name = build_attention_name(hypa, False)
    fig_title = f"LR_sweep_bs{batch_size}_en{epoch_num}__{model_name}"

    fig, ax = plt.subplots(figsize=(8, 8))
    lrf.plot_loss(ax=ax, title=fig_title)

    # save the plot in both formats
    plot_fol = Path("plot_results") / "att" / "find_best_lr"
    if not plot_fol.exists():
        plot_fol.mkdir(parents=True, exist_ok=True)
    fig_name = fig_title + ".{}"
    for extension in ("png", "pdf"):
        fig.savefig(plot_fol / fig_name.format(extension))

    # TODO: save the loss history

    plt.show()
def train_attention(hypa: ty.Dict[str, str], force_retrain: bool, use_validation: bool) -> None:
    """Train one attention model, evaluate it on the test split and save it.

    The model name is built from ``hypa`` and ``use_validation``. A
    ``<model_name>.txt`` placeholder in ``trained_models/attention`` marks a
    model as already trained; nothing is done in that case unless
    ``force_retrain`` is set.

    A run writes, in ``info/attention/<model_name>``: ``recap.json``,
    ``results_recap.json``, ``test_confusion_matrix.png`` and, when a cyclic
    learning rate is used, ``clr_recap.json``. The model is then saved as
    ``<model_name>.h5`` and the placeholder is written last.

    Args:
        hypa: Hyper-parameter selectors. The keys read here are
            ``words_type``, ``dataset_name``, ``batch_size_type``,
            ``epoch_num_type``, ``learning_rate_type`` and
            ``optimizer_type``; ``hypa`` is also forwarded to
            ``build_attention_name`` and ``get_model_param_attention``.
        force_retrain: Train again even if the placeholder exists.
        use_validation: If true, the validation split is used as validation
            data during training; otherwise it is concatenated to the
            training split and no validation is done.

    Raises:
        KeyError: If a selector in ``hypa`` is not a known type.
    """
    logg = logging.getLogger(f"c.{__name__}.train_attention")
    # logg.setLevel("INFO")
    logg.debug("Start train_attention")

    # build the model name
    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "attention"
    model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            # Logger.warn is a deprecated alias of Logger.warning
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "attention" / model_name
    info_folder.mkdir(parents=True, exist_ok=True)

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_attention(hypa, num_labels, input_shape)

    batch_size_types = {"01": 32, "02": 16}
    batch_size = batch_size_types[hypa["batch_size_type"]]

    epoch_num_types = {"01": 15, "02": 30, "03": 2, "04": 4}
    epoch_num = epoch_num_types[hypa["epoch_num_type"]]

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "exp_decay_step_01",
        "04": "exp_decay_smooth_01",
        "05": "clr_triangular2_01",
        "06": "clr_triangular2_02",
        "07": "clr_triangular2_03",
        "08": "clr_triangular2_04",
        "09": "clr_triangular2_05",
        "10": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # optimizer learning rate: only the "fixed" schedules pick their own
    # value, the others start from 1e-3 and are driven by a callback
    fixed_lr_values = {"fixed01": 1e-3, "fixed02": 1e-4}
    lr = fixed_lr_values.get(lr_value, 1e-3)

    # build only the optimizer that was asked for
    optimizer_types = {"a1": Adam, "r1": RMSprop}
    opt = optimizer_types[hypa["optimizer_type"]](learning_rate=lr)

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    exp_decay_schedules = {
        "exp_decay_step_01": partial(exp_decay_step, epochs_drop=5),
        "exp_decay_smooth_01": partial(exp_decay_smooth, epochs_drop=5),
        "exp_decay_smooth_02": partial(
            exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2
        ),
    }
    if lr_value in exp_decay_schedules:
        callbacks.append(LearningRateScheduler(exp_decay_schedules[lr_value]))

    # setup cyclic learning rate
    if lr_value.startswith("clr_triangular2"):
        base_lr = 1e-5
        max_lr = 1e-3

        # training iteration per epoch = num samples // batch size
        # step size suggested = 2~8 * iterations
        if lr_value in ("clr_triangular2_01", "clr_triangular2_02"):
            step_factor = 8 if lr_value == "clr_triangular2_01" else 2
            step_size = step_factor * x.shape[0] // batch_size

        # target_cycles = the number of cycles we want in those epochs
        # it_per_epoch = num_samples // batch_size
        # total_iterations = it_per_epoch * epoch_num
        # step_size = total_iterations // target_cycles
        elif lr_value in (
            "clr_triangular2_03",
            "clr_triangular2_04",
            "clr_triangular2_05",
        ):
            target_cycles = 4 if lr_value == "clr_triangular2_03" else 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)
            if lr_value == "clr_triangular2_05":
                # set bigger starting value
                max_lr = 1e-2

        logg.debug(f"x.shape[0]: {x.shape[0]}")
        logg.debug(f"CLR is using step_size: {step_size}")

        mode = "triangular2"
        cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode)
        callbacks.append(cyclic_lr)

    # setup early stopping, only for the fixed / exp decay 01 schedules
    if learning_rate_type in ["01", "02", "03", "04"]:
        # without validation data there is no val_loss to monitor
        metric_to_monitor = "val_loss" if use_validation else "loss"
        early_stop = EarlyStopping(
            monitor=metric_to_monitor,
            patience=4,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks.append(early_stop)

    # a dict to recreate this training
    # FIXME this should also have epoch_num/batch_size/lr info
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["version"] = "001"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=epoch_num,
        batch_size=batch_size,
        callbacks=callbacks,
    )

    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "002"

    # eval performance on the various metrics
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names
        }

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # if cyclic_lr was used save the history
    if lr_value.startswith("clr_triangular2"):
        logg.debug(f"cyclic_lr.history.keys(): {cyclic_lr.history.keys()}")
        # plain floats so that the history is JSON serializable
        clr_recap = {
            metric_name: [float(v) for v in values]
            for metric_name, values in cyclic_lr.history.items()
        }
        clr_recap_path = info_folder / "clr_recap.json"
        clr_recap_path.write_text(json.dumps(clr_recap, indent=4))

    # save the trained model
    model.save(model_path)

    # written last: its presence marks the training as complete
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def standardize_sample(img):
    """Shift ``img`` to zero mean and scale it by its standard deviation.

    The divisor is never smaller than ``1 / sqrt(n)``, with ``n`` the number
    of elements of ``img``, so a constant image is not divided by zero.
    """
    min_stddev = 1.0 / np.sqrt(img.size)
    scale = max(np.std(img), min_stddev)
    centered = img - np.mean(img)
    return centered / scale


def show(img):
    """Display ``img`` with matplotlib using a gray colormap."""
    # imported here so that matplotlib is only needed when plotting
    import matplotlib.pyplot as plt

    plt.imshow(img, cmap='gray')
    plt.show()


if __name__ == '__main__':
    utils.setup_gpus()

    # explain one DME test image with LIME and show the result
    dme_paths = glob.glob(
        '/media/miguel/ALICIUM/Miguel/DOWNLOADS/ZhangLabData/CellData/OCT/test/DME/*'
    )
    samples = prep_eval_data(dme_paths)
    sample = samples[10]

    modelname = '20201011_vanilla_cnn_batch64'
    frozen_path = os.path.join('./trained_models', modelname, 'frozen')
    frozen_model = tf.keras.models.load_model(frozen_path)

    lime = LIME(frozen_model, areas=7, perturbations=700)
    explanation, mask, segments = lime.fit_linear_model(sample, label=2)

    # explanation on the left, super-pixel boundaries on the right
    outlined = skimage.segmentation.mark_boundaries(sample, segments)
    show(np.concatenate((explanation, outlined), axis=1))
def __init__(self, modelname, data_path, architecture, hyperparams, img_size=224, **kwargs):
    """Set up model, data loaders, optimizer, metrics and checkpointing.

    :param modelname: name of the run; ``./trained_models/<modelname>`` is
        stored as ``self.model_path``
    :param data_path: path handed to ``DataLoader`` for train and test data
    :param architecture: architecture selector forwarded to
        ``self.build_model``
    :param hyperparams: overrides applied on top of the default
        ``self.params``
    :param img_size: stored as ``self.img_size``
    :param kwargs: extra keyword arguments forwarded to ``self.build_model``
    """
    utils.setup_gpus()

    self.modelname = modelname
    self.model_path = os.path.join('./trained_models', modelname)
    self.data_path = data_path

    # defaults first, then whatever the caller overrides
    default_params = {
        'batch_size': 32,
        'learning_rate': 0.001,
        'schedule': False,
        'optimizer': 'ADAM',
        'test_iter': 100,
        'epochs': 50,
        # number of pneumonia cases in the data
        'max_class_samples': 8514,
    }
    self.params = dict(default_params)
    self.params.update(hyperparams)
    self.img_size = img_size

    (self.log_dir, self.ckpt_dir,
     self.train_writer, self.test_writer) = self.create_dirs()

    samples_per_epoch = 2.0 * self.params['max_class_samples']
    self.steps_epoch = np.ceil(samples_per_epoch / self.params['batch_size'])
    self.epochs = self.params['epochs']

    # counters kept as variables so that they are saved in the checkpoint
    self.epoch_counter = tf.Variable(initial_value=0,
                                     trainable=False,
                                     dtype=tf.int64)
    self.step = tf.Variable(initial_value=0,
                            trainable=False,
                            dtype=tf.int64)

    self.architecure_params = dict(**kwargs)
    self.model = self.build_model(architecture, **self.architecure_params)

    self.train_data = DataLoader(self.data_path, training=True)
    self.test_data = DataLoader(self.data_path, training=False)

    self.lr, self.opt = self.optimizer()
    self.loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

    (self.train_loss, self.test_loss,
     self.train_acc, self.test_acc) = self.build_metrics()

    # everything that goes into a checkpoint
    tracked = dict(model=self.model,
                   optimizer=self.opt,
                   current_epoch=self.epoch_counter,
                   step=self.step)
    self.ckpt = Checkpoint(tracked, self.ckpt_dir, max_to_keep=3)

    # best effort restore: on any failure the error is printed and the
    # freshly built model is kept
    try:
        self.ckpt.restore().assert_existing_objects_matched()
        print('Loading pre trained model')
    except Exception as e:
        print(e)
def evaluate_model_area(model_name: str, test_words_type: str) -> None:
    r"""Evaluate a trained area model on a test split and plot the confusion matrix.

    The dataset name and the words type used for training are parsed from
    ``model_name``: the dataset is the text between ``_ds`` and the next
    ``_``; the words type is the text between ``_w`` and the next ``_`` or
    ``.``. The model is loaded from ``trained_models/area/<model_name>.h5``
    and evaluated on the ``testing`` split of ``data_proc/<dataset>`` loaded
    for the ``test_words_type`` words. The confusion matrix is saved as png
    and pdf in ``plot_results/cm`` and then shown.

    Args:
        model_name: Name of the trained model, without the ``.h5`` suffix.
        test_words_type: Key of ``words_types`` selecting the words to
            test on.

    Raises:
        ValueError: If the dataset name or the training words type cannot
            be parsed from ``model_name``.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_area")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_model_area")
    logg.debug(f"model_name: {model_name}")

    # Parse the name before touching the GPU: a name that cannot be parsed
    # fails here with a clear message instead of an unbound local later.
    match = re.search("_ds(.*?)_", model_name)
    if match is None:
        raise ValueError(
            f"Cannot find the dataset name ('_ds<name>_') in model name {model_name!r}"
        )
    logg.debug(f"match[1]: {match[1]}")
    dataset_name = match[1]

    match = re.search("_w(.*?)[_.]", model_name)
    if match is None:
        raise ValueError(
            f"Cannot find the train words type ('_w<type>_') in model name {model_name!r}"
        )
    logg.debug(f"match[1]: {match[1]}")
    train_words_type = match[1]

    # magic to fix the GPUs
    setup_gpus()

    # load the model
    model_folder = Path("trained_models") / "area"
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)
    # model.summary()

    train_words = words_types[train_words_type]
    logg.debug(f"train_words: {train_words}")
    test_words = words_types[test_words_type]
    logg.debug(f"test_words: {test_words}")

    # input data
    processed_path = Path("data_proc") / f"{dataset_name}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"list(data.keys()): {list(data.keys())}")
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the test split
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore, train_words)

    # "{{}}" leaves a "{}" placeholder in the name for the file extension
    fig_name = f"{model_name}_test{test_words_type}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm"
    cm_folder.mkdir(parents=True, exist_ok=True)

    for extension in ("png", "pdf"):
        fig.savefig(cm_folder / fig_name.format(extension))

    plt.show()
def train_model(hypa, force_retrain):
    """Train one CNN model, save it and record its test performance.

    The model name is built from ``hypa``. A ``<model_name>.txt`` placeholder
    in ``trained_models/cnn`` marks a model as already trained; nothing is
    done in that case unless ``force_retrain`` is set.

    A run writes ``recap.json``, ``results_recap.json`` and
    ``test_confusion_matrix.png`` in ``info/cnn/<model_name>``, saves the
    model as ``<model_name>.h5`` and writes the placeholder last.

    Args:
        hypa: Hyper-parameters. The keys read here are ``words``,
            ``dataset``, ``base_filters``, ``base_dense_width``,
            ``kernel_size_type``, ``pool_size_type``, ``dropout_type``,
            ``learning_rate_type``, ``optimizer_type``, ``batch_size`` and
            ``epoch_num``; ``hypa`` is also forwarded to ``build_cnn_name``.
        force_retrain: Train again even if the placeholder exists.

    Returns:
        ``"done_training"`` after a training run, ``None`` when the model
        was already trained and ``force_retrain`` is false.

    Raises:
        KeyError: If a type selector in ``hypa`` is not a known type.
    """
    logg = logging.getLogger(f"c.{__name__}.train_model")
    # logg.debug("Starting train_model")

    # get the words
    words = words_types[hypa["words"]]

    # name the model
    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "cnn"
    model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            # Logger.warn is a deprecated alias of Logger.warning
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "cnn" / model_name
    info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    # input data
    processed_path = Path("data_proc") / f"{hypa['dataset']}"
    data, labels = load_processed(processed_path, words)

    # from hypa extract model param
    model_param = {}
    model_param["num_labels"] = len(words)
    model_param["input_shape"] = data["training"][0].shape
    model_param["base_filters"] = hypa["base_filters"]
    model_param["base_dense_width"] = hypa["base_dense_width"]

    # translate types to actual values
    kernel_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(5, 1), (3, 3), (3, 3)],
        "03": [(1, 5), (3, 3), (3, 3)],
    }
    model_param["kernel_sizes"] = kernel_size_types[hypa["kernel_size_type"]]

    pool_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(2, 1), (2, 2), (2, 2)],
        "03": [(1, 2), (2, 2), (2, 2)],
    }
    model_param["pool_sizes"] = pool_size_types[hypa["pool_size_type"]]

    dropout_types = {"01": [0.03, 0.01], "02": [0.3, 0.1]}
    model_param["dropouts"] = dropout_types[hypa["dropout_type"]]

    # a dict to recreate this training
    recap = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["model_name"] = model_name
    recap["version"] = "002"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "fixed03",
        "e1": "exp_decay_keras_01",
        "04": "exp_decay_step_01",
        "05": "exp_decay_smooth_01",
        "06": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # optimizer learning rate: a keras schedule, a fixed value, or 1e-3
    # for the schedules that are driven by a callback
    fixed_lr_values = {"fixed01": 1e-2, "fixed02": 1e-3, "fixed03": 1e-4}
    if lr_value == "exp_decay_keras_01":
        lr = ExponentialDecay(
            0.1, decay_steps=100000, decay_rate=0.96, staircase=True
        )
    else:
        lr = fixed_lr_values.get(lr_value, 1e-3)

    # build only the optimizer that was asked for
    optimizer_types = {"a1": Adam, "r1": RMSprop}
    opt = optimizer_types[hypa["optimizer_type"]](learning_rate=lr)

    # create the model
    model = CNNmodel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    # "exp_decay_keras_01" is deliberately not here: its decay lives in the
    # optimizer schedule above, so it must not get a scheduler callback.
    exp_decay_schedules = {
        "exp_decay_step_01": partial(exp_decay_step, epochs_drop=5),
        "exp_decay_smooth_01": partial(exp_decay_smooth, epochs_drop=5),
        "exp_decay_smooth_02": partial(
            exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2
        ),
    }
    if lr_value in exp_decay_schedules:
        callbacks.append(LearningRateScheduler(exp_decay_schedules[lr_value]))

    # get training parameters
    BATCH_SIZE = hypa["batch_size"]
    EPOCH_NUM = hypa["epoch_num"]

    # train the model
    results = model.fit(
        data["training"],
        labels["training"],
        validation_data=(data["validation"], labels["validation"]),
        batch_size=BATCH_SIZE,
        epochs=EPOCH_NUM,
        verbose=1,
        callbacks=callbacks,
    )

    # save the trained model
    model.save(model_path)

    results_recap = {}
    results_recap["model_name"] = model_name

    # version of the results saved
    results_recap["results_recap_version"] = "002"

    # save the evaluation results
    logg.debug("Evaluate on test data:")
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_recap["history"] = {
        "loss": results.history["loss"],
        "val_loss": results.history["val_loss"],
        "categorical_accuracy": results.history["categorical_accuracy"],
        "val_categorical_accuracy": results.history["val_categorical_accuracy"],
    }

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # Cross-check: predict again through a tf.data pipeline. The dataset is
    # NOT shuffled, so the predictions stay aligned with labels["testing"].
    test_dataset = Dataset.from_tensor_slices(
        (data["testing"], labels["testing"])
    ).batch(BATCH_SIZE)
    y_pred_dataset = model.predict(test_dataset)
    cm_dataset = pred_hot_2_cm(labels["testing"], y_pred_dataset, words)
    fscore_dataset = analyze_confusion(cm_dataset, words)
    logg.debug(f"fscore_dataset: {fscore_dataset} fscore {fscore}")

    # written last: its presence marks the training as complete
    placeholder_path.write_text(f"Trained. F-score: {fscore}")

    return "done_training"
def train_area(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
) -> None:
    """Train an area-type network on a processed dataset and save the results.

    The model name is built from ``hypa`` and ``use_validation``. If the
    placeholder file for that name already exists in ``trained_folder`` the
    training is skipped, unless ``force_retrain`` is set. Otherwise the model
    is built, compiled, fitted, evaluated on the testing split and saved,
    together with a recap of the parameters, the training history, the
    confusion matrix and its plot.

    Args:
        hypa: Hyperparameters. The keys read directly here are
            ``words_type``, ``dataset_name`` and ``net_type``; the dict is
            also forwarded to the ``get_*_param_area`` helpers.
        force_retrain: Retrain even if the placeholder file already exists.
        use_validation: If True the validation split is passed to ``fit`` as
            validation data; if False it is concatenated to the training split.
        trained_folder: Where the ``.h5`` model and the ``.txt`` placeholder
            are written.
        root_info_folder: A subfolder named after the model is created here
            and receives the recaps, the model summary and the plot.

    Raises:
        ValueError: If ``hypa["net_type"]`` is not one of the known types.
    """
    logg = logging.getLogger(f"c.{__name__}.train_area")
    # logg.setLevel("INFO")
    logg.debug("Start train_area")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # the placeholder is written last, so its presence means a full run ended
    if placeholder_path.exists():
        if not force_retrain:
            logg.debug("Already trained")
            return
        logg.warning("\nRETRAINING MODEL!!\n")

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    # get the words
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    ##########################################################
    #   Setup model
    ##########################################################

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_area(hypa, num_labels, input_shape)

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type == "SIM":
        model = SimpleNet.build(sim_type="1", **model_param)
    elif net_type == "SI2":
        model = SimpleNet.build(sim_type="2", **model_param)
    else:
        # fail with a clear message instead of an unbound `model` later on
        raise ValueError(f"Unknown net_type: {net_type!r}")

    # counted after the optional merge, so it matches what fit() will see
    num_samples = x.shape[0]
    logg.debug(f"num_samples: {num_samples}")

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_area(
        hypa, use_validation, model_path, num_samples
    )

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # recap of everything needed to describe this training
    recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "words": words,
        "hypa": hypa,
        "model_param": model_param,
        "use_validation": use_validation,
        "batch_size": training_param["batch_size"],
        "epochs": training_param["epochs"],
        "lr_name": training_param["lr_name"],
        "version": "002",
    }
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    # redirect the summary lines to a file
    # https://stackoverflow.com/a/45546663/2237151
    model_summary_path = model_info_folder / "model_summary.txt"
    with model_summary_path.open("w") as msf:
        model.summary(line_length=150, print_fn=lambda line: msf.write(line + "\n"))

    ##########################################################
    #   Fit model
    ##########################################################

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=training_param["epochs"],
        batch_size=training_param["batch_size"],
        callbacks=training_param["callbacks"],
    )

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "results_recap_version": "001",
    }

    # evaluate performance
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    results_recap["cm"] = cm.tolist()

    # fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def train_img(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
) -> None:
    """Train an area-type network on image data fed by ``ImageNetGenerator``.

    The model name is built from ``hypa`` and ``use_validation``. If the
    placeholder file for that name already exists in ``trained_folder`` the
    training is skipped, unless ``force_retrain`` is set. Otherwise the data
    partitions are prepared, the generators and the model are built, the model
    is fitted, evaluated on the testing generator and saved, together with a
    recap of the parameters, the training history, the confusion matrix and
    its plot.

    Args:
        hypa: Hyperparameters. The keys read directly here are
            ``words_type``, ``dataset_name`` and ``net_type``; the dict is
            also forwarded to the ``get_*_param_img`` helpers.
        force_retrain: Retrain even if the placeholder file already exists.
        use_validation: If True a validation generator is passed to ``fit``;
            if False the validation ids are appended to the training ids.
        trained_folder: Where the ``.h5`` model and the ``.txt`` placeholder
            are written.
        root_info_folder: A subfolder named after the model is created here
            and receives the recaps, the model summary and the plot.

    Raises:
        ValueError: If ``hypa["net_type"]`` is not one of the known types.
    """
    logg = logging.getLogger(f"c.{__name__}.train_img")
    # logg.setLevel("INFO")
    logg.debug("Start train_img")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_img_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # the placeholder is written last, so its presence means a full run ended
    if placeholder_path.exists():
        if not force_retrain:
            logg.debug("Already trained")
            return
        logg.warning("\nRETRAINING MODEL!!\n")

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    label_type = hypa["words_type"]
    label_list = get_label_list(label_type)
    num_labels = len(label_list)

    dataset_raw_folder = Path.home() / "datasets" / "imagenet" / "imagenet_images"
    dataset_proc_base_folder = Path.home() / "datasets" / "imagenet"

    # get the partition of the data
    partition, ids2labels = prepare_partitions(label_list, dataset_raw_folder)

    # Merge the validation ids BEFORE counting the samples, so that
    # num_samples reflects what the training generator will actually serve
    # (same convention as train_area, which counts after concatenating).
    if not use_validation:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    num_samples = len(partition["training"])

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_img(
        hypa, use_validation, model_path, num_samples
    )

    preprocess_type = hypa["dataset_name"]
    dataset_proc_folder = dataset_proc_base_folder / preprocess_type

    # arguments shared by the three generators
    gen_kwargs: ty.Dict[str, ty.Any] = {
        "dataset_proc_folder": dataset_proc_folder,
        "dataset_raw_folder": dataset_raw_folder,
        "preprocess_type": preprocess_type,
        "save_processed": True,
    }

    val_generator: ty.Optional[ImageNetGenerator] = None
    if use_validation:
        val_generator = ImageNetGenerator(
            partition["validation"],
            ids2labels,
            label_list,
            batch_size=training_param["batch_size"],
            shuffle=True,
            **gen_kwargs,
        )
        logg.debug("Using validation data")

    training_generator = ImageNetGenerator(
        partition["training"],
        ids2labels,
        label_list,
        batch_size=training_param["batch_size"],
        shuffle=True,
        **gen_kwargs,
    )

    # the test data is neither batched nor shuffled
    testing_generator = ImageNetGenerator(
        partition["testing"],
        ids2labels,
        label_list,
        batch_size=1,
        shuffle=False,
        **gen_kwargs,
    )

    ##########################################################
    #   Setup model
    ##########################################################

    input_shape = training_generator.get_img_shape()

    # from hypa extract model param
    model_param = get_model_param_img(hypa, num_labels, input_shape)

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type == "SIM":
        model = SimpleNet.build(sim_type="1", **model_param)
    elif net_type == "SI2":
        model = SimpleNet.build(sim_type="2", **model_param)
    else:
        # fail with a clear message instead of an unbound `model` later on
        raise ValueError(f"Unknown net_type: {net_type!r}")

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # recap of everything needed to describe this training
    recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "words": label_list,
        "hypa": hypa,
        "model_param": model_param,
        "use_validation": use_validation,
        "batch_size": training_param["batch_size"],
        "epochs": training_param["epochs"],
        "lr_name": training_param["lr_name"],
        "version": "002",
    }
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    # redirect the summary lines to a file
    # https://stackoverflow.com/a/45546663/2237151
    model_summary_path = model_info_folder / "model_summary.txt"
    with model_summary_path.open("w") as msf:
        model.summary(line_length=150, print_fn=lambda line: msf.write(line + "\n"))

    ##########################################################
    #   Fit model
    ##########################################################

    # NOTE(review): the generators already receive batch_size; Keras documents
    # that fit() should not be given batch_size for generator / Sequence
    # inputs. Confirm what ImageNetGenerator is and drop the argument if so.
    results = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epochs"],
        batch_size=training_param["batch_size"],
        callbacks=training_param["callbacks"],
    )

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "results_recap_version": "001",
    }

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    cm = confusion_matrix(y_true, y_pred_labels)
    results_recap["cm"] = cm.tolist()

    # fscore
    fscore = analyze_confusion(cm, label_list)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, label_list, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def evaluate_model_cm(model_name: str, test_words_type: str) -> float:
    """Evaluate a saved model on a test word list and plot its confusion matrix.

    The dataset name and the training words type are parsed from
    ``model_name`` (the ``_ds<name>_`` and ``_w<type>`` tags). The model is
    loaded from ``trained_models/<attention|area>/<model_name>.h5``, evaluated
    on the testing split of the dataset restricted to the test words, and the
    confusion matrix is saved as png and pdf in ``plot_results/cm_all01``.

    Args:
        model_name: Name of the trained model, without the ``.h5`` extension.
        test_words_type: Key of ``words_types`` selecting the words to test on.

    Returns:
        The fscore computed by ``analyze_confusion`` on the confusion matrix.

    Raises:
        ValueError: If the dataset name or the training words type cannot be
            extracted from ``model_name``.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_cm")
    # logg.setLevel("INFO")

    # magic to fix the GPUs
    setup_gpus()

    logg.debug(f"\nmodel_name: {model_name}")

    # extract the dataset name from the `_ds<name>_` tag
    dataset_re = re.compile("_ds(.*?)_")
    dataset_match = dataset_re.search(model_name)
    if dataset_match is None:
        # without this check the name would be unbound further down
        raise ValueError(f"Cannot find the dataset name in model_name {model_name!r}")
    logg.debug(f"match[1]: {dataset_match[1]}")
    dataset_name = dataset_match[1]

    # extract the training words type from the `_w<type>` tag,
    # which must be followed by `_` or `.`
    train_words_type_re = re.compile("_w(.*?)[_.]")
    words_match = train_words_type_re.search(model_name)
    if words_match is None:
        raise ValueError(
            f"Cannot find the train words type in model_name {model_name!r}"
        )
    logg.debug(f"match[1]: {words_match[1]}")
    train_words_type = words_match[1]

    # the first three chars of the name select the folder of the model
    arch_type = model_name[:3]
    train_type_tag = "attention" if arch_type == "ATT" else "area"

    # load the model
    model_folder = Path("trained_models") / train_type_tag
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)

    train_words = words_types[train_words_type]
    logg.debug(f"train_words: {train_words}")
    test_words = words_types[test_words_type]
    logg.debug(f"test_words: {test_words}")

    # input data must exist
    if dataset_name.startswith("mel"):
        preprocess_spec(dataset_name, test_words_type)
    elif dataset_name.startswith("aug"):
        do_augmentation(dataset_name, test_words_type)

    # input data
    processed_path = Path("data_proc") / f"{dataset_name}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"list(data.keys()): {list(data.keys())}")
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the words you trained on
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)

    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore, train_words)

    # the doubled braces leave a `{}` slot for the file extension
    fig_name = f"{model_name}_test{test_words_type}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm_all01"
    cm_folder.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the figure is left open after saving, as in the original;
    # confirm whether callers rely on plt.show() or if it should be closed.
    for extension in ("png", "pdf"):
        fig.savefig(cm_folder / fig_name.format(extension))

    return fscore
def evaluate_attention_weights(
    train_words_type: str, rec_words_type: str, do_new_record: bool = False
) -> None:
    """Plot spectrogram, attention weights and predictions for a few words.

    A fixed attention model (the hyperparameters are hard-coded below) is
    loaded from ``trained_models/attention`` and wrapped in a second model
    that also outputs the ``att_softmax`` and ``bidirectional_1`` layers.
    The input spectrograms are either computed from freshly recorded audio or
    taken from the testing split of the processed dataset. One column per word
    is plotted and the figure is saved as png and pdf in ``plot_results``.

    Args:
        train_words_type: Key of ``words_types`` with the words the model was
            trained on; used to label the predictions and in the file name.
        rec_words_type: Key of ``words_types`` with the words to analyze, or
            ``"train"`` to use the last three training words.
        do_new_record: If True record new audio for each word, otherwise load
            one sample per word from the processed dataset.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_attention_weights")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_attention_weights")

    # magic to fix the GPUs
    setup_gpus()

    # ATT_ct02_dr01_ks01_lu01_qt05_dw01_opa1_lr03_bs02_en02_dsaug07_wLTnum
    # NOTE(review): words_type is hard-coded here while train_words_type is a
    # parameter; confirm that the two are meant to agree.
    hypa = {
        "batch_size_type": "02",
        "conv_size_type": "02",
        "dataset_name": "aug07",
        "dense_width_type": "01",
        "dropout_type": "01",
        "epoch_num_type": "02",
        "kernel_size_type": "01",
        "learning_rate_type": "03",
        "lstm_units_type": "01",
        "optimizer_type": "a1",
        "query_style_type": "05",
        "words_type": "LTnum",
    }
    use_validation = True

    dataset_name = hypa["dataset_name"]

    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load the model
    model_folder = Path("trained_models") / "attention"
    model_path = model_folder / f"{model_name}.h5"
    # https://github.com/keras-team/keras/issues/5088#issuecomment-401498334
    model = tf.keras.models.load_model(
        model_path, custom_objects={"backend": tf.keras.backend}
    )

    model.summary()
    logg.debug(f"ascii_model(model): {ascii_model(model)}")

    # same inputs, but expose the intermediate layers as additional outputs
    att_weight_model = tf.keras.models.Model(
        inputs=model.input,
        outputs=[
            model.get_layer("output").output,
            model.get_layer("att_softmax").output,
            model.get_layer("bidirectional_1").output,
        ],
    )
    att_weight_model.summary()

    # get the training words
    train_words = words_types[train_words_type]
    perm_pred = compute_permutation(train_words)

    # rec_words_type comes from the function argument: the original code
    # overwrote it from an `args` object that is not in scope here
    if rec_words_type == "train":
        rec_words = train_words[-3:]
        logg.debug(f"Using rec_words: {rec_words}")
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # record new audios
    if do_new_record:

        # where to save the audios
        audio_folder = Path("recorded_audio")
        audio_folder.mkdir(parents=True, exist_ok=True)

        # record the audios and save them in audio_folder
        audio_path_fmt = "{}_02.wav"
        audios = record_audios(rec_words, audio_folder, audio_path_fmt, timeout=0)

        # compute the spectrograms and build the dataset of correct shape
        img_specs = []
        spec_dict = get_spec_dict()
        spec_kwargs = spec_dict[dataset_name]
        p2d_kwargs = {"ref": np.max}
        for word in rec_words:
            # get the name
            audio_path = audio_folder / audio_path_fmt.format(word)

            # convert it to mel
            log_spec = wav2mel(audio_path, spec_kwargs, p2d_kwargs)
            # add a trailing axis of size 1
            img_spec = log_spec.reshape((*log_spec.shape, 1))
            img_specs.append(img_spec)

        # stack the samples along a new first axis
        rec_data = np.stack(img_specs)

    # load data if you do not want to record new audios
    else:

        # input data
        processed_folder = Path("data_proc")
        processed_path = processed_folder / f"{dataset_name}"

        # which word in the dataset to plot
        word_id = 2

        # the loaded spectrograms
        rec_data_l: ty.List[np.ndarray] = []

        for word in rec_words:
            data, _labels = load_processed(processed_path, [word])

            # get one of the spectrograms
            rec_data_l.append(data["testing"][word_id])

        # turn the list into np array
        rec_data = np.stack(rec_data_l)

    # get prediction and attention weights
    pred, att_weights, _lstm_out = att_weight_model.predict(rec_data)

    # if we recorded fresh audios we also have the waveform to plot
    ax_add = 1 if do_new_record else 0

    # plot the wave, spectrogram, weights and predictions in each column
    plot_size = 5
    fw = plot_size * num_rec_words
    nrows = 3 + ax_add
    fh = plot_size * nrows * 0.7
    # squeeze=False keeps `axes` two-dimensional even with a single word,
    # so that axes[row][col] is always valid
    fig, axes = plt.subplots(
        nrows=nrows,
        ncols=num_rec_words,
        figsize=(fw, fh),
        sharey="row",
        squeeze=False,
    )
    fig.suptitle(f"Attention weights and predictions for {rec_words}", fontsize=20)

    for i, word in enumerate(rec_words):
        word_spec = rec_data[i][:, :, 0]

        # plot the waveform
        if do_new_record:
            plot_waveform(audios[i], axes[0][i])

        # plot the spectrogram
        title = f"Spectrogram for {word}"
        plot_spec(word_spec, axes[0 + ax_add][i], title=title)

        # plot the weights
        word_att_weights = att_weights[i]
        word_att_weights_img = np.expand_dims(word_att_weights, axis=-1).T
        axes[1 + ax_add][i].imshow(word_att_weights_img, origin="lower", aspect="auto")
        title = f"Attention weights for {word}"
        axes[1 + ax_add][i].set_title(title)

        # plot the predictions
        word_pred = pred[i]
        # permute the prediction from sorted to the order you have
        word_pred = word_pred[perm_pred]
        pred_index = np.argmax(word_pred)
        title = f"Predictions for {word}"
        plot_pred(word_pred, train_words, axes[2 + ax_add][i], title, pred_index)

    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    # the trailing `{}` is the slot for the file extension
    fig_name = f"{model_name}"
    fig_name += f"_{train_words_type}"
    fig_name += f"_{rec_words_type}_img"
    fig_name += "_new.{}" if do_new_record else "_data.{}"

    # make sure the output folder exists before saving
    plot_folder = Path("plot_results")
    plot_folder.mkdir(parents=True, exist_ok=True)
    for extension in ("png", "pdf"):
        fig.savefig(plot_folder / fig_name.format(extension))

    # only show the figure when it has few columns
    if num_rec_words <= 6:
        plt.show()
def main():
    """Train and validate a model built from a list of bit widths.

    Every setting is read from the module-level ``args``. The function sets up
    logging and the GPU, builds the data loaders, the model, the optimizer and
    the learning-rate scheduler, optionally restores a ``resume`` or
    ``pretrain`` checkpoint, then runs the train/validation loop. After each
    epoch a checkpoint is saved and the per-bit-width metrics are written to
    a TensorBoard summary.

    Raises:
        ValueError: If the resume or pretrain path does not lead to a file.
    """
    hostname = socket.gethostname()
    setup_logging(os.path.join(args.results_dir, 'log_{}.txt'.format(hostname)))
    logging.info("running arguments: %s", args)

    best_gpu = setup_gpus()
    torch.cuda.set_device(best_gpu)
    torch.backends.cudnn.benchmark = True

    train_transform = get_transform(args.dataset, 'train')
    train_data = get_dataset(args.dataset, args.train_split, train_transform)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_transform = get_transform(args.dataset, 'val')
    val_data = get_dataset(args.dataset, 'val', val_transform)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # comma separated ints, kept in ascending order
    bit_width_list = sorted(map(int, args.bit_width_list.split(',')))
    model = models.__dict__[args.model](bit_width_list, train_data.num_classes).cuda()

    lr_decay = list(map(int, args.lr_decay.split(',')))
    optimizer = get_optimizer_config(model, args.optimizer, args.lr, args.weight_decay)
    lr_scheduler = None
    best_prec1 = None

    if args.resume and args.resume != 'None':
        # a folder is accepted as a shortcut for its best checkpoint
        if os.path.isdir(args.resume):
            args.resume = os.path.join(args.resume, 'model_best.pth.tar')
        if not os.path.isfile(args.resume):
            raise ValueError('Resume checkpoint path error!')
        checkpoint = torch.load(args.resume, map_location='cuda:{}'.format(best_gpu))
        # restore the full training state
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler = get_lr_scheduler(args.optimizer, optimizer, lr_decay,
                                        checkpoint['epoch'])
        logging.info("loaded resume checkpoint '%s' (epoch %s)", args.resume,
                     checkpoint['epoch'])
    elif args.pretrain and args.pretrain != 'None':
        if os.path.isdir(args.pretrain):
            args.pretrain = os.path.join(args.pretrain, 'model_best.pth.tar')
        if not os.path.isfile(args.pretrain):
            raise ValueError('Pretrained model path error!')
        checkpoint = torch.load(args.pretrain, map_location='cuda:{}'.format(best_gpu))
        # only the weights are loaded, and not strictly
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        logging.info("loaded pretrain checkpoint '%s' (epoch %s)", args.pretrain,
                     checkpoint['epoch'])

    if lr_scheduler is None:
        lr_scheduler = get_lr_scheduler(args.optimizer, optimizer, lr_decay)

    num_parameters = sum(param.nelement() for param in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    criterion = nn.CrossEntropyLoss().cuda()
    criterion_soft = CrossEntropyLossSoft().cuda()
    sum_writer = SummaryWriter(args.results_dir + '/summary')

    # close the writer even if training is interrupted, so that the pending
    # events are flushed to disk
    try:
        for epoch in range(args.start_epoch, args.epochs):
            model.train()
            train_loss, train_prec1, train_prec5 = forward(train_loader, model, criterion,
                                                           criterion_soft, epoch, True,
                                                           optimizer, sum_writer)
            model.eval()
            val_loss, val_prec1, val_prec5 = forward(val_loader, model, criterion,
                                                     criterion_soft, epoch, False)

            # NOTE(review): val_loss is indexed and zipped like a sequence
            # below, but passed whole to ReduceLROnPlateau.step here; confirm
            # what forward() returns.
            if isinstance(lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                lr_scheduler.step(val_loss)
            else:
                lr_scheduler.step()

            # the last entry of the metrics decides which checkpoint is best
            if best_prec1 is None:
                is_best = True
                best_prec1 = val_prec1[-1]
            else:
                is_best = val_prec1[-1] > best_prec1
                best_prec1 = max(val_prec1[-1], best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': args.model,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict()
                },
                is_best,
                path=args.results_dir + '/ckpt')

            sum_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=epoch)
            # one set of scalars for each bit width
            for bw, tl, tp1, tp5, vl, vp1, vp5 in zip(bit_width_list, train_loss,
                                                      train_prec1, train_prec5,
                                                      val_loss, val_prec1, val_prec5):
                sum_writer.add_scalar('train_loss_{}'.format(bw), tl, global_step=epoch)
                sum_writer.add_scalar('train_prec_1_{}'.format(bw), tp1, global_step=epoch)
                sum_writer.add_scalar('train_prec_5_{}'.format(bw), tp5, global_step=epoch)
                sum_writer.add_scalar('val_loss_{}'.format(bw), vl, global_step=epoch)
                sum_writer.add_scalar('val_prec_1_{}'.format(bw), vp1, global_step=epoch)
                sum_writer.add_scalar('val_prec_5_{}'.format(bw), vp5, global_step=epoch)

            logging.info('Epoch {}: \ntrain loss {:.2f}, train prec1 {:.2f}, train prec5 {:.2f}\n'
                         '  val loss {:.2f},   val prec1 {:.2f},   val prec5 {:.2f}'.format(
                             epoch, train_loss[-1], train_prec1[-1], train_prec5[-1],
                             val_loss[-1], val_prec1[-1], val_prec5[-1]))
    finally:
        sum_writer.close()
def find_best_lr(hypa: ty.Dict[str, str]) -> None:
    """Sweep the learning rate for an area-type model and save the loss curve.

    The model described by ``hypa`` is built and compiled, then a
    ``LearningRateFinder`` is run between ``1e-9`` and ``1e1`` on the union of
    the training and validation splits. The loss plot is saved as png and pdf
    in ``plot_results/area/find_best_lr`` together with a json file holding
    the learning rates and the losses, and the plot is shown.

    Args:
        hypa: Hyperparameters. The keys read directly here are
            ``words_type``, ``dataset_name`` and ``net_type``; the dict is
            also forwarded to the ``get_*_param_area`` helpers.

    Raises:
        ValueError: If ``hypa["net_type"]`` is not one of the known types.
    """
    logg = logging.getLogger(f"c.{__name__}.find_best_lr")
    # logg.setLevel("INFO")
    logg.debug("Start find_best_lr")

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # no validation just find the LR
    use_validation = False

    # name the model
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_area(hypa, num_labels, input_shape)

    # no need for validation
    x = np.concatenate((data["training"], data["validation"]))
    y = np.concatenate((labels["training"], labels["validation"]))

    # magic to fix the GPUs
    setup_gpus()

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type == "SIM":
        model = SimpleNet.build(sim_type="1", **model_param)
    elif net_type == "SI2":
        model = SimpleNet.build(sim_type="2", **model_param)
    else:
        # fail with a clear message instead of an unbound `model` later on
        raise ValueError(f"Unknown net_type: {net_type!r}")

    num_samples = x.shape[0]
    logg.debug(f"num_samples: {num_samples}")

    # from hypa extract training param (epochs, batch, opt, ...)
    # no model is saved during the sweep, so there is no model_path
    training_param = get_training_param_area(
        hypa, use_validation, model_path=None, num_samples=num_samples
    )

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # boundary values
    start_lr = 1e-9
    end_lr = 1e1

    # find the best values
    lrf = LearningRateFinder(model)
    lrf.find(
        (x, y),
        start_lr,
        end_lr,
        epochs=training_param["epochs"],
        batchSize=training_param["batch_size"],
    )

    fig_title = "LR_sweep"
    fig_title += f"__{model_name}"

    fig, ax = plt.subplots(figsize=(8, 8))

    # get the plot
    lrf.plot_loss(ax=ax, title=fig_title)

    # save the plot
    plot_fol = Path("plot_results") / "area" / "find_best_lr"
    plot_fol.mkdir(parents=True, exist_ok=True)
    fig_name = fig_title + ".{}"
    for extension in ("png", "pdf"):
        fig.savefig(plot_fol / fig_name.format(extension))

    # cast to plain floats so that the values are json serializable
    recap_loss = {
        "lrs": [float(lr) for lr in lrf.lrs],
        "losses": [float(loss) for loss in lrf.losses],
    }
    loss_path = plot_fol / f"loss_{fig_title}.json"
    loss_path.write_text(json.dumps(recap_loss, indent=4))

    plt.show()
def train_transfer(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
    tensorboard_logs_folder: Path,
) -> None:
    """Train a transfer-learning model in two phases and save the results.

    The model returned by ``TRAmodel`` is compiled and fitted a first time,
    then ``base_model.trainable`` is set to True and the model is compiled and
    fitted a second time with the second set of training parameters. After
    each phase the weights stored at the model path are reloaded. The recap of
    the parameters, the histories of both phases, the test metrics, the
    confusion matrix and its plot are saved in the model info folder.

    https://www.tensorflow.org/guide/keras/transfer_learning/#build_a_model

    The training is skipped if the placeholder file for this model name
    already exists in ``trained_folder``, unless ``force_retrain`` is set.

    Args:
        hypa: Hyperparameters. The keys read directly here are ``words_type``
            and ``datasets_type``; the dict is also forwarded to the
            ``get_*_param_transfer`` helpers.
        force_retrain: Retrain even if the placeholder file already exists.
        use_validation: If True a validation generator is passed to ``fit``;
            if False the validation ids are appended to the training ids.
        trained_folder: Where the ``.h5`` model and the ``.txt`` placeholder
            are written.
        root_info_folder: A subfolder named after the model is created here
            and receives the recaps and the confusion matrix plot.
        tensorboard_logs_folder: Forwarded to ``get_training_param_transfer``.
    """
    logg = logging.getLogger(f"c.{__name__}.train_transfer")
    # logg.setLevel("INFO")
    logg.debug("Start train_transfer")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_transfer_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # the placeholder is written last, so its presence means a full run ended
    if placeholder_path.exists():
        if not force_retrain:
            logg.debug("Already trained")
            return
        logg.warning("\nRETRAINING MODEL!!\n")

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    # grab a few hypas
    words_type = hypa["words_type"]
    datasets_type = hypa["datasets_type"]

    # get the partition of the data
    partition, ids2labels = prepare_partitions(words_type)

    # get the word list
    words = words_types[words_type]
    num_labels = len(words)

    # get the dataset name list
    datasets_types, datasets_shapes = get_datasets_types()
    dataset_names = datasets_types[datasets_type]
    dataset_shape = datasets_shapes[datasets_type]

    # the shape of each sample: the dataset shape plus a last axis of size 3
    input_shape = (*dataset_shape, 3)

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_transfer(
        hypa, use_validation, tensorboard_logs_folder, model_path
    )

    # load datasets
    processed_folder = Path("data_split")
    data_split_paths = [processed_folder / f"{dn}" for dn in dataset_names]

    # assemble the gen_param for the generators
    gen_param = {
        "dim": dataset_shape,
        "batch_size": training_param["batch_sizes"][0],
        "shuffle": True,
        "label_names": words,
        "data_split_paths": data_split_paths,
    }

    # maybe concatenate the validation and training lists
    val_generator: ty.Optional[AudioGenerator] = None
    if use_validation:
        val_generator = AudioGenerator(partition["validation"], ids2labels, **gen_param)
        logg.debug("Using validation data")
    else:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    # create the training generator with the modified (maybe) list of IDs
    training_generator = AudioGenerator(partition["training"], ids2labels, **gen_param)
    logg.debug(f"len(training_generator): {len(training_generator)}")

    # always create the test generator
    # do not shuffle the test data
    gen_param["shuffle"] = False
    # do not batch it, no loss of stray data at the end
    gen_param["batch_size"] = 1
    testing_generator = AudioGenerator(partition["testing"], ids2labels, **gen_param)

    ##########################################################
    #   Setup model
    ##########################################################

    # from hypa extract model param
    model_param = get_model_param_transfer(hypa, num_labels, input_shape)

    # get mean and var to normalize the data
    data_mean, data_variance = get_generator_mean_var_cached(
        training_generator, words_type, datasets_type, processed_folder
    )

    # get the model
    model, base_model = TRAmodel(
        data_mean=data_mean, data_variance=data_variance, **model_param
    )
    model.summary()

    # a dict to recreate this training
    recap: ty.Dict[str, ty.Any] = {
        "words": words,
        "hypa": hypa,
        "model_param": model_param,
        "use_validation": use_validation,
        "model_name": model_name,
        "batch_sizes": training_param["batch_sizes"],
        "epoch_num": training_param["epoch_num"],
        "version": "003",
    }
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    ##########################################################
    #   Compile and fit model the first time
    ##########################################################

    model.compile(
        optimizer=training_param["opt"][0],
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][0],
    )

    results_freeze = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][0],
        callbacks=training_param["callbacks"][0],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_freeze_recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "results_recap_version": "001",
    }

    # save the histories
    results_freeze_recap["history_train"] = {
        mn: results_freeze.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_freeze_recap["history_val"] = {
            f"val_{mn}": results_freeze.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_freeze_recap.json"
    res_recap_path.write_text(json.dumps(results_freeze_recap, indent=4))

    ##########################################################
    #   Compile and fit model the second time
    ##########################################################

    # Unfreeze the base_model. Note that it keeps running in inference mode
    # since we passed `training=False` when calling it. This means that
    # the batchnorm layers will not update their batch statistics.
    # This prevents the batchnorm layers from undoing all the training
    # we've done so far.
    base_model.trainable = True
    model.summary()

    # the change of `trainable` is taken into account by compiling again
    model.compile(
        optimizer=training_param["opt"][1],  # Low learning rate
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][1],
    )

    results_full = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][1],
        callbacks=training_param["callbacks"][1],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_full_recap: ty.Dict[str, ty.Any] = {
        "model_name": model_name,
        "results_recap_version": "001",
    }

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_full_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    cm = confusion_matrix(y_true, y_pred_labels)
    results_full_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_full_recap["fscore"] = fscore

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_full_recap["history_train"] = {
        mn: results_full.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_full_recap["history_val"] = {
            f"val_{mn}": results_full.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_full_recap.json"
    res_recap_path.write_text(json.dumps(results_full_recap, indent=4))

    # save the trained model
    model.save(model_path)

    # save the placeholder
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def visualize_datasets(
    word_index,
    datasets: ty.Optional[ty.Sequence[str]] = None,
    a_word: str = "happy",
):
    """Plot one training sample of a word as seen by several datasets.

    For every dataset the array ``data_proc/<dataset>/<a_word>_training.npy``
    is loaded and the sample at ``word_index`` is drawn with ``plot_spec`` on
    its own axis. If the file is missing it is generated first, with
    ``preprocess_spec`` for dataset names starting with ``"me"`` and with
    ``do_augmentation`` for names starting with ``"au"``. The figure is saved
    as ``plot_models/<a_word>_<dataset names>_specs.pdf``.

    Args:
        word_index: Index of the sample to plot inside each loaded array.
        datasets: Names of the processed datasets to compare. Defaults to
            ``["mel04", "mel04a", "mel04b", "melc1"]``.
        a_word: Word whose samples are plotted.
    """
    logg = logging.getLogger(f"c.{__name__}.visualize_datasets")
    logg.debug("Start visualize_datasets")

    # magic to fix the GPUs
    setup_gpus()

    if datasets is None:
        datasets = ["mel04", "mel04a", "mel04b", "melc1"]
    datasets = list(datasets)

    # which word in the dataset to plot
    iw = word_index

    processed_folder = Path("data_proc")

    # one axis per dataset, laid out on a grid picked by find_rowcol
    nrows, ncols = find_rowcol(len(datasets))
    base_figsize = 5
    figsize = (ncols * base_figsize * 1.5, nrows * base_figsize)
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize)

    # plt.subplots returns a bare Axes (not an array) for a 1x1 grid
    if nrows * ncols > 1:
        axes_flat = axes.flat
    else:
        axes_flat = [axes]
    fig.suptitle(f"Various spectrograms for {a_word}", fontsize=20)

    # zip stops at the last dataset, axes in excess are left empty
    for dataset_name, ax in zip(datasets, axes_flat):
        processed_path = processed_folder / f"{dataset_name}"
        word_path = processed_path / f"{a_word}_training.npy"
        logg.debug(f"word_path: {word_path}")

        # FIXME this is shaky as hell
        # the missing data is generated using "_<word>" as words type
        if not word_path.exists():
            if dataset_name.startswith("me"):
                preprocess_spec(dataset_name, f"_{a_word}")
            elif dataset_name.startswith("au"):
                do_augmentation(dataset_name, f"_{a_word}")

        word_data = np.load(word_path, allow_pickle=True)
        logg.debug(f"{dataset_name} {a_word} shape: {word_data[iw].shape}")

        title = f"{dataset_name}: shape {word_data[iw].shape}"
        plot_spec(word_data[iw], ax, title=title)

    fig.tight_layout()

    # savefig does not create missing folders
    plot_folder = Path("plot_models")
    plot_folder.mkdir(parents=True, exist_ok=True)
    dt_names = "_".join(datasets)
    fig.savefig(plot_folder / f"{a_word}_{dt_names}_specs.pdf")
def evaluate_attention_weights(train_words_type: str) -> None:
    """Plot the attention weights of a trained area model for a few words.

    The model name is built with ``build_area_name`` from a hard-coded set of
    hyperparameters and loaded from ``trained_models/area/<model_name>.h5``.
    A second model is built on top of it that returns both the output of the
    last layer and the output of the layer named ``"area_values"``.

    For each word in ``train_words[30:32]`` the processed data is loaded, the
    testing sample at index ``word_id`` is kept for plotting, and every
    testing sample is predicted again as a batch filled with copies of
    itself, logging whether the first prediction matches the word.

    The kept samples are then predicted together and plotted on a two-row
    grid (spectrogram above, attention weights below). The figure is saved as
    ``plot_results/<model_name>_0002.pdf`` and shown.

    Args:
        train_words_type: Key of ``words_types`` selecting the training words.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_attention_weights")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_attention_weights")

    # magic to fix the GPUs
    setup_gpus()

    # hyperparameters identifying the trained model to load
    # VAN_opa1_lr05_bs32_en15_dsaug07_wLTall
    hypa = {
        "batch_size_type": "32",
        "dataset_name": "aug07",
        "epoch_num_type": "15",
        "learning_rate_type": "05",
        "net_type": "VAN",
        "optimizer_type": "a1",
        "words_type": "LTall",
    }
    use_validation = True

    dataset_name = hypa["dataset_name"]
    batch_size = int(hypa["batch_size_type"])

    # get the model name
    model_name = build_area_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # load the model
    model_folder = Path("trained_models") / "area"
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)

    # get the output layer because you forgot to name it
    name_output_layer = model.layers[-1].name
    logg.debug(f"name_output_layer: {name_output_layer}")

    # build a model on top of that to get the weights:
    # it returns the predictions and the output of the "area_values" layer
    att_weight_model = tf_models.Model(
        inputs=model.input,
        outputs=[
            model.get_layer(name_output_layer).output,
            model.get_layer("area_values").output,
        ],
    )
    att_weight_model.summary()

    # get the training words
    train_words = words_types[train_words_type]
    perm_pred = compute_permutation(train_words)
    logg.debug(f"perm_pred: {perm_pred}")
    # the predictions are indexed with the sorted words below
    sorted_train_words = sorted(train_words)
    logg.debug(f"sorted(train_words): {sorted(train_words)}")

    # load data if you do not want to record new audios
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{dataset_name}"
    logg.debug(f"processed_path: {processed_path}")

    # # evaluate on all data because im confused
    # data, labels = load_processed(processed_path, train_words)
    # logg.debug(f"data['testing'].shape: {data['testing'].shape}")
    # logg.debug(f"labels['testing'].shape: {labels['testing'].shape}")
    # eval_testing = model.evaluate(data["testing"], labels["testing"])
    # for metrics_name, value in zip(model.metrics_names, eval_testing):
    #     logg.debug(f"{metrics_name}: {value}")

    # which word in the dataset to plot
    # word_id = 5
    # word_id = 7
    word_id = 12

    # the loaded spectrograms
    rec_data_l: ty.List[np.ndarray] = []

    # for now we do not record new words
    # NOTE(review): the slice is empty or shorter if train_words has fewer
    # than 32 entries, and the code below assumes two words - TODO confirm
    rec_words = train_words[30:32]
    num_rec_words = len(rec_words)

    logg.debug(f"processed_path: {processed_path}")
    for i, word in enumerate(rec_words):
        logg.debug(f"\nword: {word}")

        # load only the samples of the current word
        data, labels = load_processed(processed_path, [word])
        logg.debug(f"data['testing'].shape: {data['testing'].shape}")
        logg.debug(f"labels['testing'].shape: {labels['testing'].shape}")

        # eval_testing = model.evaluate(data["testing"], labels["testing"])
        # for metrics_name, value in zip(model.metrics_names, eval_testing):
        #     logg.debug(f"{metrics_name}: {value}")

        # get one of the spectrograms
        word_data = data["testing"][word_id]
        rec_data_l.append(word_data)

        # predict all the testing samples of this word in one go
        pred, att_weights = att_weight_model.predict(data["testing"])
        logg.debug(f"pred.shape: {pred.shape}")
        logg.debug(f"pred[0].shape: {pred[0].shape}")
        pred_am_all = np.argmax(pred, axis=1)
        logg.debug(f"pred_am_all: {pred_am_all}")

        pred_index = np.argmax(pred[0])
        pred_word = sorted_train_words[pred_index]
        logg.debug(f"sorted pred_word: {pred_word} pred_index {pred_index}")

        # test EVERY SINGLE spectrogram
        spec_num = data["testing"].shape[0]
        for wid in range(spec_num):

            # get the word
            word_data = data["testing"][wid]
            logg.debug(f"word_data.shape: {word_data.shape}")
            batch_word_data = np.expand_dims(word_data, axis=0)
            logg.debug(f"batch_word_data.shape: {batch_word_data.shape}")

            # build a full batch made of copies of the same spectrogram
            shape_batch = (batch_size, *word_data.shape)
            logg.debug(f"shape_batch: {shape_batch}")

            batch_word_data_big = np.zeros(shape_batch, dtype=np.float32)
            # NOTE(review): this `i` shadows the index of the outer loop;
            # the outer index is not read again inside that loop
            for i in range(batch_size):
                batch_word_data_big[i, :, :, :] = batch_word_data
            # batch_word_data_big[0, :, :, :] = batch_word_data

            # predict it
            # pred, att_weights = att_weight_model.predict(batch_word_data)
            pred, att_weights = att_weight_model.predict(batch_word_data_big)

            # show all prediction
            # pred_am = np.argmax(pred, axis=1)
            # logg.debug(f"pred_am: {pred_am}")

            # focus on first prediction
            word_pred = pred[0]
            pred_index = np.argmax(word_pred)
            pred_word = sorted_train_words[pred_index]

            # one line of recap per spectrogram
            recap = ""
            if pred_word == word:
                recap += "correct "
            else:
                recap += " wrong "
                # for the wrong ones log the prediction of the whole batch
                pred_am = np.argmax(pred, axis=1)
                logg.debug(f"pred_am: {pred_am}")

            recap += f"sorted pred_word: {pred_word} pred_index {pred_index}"
            recap += f" word_pred.shape {word_pred.shape}"
            # prediction for the same sample when the whole split was predicted
            recap += f" pred_am_all[wid] {pred_am_all[wid]}"
            # pred_f = ", ".join([f"{p:.3f}" for p in pred[0]])
            # recap += f" pred_f: {pred_f}"
            logg.debug(recap)

            # break

    # turn the list into np array
    rec_data = np.stack(rec_data_l)
    logg.debug(f"\nrec_data.shape: {rec_data.shape}")

    # get prediction and attention weights
    pred, att_weights = att_weight_model.predict(rec_data)
    logg.debug(f"att_weights.shape: {att_weights.shape}")
    logg.debug(f"att_weights[0].shape: {att_weights[0].shape}")

    # one column per word: spectrogram on top, attention weights below
    plot_size = 5
    fw = plot_size * num_rec_words
    nrows = 2
    fh = plot_size * nrows
    # NOTE(review): with a single word plt.subplots returns a 1D array of
    # axes and axes[0][i] below would fail - confirm there are always 2 words
    fig, axes = plt.subplots(nrows=nrows, ncols=num_rec_words, figsize=(fw, fh))
    fig.suptitle("Attention weights computed with VerticalAreaNet", fontsize=20)

    for i, word in enumerate(rec_words):
        logg.debug(f"recword: {word}")

        # show the spectrogram
        word_spec = rec_data[i][:, :, 0]
        # logg.debug(f"word_spec: {word_spec}")
        axes[0][i].set_title(f"Spectrogram for {word}", fontsize=20)
        axes[0][i].imshow(word_spec, origin="lower")

        # show the attention weights
        axes[1][i].set_title(f"Attention weights for {word}", fontsize=20)
        att_w = att_weights[i][:, :, 0]
        axes[1][i].imshow(att_w, origin="lower")
        logg.debug(f"att_w.max(): {att_w.max()}")

        # axes[0][i].imshow(
        #     att_w, origin="lower", extent=img.get_extent(), cmap="gray", alpha=0.4
        # )

        # weighted = word_spec * att_w
        # axes[2][i].imshow(weighted, origin="lower")

        # predicted word, looked up in the sorted words
        word_pred = pred[i]
        pred_index = np.argmax(word_pred)
        pred_word = sorted_train_words[pred_index]
        logg.debug(f"sorted pred_word: {pred_word} pred_index {pred_index}")

        # # plot the predictions
        word_pred = pred[i]
        # logg.debug(f"word_pred: {word_pred}")
        # # permute the prediction from sorted to the order you have
        word_pred = word_pred[perm_pred]
        # logg.debug(f"word_pred permuted: {word_pred}")
        pred_index = np.argmax(word_pred)
        pred_word = train_words[pred_index]
        logg.debug(f"pred_word: {pred_word} pred_index {pred_index}")
        # title = f"Predictions for {word}"
        # plot_pred(word_pred, train_words, axes[2][i], title, pred_index)

    fig.tight_layout()

    # the placeholder left in the name is filled with the file extension
    fig_name = f"{model_name}"
    fig_name += "_0002.{}"
    plot_folder = Path("plot_results")
    results_path = plot_folder / fig_name.format("pdf")
    fig.savefig(results_path)

    plt.show()
def evaluate_model_cnn(
    which_dataset: str, train_words_type: str, test_words_type: str
) -> None:
    """Evaluate a trained CNN on the testing split and show its confusion matrix.

    The model name is built with ``build_cnn_name`` from a fixed set of
    hyperparameters plus the dataset and the training words type, and the
    model is loaded from ``trained_models/cnn/<model_name>.h5``. The testing
    data of ``which_dataset`` is loaded for the test words, the model is
    evaluated on it, and the confusion matrix of the predictions is plotted
    together with the fscore and shown.

    Args:
        which_dataset: Name of the processed dataset inside ``data_proc``.
        train_words_type: Words type the model was trained on, used only to
            build the model name.
        test_words_type: Key of ``words_types`` for the words to test on.

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_cnn")
    logg.debug("Start evaluate_model_cnn")

    # magic to fix the GPUs
    setup_gpus()

    # hyperparameters of the trained model, they determine the model name
    hypa: ty.Dict[str, ty.Union[str, int]] = {
        "base_dense_width": 32,
        "base_filters": 32,
        "batch_size": 32,
        "dropout_type": "01",
        "epoch_num": 15,
        "kernel_size_type": "02",
        "learning_rate_type": "04",
        "optimizer_type": "a1",
        "pool_size_type": "01",
        "dataset": which_dataset,
        "words": train_words_type,
    }

    # the words to test on
    test_words = words_types[test_words_type]

    # find the trained model on disk
    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")
    model_path = Path("trained_models") / "cnn" / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        raise FileNotFoundError

    model = tf.keras.models.load_model(model_path)
    model.summary()

    # load the samples of the test words
    data, labels = load_processed(Path("data_proc") / f"{which_dataset}", test_words)
    test_data = data["testing"]
    test_labels = labels["testing"]
    logg.debug(f"data['testing'].shape: {test_data.shape}")

    # loss and metrics on the testing split
    logg.debug("Evaluate on test data:")
    model.evaluate(test_data, test_labels)

    # confusion matrix and fscore of the predictions
    predictions = model.predict(test_data)
    confusion = pred_hot_2_cm(test_labels, predictions, test_words)
    fscore = analyze_confusion(confusion, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(confusion, ax, model_name, test_words, fscore)

    plt.show()
def evaluate_audio_cnn(args):
    """Record one audio per word and plot the CNN predictions on them.

    The audios are obtained with ``record_audios`` and saved in
    ``recorded_audio`` as ``<word>_02.wav``. Each one is converted with
    ``wav2mel`` using the parameters of the ``mel01`` dataset. The CNN is
    loaded from ``saved_models/<model_name>.h5``, where the name is built
    with ``build_cnn_name`` from a fixed set of hyperparameters.

    For every word a row with waveform, spectrogram and prediction is
    plotted. The figure is saved in ``recorded_audio`` and shown only when
    there are at most 6 words.

    Args:
        args: Object with the attributes ``train_words_type`` (key of
            ``words_types`` for the words the model was trained on) and
            ``rec_words_type`` (key of ``words_types`` for the words to
            record, or ``"train"`` to reuse the training words).

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_audio_cnn")
    logg.debug("Start evaluate_audio_cnn")

    # magic to fix the GPUs
    setup_gpus()

    # need to know on which dataset the model was trained to compute specs
    dataset_name = "mel01"

    # words that the dataset was trained on
    train_words_type = args.train_words_type
    train_words = words_types[train_words_type]

    # permutation from sorted to your wor(l)d order
    perm_pred = compute_permutation(train_words)

    rec_words_type = args.rec_words_type
    if rec_words_type == "train":
        rec_words = train_words
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # where to save the audios
    audio_folder = Path("recorded_audio")
    audio_folder.mkdir(parents=True, exist_ok=True)

    # record the audios and save them in audio_folder
    audio_path_fmt = "{}_02.wav"
    audios = record_audios(rec_words, audio_folder, audio_path_fmt, timeout=0)

    # compute the spectrograms and build the dataset of correct shape
    img_specs = []
    spec_dict = get_spec_dict()
    spec_kwargs = spec_dict[dataset_name]
    p2d_kwargs = {"ref": np.max}
    for word in rec_words:
        # get the name
        audio_path = audio_folder / audio_path_fmt.format(word)

        # convert it to mel and add a trailing channel axis
        log_spec = wav2mel(audio_path, spec_kwargs, p2d_kwargs)
        img_spec = log_spec.reshape((*log_spec.shape, 1))
        logg.debug(f"img_spec.shape: {img_spec.shape}")
        img_specs.append(img_spec)

    # stack the spectrograms along a new first axis, one entry per word
    data = np.stack(img_specs)
    logg.debug(f"data.shape: {data.shape}")

    # hyperparameters of the trained model, they determine the model name
    hypa: ty.Dict[str, ty.Union[str, int]] = {
        "base_dense_width": 32,
        "base_filters": 20,
        "batch_size": 32,
        "dropout_type": "01",
        "epoch_num": 16,
        "kernel_size_type": "02",
        "pool_size_type": "02",
        "learning_rate_type": "02",
        "optimizer_type": "a1",
        "dataset": dataset_name,
        "words": train_words_type,
    }

    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # model_folder = Path("trained_models") / "cnn"
    model_folder = Path("saved_models")
    model_path = model_folder / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        raise FileNotFoundError(f"Model not found at: {model_path}")

    model = tf.keras.models.load_model(model_path)
    model.summary()

    pred = model.predict(data)

    # plot the thing: one row per word, waveform / spectrogram / prediction
    plot_size = 5
    fw = plot_size * 3
    fh = plot_size * num_rec_words
    # squeeze=False keeps axes 2D, so axes[i][j] works with a single word too
    fig, axes = plt.subplots(
        nrows=num_rec_words, ncols=3, figsize=(fw, fh), squeeze=False
    )
    fig.suptitle("Recorded audios", fontsize=18)

    for i, word in enumerate(rec_words):
        plot_waveform(audios[i], axes[i][0])

        spec = img_specs[i][:, :, 0]
        plot_spec(spec, axes[i][1])

        # NOTE(review): index() raises ValueError if a recorded word is not
        # one of the training words - confirm this cannot happen
        plot_pred(
            pred[i][perm_pred],
            train_words,
            axes[i][2],
            f"Prediction for {word}",
            train_words.index(word),
        )

    # https://stackoverflow.com/q/8248467
    # https://stackoverflow.com/q/2418125
    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    fig_name = f"{model_name}_{train_words_type}_{rec_words_type}.png"
    results_path = audio_folder / fig_name
    fig.savefig(results_path)

    # with many rows the figure is only saved
    if num_rec_words <= 6:
        plt.show()
def evaluate_audio_transfer(train_words_type: str, rec_words_type: str) -> None:
    """Record one audio per word and plot the transfer model predictions.

    The audios are obtained with ``record_audios`` and saved in
    ``recorded_audio`` as ``<word>_02.wav``. Each one is converted with
    ``wav2mel`` once per dataset of the selected datasets type, and the three
    spectrograms are stacked as the channels of one image. The model is
    loaded from ``saved_models/<model_name>.h5``, where the name is built
    with ``build_transfer_name`` from a fixed set of hyperparameters.

    For every word a row with waveform, the three spectrograms and the
    prediction is plotted. The figure is saved in ``recorded_audio`` and
    shown only when there are at most 6 words.

    Composed datasets (names starting with ``"melc"``) are not supported: an
    error is logged and the function returns without doing anything.

    Args:
        train_words_type: Key of ``words_types`` for the words the model was
            trained on.
        rec_words_type: Key of ``words_types`` for the words to record, or
            ``"train"`` to reuse the training words.

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_audio_transfer")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_audio_transfer")

    # magic to fix the GPUs
    setup_gpus()

    datasets_type = "01"
    datasets_types = {
        "01": ["mel05", "mel09", "mel10"],
        "02": ["mel05", "mel10", "mfcc07"],
        "03": ["mfcc06", "mfcc07", "mfcc08"],
        "04": ["mel05", "mfcc06", "melc1"],
        "05": ["melc1", "melc2", "melc4"],
    }
    dataset_names = datasets_types[datasets_type]

    # we do not support composed datasets for now
    if any(dn.startswith("melc") for dn in dataset_names):
        logg.error(f"not supported: {dataset_names}")
        return

    # words that the dataset was trained on
    # (train_words_type is a parameter, there is no args object here)
    train_words = words_types[train_words_type]

    # the model predicts sorted words
    perm_pred = compute_permutation(train_words)

    if rec_words_type == "train":
        rec_words = train_words
    else:
        rec_words = words_types[rec_words_type]
    num_rec_words = len(rec_words)

    # where to save the audios
    audio_folder = Path("recorded_audio")
    audio_folder.mkdir(parents=True, exist_ok=True)

    # record the audios and save them in audio_folder
    audio_path_fmt = "{}_02.wav"
    audios = record_audios(rec_words, audio_folder, audio_path_fmt, timeout=0)

    # compute the spectrograms and build the dataset of correct shape
    specs_3ch: ty.List[np.ndarray] = []
    # params for the mel conversion
    p2d_kwargs = {"ref": np.max}
    spec_dict = get_spec_dict()
    for word in rec_words:
        # get the name
        audio_path = audio_folder / audio_path_fmt.format(word)

        # convert it to mel for each type of dataset
        specs: ty.List[np.ndarray] = [
            wav2mel(audio_path, spec_dict[dataset_name], p2d_kwargs)
            for dataset_name in dataset_names
        ]

        # one channel per dataset, stacked along a new last axis
        img_spec = np.stack(specs, axis=2)
        specs_3ch.append(img_spec)

    data = np.stack(specs_3ch)
    logg.debug(f"data.shape: {data.shape}")

    # hyperparameters of the trained model, they determine the model name
    hypa: ty.Dict[str, str] = {
        "dense_width_type": "03",
        "dropout_type": "01",
        "batch_size_type": "02",
        "epoch_num_type": "01",
        "learning_rate_type": "01",
        "optimizer_type": "a1",
        "datasets_type": datasets_type,
        "words_type": train_words_type,
    }
    use_validation = False

    # get the model name
    model_name = build_transfer_name(hypa, use_validation)

    # load the model
    # model_folder = Path("trained_models") / "transfer"
    model_folder = Path("saved_models")
    model_path = model_folder / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        raise FileNotFoundError(f"Model not found at: {model_path}")
    model = tf.keras.models.load_model(model_path)

    # predict!
    pred = model.predict(data)

    # plot everything: one row per word
    plot_size = 5
    fw = plot_size * 5
    fh = plot_size * num_rec_words
    # squeeze=False keeps axes 2D, so axes[i][j] works with a single word too
    fig, axes = plt.subplots(
        nrows=num_rec_words, ncols=5, figsize=(fw, fh), squeeze=False
    )
    fig.suptitle("Recorded audios", fontsize=18)

    for i, word in enumerate(rec_words):
        plot_waveform(audios[i], axes[i][0])

        # one spectrogram per channel
        img_spec = specs_3ch[i]
        plot_spec(img_spec[:, :, 0], axes[i][1])
        plot_spec(img_spec[:, :, 1], axes[i][2])
        plot_spec(img_spec[:, :, 2], axes[i][3])

        # NOTE(review): index() raises ValueError if a recorded word is not
        # one of the training words - confirm this cannot happen
        plot_pred(
            pred[i][perm_pred],
            train_words,
            axes[i][4],
            f"Prediction for {word}",
            train_words.index(word),
        )

    # https://stackoverflow.com/q/8248467
    # https://stackoverflow.com/q/2418125
    fig.tight_layout(h_pad=3, rect=[0, 0.03, 1, 0.97])

    fig_name = f"{model_name}_{train_words_type}_{rec_words_type}.png"
    results_path = audio_folder / fig_name
    fig.savefig(results_path)

    # with many rows the figure is only saved
    if num_rec_words <= 6:
        plt.show()