def main():
    """Evaluate risk thresholds per admin-2 region and save summary plots.

    For every (pcode, name) pair in the admin-2 shortlist this:
      * selects the outbreaks and shocks recorded for that region,
      * flags the risk rows whose date matches an outbreak month,
      * computes per-threshold detection performance (TP / FP / FN, F1),
      * accumulates the counts into an overall performance frame, and
      * draws the per-region plots.

    Afterwards the aggregated confusion matrix and performance curves are
    written to the ``plots/`` directory.
    """
    # rs = np.random.RandomState(SEED)
    # Get the outbreaks, and loop through the
    df_outbreaks = utils.get_outbreaks()
    df_shocks = utils.get_shocks_data()
    df_risk_all = pd.read_excel(f'input/risk/{FILENAME_ZIMBABWE}')
    df_performance_all = utils.get_df_performance_all()
    # Get adm2 present in risks file
    adm2_shortlist = utils.get_adm2_shortlist(df_risk_all)
    n_regions = len(adm2_shortlist)
    # Plot for outbreaks and shocks.
    # squeeze=False keeps `axs` a 2-D array even when there is a single
    # region; without it plt.subplots returns a bare Axes and indexing fails.
    fig, axs = plt.subplots(n_regions, 1, figsize=(10, 10), squeeze=False)
    for iadm2, (admin2_pcode, admin2_name) in enumerate(adm2_shortlist):
        print(f'Analyzing admin region {admin2_name}')
        df_outbreak = df_outbreaks[df_outbreaks['admin2Pcode'] == admin2_pcode]
        df_shock = df_shocks[df_shocks['pcode'] == admin2_pcode]
        # Make the fake data
        # df_risk = utils.generate_fake_risk(rs, START_DATE, END_DATE)
        # Get risk from Zimbabwe data
        df_risk = utils.get_risk_df(df_risk_all, admin2_name)
        # Get outbreak date indices
        df_risk['outbreak'] = df_risk['date'].isin(
            df_outbreak['Outbreak month'])
        real_outbreaks = df_risk[df_risk['outbreak']].index.values
        # Get shocks
        shocks, df_risk = utils.get_shocks(df_shock, df_risk)
        # Get detections per threshold
        df_performance = utils.loop_over_thresholds(df_risk['risk'],
                                                    real_outbreaks)
        df_performance = utils.calculate_f1(df_performance)
        # Add it to the full frame: counts are summed per threshold
        df_performance_all = (
            pd.concat([df_performance[['thresh', 'TP', 'FP', 'FN']],
                       df_performance_all])
            .groupby(['thresh'])
            .sum()
            .reset_index())
        # Make plots
        plot_utils.plot_adm2(df_risk, df_performance, real_outbreaks, shocks,
                             admin2_pcode, admin2_name)
        # Plot shocks / outbreaks; only the bottom panel shows the x axis
        plot_utils.plot_shocks_and_outbreaks(
            axs[iadm2, 0], real_outbreaks, shocks, admin2_name, df_risk,
            show_x_axis=(iadm2 == n_regions - 1))
    # TODO: evaluate the best threshold value and calculate the overall value
    # of precision and recall
    # Save the shocks / outbreaks figure
    fig.savefig('plots/outbreaks_shocks.png')
    plt.close(fig)
    # Calculate overall performance
    df_performance_all = utils.calculate_f1(df_performance_all)
    # Confusion matrix
    fig, ax = plt.subplots()
    plot_utils.plot_confusion_matrix(df_performance_all, ax)
    fig.savefig('plots/full_confusion_matrix.png')
    plt.close(fig)
    # Performance
    fig, ax = plt.subplots()
    plot_utils.plot_performance(df_performance_all, ax)
    fig.savefig('plots/full_performance.png')
    plt.close(fig)
def plot_confusion_matrix(self, df, y_test, y_predicted):
    """Draw the confusion matrix for the given true / predicted labels.

    With more than 20 classes the work is delegated to ``self._cm``;
    otherwise the matrix is computed here and handed to the module-level
    ``plot_confusion_matrix`` helper, un-normalised.
    """
    if len(self.all_classes) <= 20:
        matrix = confusion_matrix(y_test, y_predicted)
        plot_confusion_matrix(matrix,
                              classes=self.all_classes,
                              normalize=False)
        return
    self._cm(df, y_test, y_predicted)
def print_confusion_matrix(self, label, prediction):
    """Compute a confusion matrix with TensorFlow and plot it.

    Args:
        label: array reduced with ``np.argmax(label, 1)`` before use
            (presumably one-hot encoded targets; verify against caller).
        prediction: predicted class indices, passed to
            ``tf.confusion_matrix`` unchanged.
    """
    label = np.argmax(label, 1)
    # Use the session as a context manager so its resources are released
    # even if evaluating the graph raises.
    with tf.Session() as sess:
        cnfn_matrix = sess.run(tf.confusion_matrix(label, prediction))
    np.set_printoptions(precision=2)
    # Plot non-normalized confusion matrix
    # NOTE(review): `classes` receives the per-sample label indices, not the
    # distinct class names; confirm this is what the plotting helper expects.
    plot_confusion_matrix(cnfn_matrix,
                          classes=label,
                          title='Confusion matrix, without normalization')
def load_LR_and_test(model_path, test_dataset):
    """Restore a logistic-regression classifier and evaluate it.

    Loads the state dict stored at ``model_path``, runs the module's
    ``test_network`` on ``test_dataset``, shows the confusion matrix labelled
    with the genre labels from the dataset metadata, and returns the accuracy.
    """
    classifier = logistic_regresssion.LogisticRegressionClassifier()
    saved_state = torch.load(model_path)
    classifier.load_state_dict(saved_state)

    accuracy, confusion = logistic_regresssion.test_network(
        classifier, test_dataset)

    genre_labels = DatasetMetadata.from_filepath(JSON_FILE_PATH).genre_labels
    plt.figure(figsize=(10, 10))
    plot_confusion_matrix(confusion,
                          genre_labels,
                          title="Logistic Regression Confusion Matrix")
    plt.show()
    return accuracy
def train(training1="training_all.csv"):
    """Train an MLP classifier on a labelled feature CSV and plot its results.

    The CSV is read with its first column as index and must contain a
    ``label`` column; every other column is used as a feature.

    Side effects:
        * writes per-column summary statistics to ``description.csv``;
        * pickles the fitted classifier to ``finalized_model.txt``;
        * draws the ROC curve and the confusion matrix of the test split.

    Args:
        training1: path of the training CSV.
    """
    facedata = pd.read_csv(training1, index_col=0)
    labels = facedata.label
    features = facedata.drop('label', axis=1)

    # describe() summarises central tendency, dispersion and shape of each
    # column (NaN values excluded); keep it on disk for inspection.
    features.describe().to_csv("description.csv")

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3)

    # Fit the scaler on the training split only, then apply it to both.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
    mlp.fit(X_train, y_train.values.ravel())

    # Close the file deterministically instead of leaking the handle.
    with open('finalized_model.txt', 'wb') as model_file:
        pickle.dump(mlp, model_file)

    predictions = mlp.predict(X_test)
    mat = confusion_matrix(y_test, predictions)

    # NOTE(review): the ROC curve is built from hard class predictions, not
    # from scores / probabilities; confirm this is intended.
    fpr, tpr, _thresholds = roc_curve(y_test, predictions)
    plot_roc_curve(fpr, tpr)
    # plt.show()
    plot_confusion_matrix(mat, [0, 1], ["non-face", "face"])
def test_majority_classifier():
    """Evaluate the majority-class baseline on the test split.

    Prints a timestamped start message, runs the classifier over the lyrics
    test dataset, shows its confusion matrix and prints the accuracy.
    """
    print(f'{datetime.datetime.now()} - starting majority classifier testing')

    baseline = MajorityClassifier(JSON_FILE_PATH)
    lyrics_test_set = LyricsDataset(LEARNING_DATASET_TEST_PATH,
                                    WordAverageTransform())
    accuracy, confusion = majority_classifier.test_network(
        baseline, lyrics_test_set)

    genre_labels = DatasetMetadata.from_filepath(JSON_FILE_PATH).genre_labels
    plt.figure(figsize=(10, 10))
    plot_confusion_matrix(confusion,
                          genre_labels,
                          title="Majority Classifier Confusion Matrix")
    plt.show()

    print(f'{datetime.datetime.now()} - Majority classifier accuracy = {accuracy}')
def load_HAN_and_test(model_path, test_dataset):
    """Restore a hierarchical attention network and evaluate it.

    The word-embedding matrix is read from ``WORD_EMBEDDING_PATH`` (the first
    column of the space-separated file is dropped), the HAN is built with the
    module-level hyper-parameters, the weights stored at ``model_path`` are
    loaded, and the network is tested on ``test_dataset``. The confusion
    matrix is shown and the accuracy returned.
    """
    embedding_frame = pd.read_csv(WORD_EMBEDDING_PATH,
                                  header=None,
                                  sep=" ",
                                  quoting=csv.QUOTE_NONE)
    word_embedding = embedding_frame.values[:, 1:]

    han = hierarchical_attention_net
    network = HAN(han.HIDDEN_SIZE,
                  han.HIDDEN_SIZE,
                  han.BATCH_SIZE,
                  han.NUM_CLASSES,
                  word_embedding)
    network.load_state_dict(torch.load(model_path))

    accuracy, confusion = han.test_network(network, test_dataset)

    genre_labels = DatasetMetadata.from_filepath(JSON_FILE_PATH).genre_labels
    plt.figure(figsize=(10, 10))
    plot_confusion_matrix(confusion, genre_labels, title="HAN Confusion Matrix")
    plt.show()
    return accuracy
def visualize_cm(model_name: str) -> None:
    r"""Plot the stored confusion matrix of a model and save it as PNG and PDF.

    The training type is inferred from ``model_name`` and selects the
    sub-folder of ``info/`` that holds the model's ``results_recap.json``
    (confusion matrix and fscore) and ``recap.json`` (word list). The figure
    is written to ``plot_results/cm/<model_name>_cm.{png,pdf}``.

    Args:
        model_name: name of the model whose results should be plotted.

    Raises:
        ValueError: if the training type cannot be inferred from the name.
    """
    logg = logging.getLogger(f"c.{__name__}.visualize_cm")
    # logg.setLevel("INFO")
    logg.debug("Start visualize_cm")

    # the location of this file
    this_file_folder = Path(__file__).parent.absolute()
    logg.debug(f"this_file_folder: {this_file_folder}")

    # the order of the checks matters: "im0" wins over any prefix
    if "im0" in model_name:
        train_type_tag = "image"
    elif model_name.startswith("ATT"):
        train_type_tag = "attention"
    elif model_name.startswith(("VAN", "S", "AAN")):
        train_type_tag = "area"
    else:
        # previously fell through and crashed later with UnboundLocalError
        raise ValueError(
            f"Cannot infer the training type from model name {model_name!r}"
        )

    info_folder = Path("info") / train_type_tag
    model_folder = info_folder / model_name

    res_path = model_folder / "results_recap.json"
    res = json.loads(res_path.read_text())

    recap_path = model_folder / "recap.json"
    recap = json.loads(recap_path.read_text())

    cm = np.array(res["cm"])
    fscore = res["fscore"]
    words = recap["words"]

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    fig.tight_layout()

    # the double braces leave a "{}" placeholder for the file extension
    fig_name = f"{model_name}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm"
    cm_folder.mkdir(parents=True, exist_ok=True)

    for extension in ("png", "pdf"):
        fig.savefig(cm_folder / fig_name.format(extension))
    plt.close(fig)
def test(self):
    """Evaluate the saved model on the test set and plot its confusion matrix.

    Loads the checkpoint whose file name is derived from the run arguments,
    runs ``self.evaluation`` on every batch of ``self.test_data_loader``,
    computes the metrics over ALL batches, records the accuracy in
    ``self.learning_history`` and saves the confusion-matrix figure under
    ``./result/figures/``.
    """
    # The same tag names both the checkpoint and the figure of this run.
    run_tag = '{}_{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(
        self.args.model_name, self.args.dataset, self.args.optimizer,
        self.args.learning_rate, self.args.weight_decay, self.args.dropout,
        self.args.batch_normalizations, self.args.softmax,
        self.args.batch_size, self.args.dev)
    path = save_path + 'models/' + run_tag + '.model'
    self.mdl.load_state_dict(self.load_model(path))
    self.mdl.eval()

    outputs, targets = None, None
    with torch.no_grad():
        for t_sample_batched in self.test_data_loader:
            output, target = self.evaluation(t_sample_batched)
            if outputs is None:
                outputs = output
            else:
                outputs = np.concatenate((outputs, output))
            if targets is None:
                targets = target
            else:
                targets = np.concatenate((targets, target))

    # Score the accumulated predictions; the previous code passed `output`,
    # i.e. only the last batch, against the targets of every batch.
    result = self.metric(targets=targets, outputs=outputs)
    print('\033[1;32mTest accuracy:{:.2f}%, macro_f1:{:.5f}\033[0m'.format(
        result['acc'] * 100, result['macro_f1']))
    self.learning_history['Test accuracy'] = result['acc']

    # Plot confusion matrix
    class_names = ['negative', 'neutral', 'positive']
    cnf_matrix = confusion_matrix(targets, outputs)
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix',
                          normalize=False)
    plt.savefig('./result/figures/' + run_tag + '.png')
def main(n_cpu, save=True, exp_dir=None):
    """Run the three experiment modes in parallel and plot their results.

    One config per mode (0, 1, 2) is derived from ``default_config`` and
    executed with ``run`` in a process pool. The reward curves of all modes,
    the convergence-point matrix and the message Venn diagram of mode 0 are
    plotted; when ``save`` is true the figures, the configs and the raw
    output are written to ``exp_dir``.

    Args:
        n_cpu: maximum number of worker processes (capped at the number of
            configs).
        save: whether to write figures and data to disk.
        exp_dir: output directory; defaults to ``Experiments/<unix time>/``.
    """
    configs = []
    for mode in range(3):
        config = default_config.copy()
        config["mode"] = mode
        configs.append(config)

    # Run experiments
    n_cpu = min(len(configs), n_cpu)
    with Pool(n_cpu) as p:
        output = p.map(run, configs)
    # output = run(default_config)

    if save:
        if exp_dir is None:
            exp_dir = "Experiments/" + str(int(time.time())) + "/"
        os.makedirs(exp_dir, exist_ok=True)

    # Plot reward graph and save it
    fig1, ax1 = plt.subplots()
    ax1.set_ylim(-0.2, 1.05)
    ax1.set_ylabel("Obtained reward / max reward for the state")
    ax1.set_xlabel("Episodes")
    legends = ["Communication", "Fixed messages", "Fixed actions"]
    for i, (rs, _cps, _ns) in enumerate(output):
        ys = rs.mean(axis=1)
        rs_sem = stats.sem(rs, axis=1)
        xs = np.arange(len(ys)) * default_config["log_interval"]
        ax1.plot(xs, ys, color=color_sequence[i], label=legends[i])
        # draw on ax1 explicitly rather than on the implicit current axes
        ax1.fill_between(xs, ys - rs_sem, ys + rs_sem,
                         alpha=0.5, color=color_sequence[i])
    ax1.legend()
    if save:
        # os.path.join also works when exp_dir has no trailing separator
        fig1.savefig(os.path.join(exp_dir, "reward_plot"))

    # Plot the convergence points and save it
    # (the `np.int` alias was removed in NumPy 1.24; builtin int is the same)
    fig2, ax2 = plot_confusion_matrix(output[0][1].astype(int))
    if save:
        fig2.savefig(os.path.join(exp_dir, "cp_plot"))

    # Plot the venn diagram for messages and save it
    fig3, ax3 = plot_venn(output[0][2])
    if save:
        fig3.savefig(os.path.join(exp_dir, "message_plot"))

    # Save the configs and the raw output
    if save:
        with open(os.path.join(exp_dir, "configs"), "wb") as f:
            pickle.dump(configs, f)
        with bz2.BZ2File(os.path.join(exp_dir, "output.pbz2"), "wb") as f:
            pickle.dump(output, f)

    plt.show()
dataset.dev, max_epoch=config.max_epoch) model.save_weights(os.path.join(todir, 'best_weights'), overwrite=True) with open(os.path.join(todir, 'classification_report.txt'), 'wb') as f: report = classification_report( best_scores['targs'], best_scores['preds'], target_names=dataset.featurizer.vocab['rel'].index2word) f.write(report) print report from plot_utils import plot_confusion_matrix, plot_histogram, get_sorted_labels order, labels, counts = get_sorted_labels(best_scores['targs'], dataset.featurizer.vocab) fig = plot_confusion_matrix(best_scores['targs'], best_scores['preds'], order, labels) fig.savefig(os.path.join(todir, 'confusion_matrix.png')) fig = plot_histogram(labels, counts) fig.savefig(os.path.join(todir, 'relation_histogram.png')) with open(os.path.join(todir, 'best_scores.json'), 'wb') as f: del best_scores['preds'] del best_scores['targs'] del best_scores['ids'] json.dump(best_scores, f, sort_keys=True) print 'best scores' pprint(best_scores)
def train_model(hypa, force_retrain):
    """Train, evaluate and save one CNN described by the hyper-parameters.

    The model name is built from ``hypa``; if a placeholder file for that
    name already exists the training is skipped unless ``force_retrain`` is
    true. Otherwise the model is built, fitted on the processed dataset,
    saved as ``trained_models/cnn/<name>.h5`` and evaluated on the test
    split. A recap of the configuration, the results (metrics, confusion
    matrix, fscore, histories) and the confusion-matrix plot are written to
    ``info/cnn/<name>/``.

    Args:
        hypa: hyper-parameter dict; the keys read here are ``words``,
            ``dataset``, ``base_filters``, ``base_dense_width``,
            ``kernel_size_type``, ``pool_size_type``, ``dropout_type``,
            ``learning_rate_type``, ``optimizer_type``, ``batch_size`` and
            ``epoch_num``.
        force_retrain: retrain even if the placeholder file exists.

    Returns:
        ``"done_training"`` after a training run, ``None`` if it was skipped.
    """
    logg = logging.getLogger(f"c.{__name__}.train_model")
    # logg.debug("Starting train_model")

    # get the words
    words = words_types[hypa["words"]]

    # name the model
    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "cnn"
    if not model_folder.exists():
        model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    # logg.debug(f"model_path: {model_path}")
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "cnn" / model_name
    if not info_folder.exists():
        info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    # input data
    processed_path = Path("data_proc") / f"{hypa['dataset']}"
    data, labels = load_processed(processed_path, words)

    # from hypa extract model param
    model_param = {}
    model_param["num_labels"] = len(words)
    model_param["input_shape"] = data["training"][0].shape
    model_param["base_filters"] = hypa["base_filters"]
    model_param["base_dense_width"] = hypa["base_dense_width"]

    # translate types to actual values
    kernel_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(5, 1), (3, 3), (3, 3)],
        "03": [(1, 5), (3, 3), (3, 3)],
    }
    model_param["kernel_sizes"] = kernel_size_types[hypa["kernel_size_type"]]

    pool_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(2, 1), (2, 2), (2, 2)],
        "03": [(1, 2), (2, 2), (2, 2)],
    }
    model_param["pool_sizes"] = pool_size_types[hypa["pool_size_type"]]

    dropout_types = {"01": [0.03, 0.01], "02": [0.3, 0.1]}
    model_param["dropouts"] = dropout_types[hypa["dropout_type"]]

    # a dict to recreate this training
    recap = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["model_name"] = model_name
    recap["version"] = "002"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "fixed03",
        "e1": "exp_decay_keras_01",
        "04": "exp_decay_step_01",
        "05": "exp_decay_smooth_01",
        "06": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values; every non-fixed type starts from 1e-3
    fixed_lr_values = {"fixed01": 1e-2, "fixed02": 1e-3, "fixed03": 1e-4}
    lr = fixed_lr_values.get(lr_value, 1e-3)
    if lr_value == "exp_decay_keras_01":
        # this schedule lives inside the optimizer, no callback is needed
        lr = ExponentialDecay(0.1,
                              decay_steps=100000,
                              decay_rate=0.96,
                              staircase=True)

    # instantiate only the optimizer that was asked for
    optimizer_types = {"a1": Adam, "r1": RMSprop}
    opt = optimizer_types[hypa["optimizer_type"]](learning_rate=lr)

    # create the model
    model = CNNmodel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]
    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    # Only these types are driven by a LearningRateScheduler callback.
    # "exp_decay_keras_01" also starts with "exp_decay" but has no callback
    # schedule: the old prefix test left `exp_decay_part` unbound for it.
    callback_schedules = {
        "exp_decay_step_01": partial(exp_decay_step, epochs_drop=5),
        "exp_decay_smooth_01": partial(exp_decay_smooth, epochs_drop=5),
        "exp_decay_smooth_02": partial(
            exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2
        ),
    }
    if lr_value in callback_schedules:
        lrate = LearningRateScheduler(callback_schedules[lr_value])
        callbacks.append(lrate)

    # # setup early stopping
    # early_stop = EarlyStopping(
    #     # monitor="val_categorical_accuracy",
    #     monitor="val_loss",
    #     patience=4,
    #     verbose=1,
    #     restore_best_weights=True,
    # )
    # callbacks.append(early_stop)

    # get training parameters
    BATCH_SIZE = hypa["batch_size"]
    SHUFFLE_BUFFER_SIZE = BATCH_SIZE
    EPOCH_NUM = hypa["epoch_num"]

    # load the datasets
    datasets = {}
    for which in ["training", "validation", "testing"]:
        # logg.debug(f"data[{which}].shape: {data[which].shape}")
        datasets[which] = Dataset.from_tensor_slices((data[which], labels[which]))
        # logg.debug(f"datasets[{which}]: {datasets[which]}")
        datasets[which] = datasets[which].shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
        # logg.debug(f"datasets[{which}]: {datasets[which]}")

    # train the model
    results = model.fit(
        data["training"],
        labels["training"],
        # validation_data=datasets["validation"],
        validation_data=(data["validation"], labels["validation"]),
        batch_size=BATCH_SIZE,
        epochs=EPOCH_NUM,
        verbose=1,
        callbacks=callbacks,
    )

    # save the trained model
    model.save(model_path)

    results_recap = {}
    results_recap["model_name"] = model_name

    # version of the results saved
    results_recap["results_recap_version"] = "002"

    # quickly evaluate the results
    # logg.debug(f"\nmodel.metrics_names: {model.metrics_names}")
    # for which in ["training", "validation", "testing"]:
    #     model_eval = model.evaluate(datasets[which])
    #     logg.debug(f"{which}: model_eval: {model_eval}")

    # save the evaluation results
    logg.debug("Evaluate on test data:")
    # eval_testing = model.evaluate(datasets["testing"])
    # results_recap[model.metrics_names[0]] = eval_testing[0]
    # results_recap[model.metrics_names[1]] = eval_testing[1]
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    # y_pred = model.predict(datasets["testing"])
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore and store it next to the cm, as the other
    # training / recompute functions of this file do
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_recap["history"] = {
        "loss": results.history["loss"],
        "val_loss": results.history["val_loss"],
        "categorical_accuracy": results.history["categorical_accuracy"],
        "val_categorical_accuracy": results.history["val_categorical_accuracy"],
    }

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # NOTE(review): datasets["testing"] is shuffled while labels["testing"]
    # is not, so this second fscore looks like a diagnostic comparison only;
    # it is logged and never saved.
    y_pred_dataset = model.predict(datasets["testing"])
    cm_dataset = pred_hot_2_cm(labels["testing"], y_pred_dataset, words)
    fscore_dataset = analyze_confusion(cm_dataset, words)
    logg.debug(f"fscore_dataset: {fscore_dataset} fscore {fscore}")

    # for i, (ys, yd) in enumerate(zip(y_pred, y_pred_dataset)):
    #     pred_split = np.argmax(ys)
    #     pred_dataset = np.argmax(yd)
    #     logg.debug(f"i: {i} pred_split: {pred_split} pred_dataset: {pred_dataset}")

    # plt.show()

    # the placeholder marks this model as trained for later runs
    placeholder_path.write_text(f"Trained. F-score: {fscore}")

    return "done_training"
def train_transfer(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
    tensorboard_logs_folder: Path,
) -> None:
    """Train a transfer-learning model in two phases and save its results.

    Phase one fits the model as returned by ``TRAmodel``; phase two sets
    ``base_model.trainable = True``, recompiles with the second optimizer
    and fits again. After each phase the weights stored at the model path
    are reloaded. Recaps, histories, the test confusion matrix, the fscore
    and the final model are written to ``root_info_folder/<model_name>`` and
    ``trained_folder``.

    https://www.tensorflow.org/guide/keras/transfer_learning/#build_a_model

    Args:
        hypa: hyper-parameters; ``words_type`` and ``datasets_type`` are read
            here, the rest is consumed by the helper functions.
        force_retrain: retrain even if the placeholder file already exists.
        use_validation: keep the validation split separate; otherwise it is
            merged into the training split.
        trained_folder: where the ``.h5`` model and ``.txt`` placeholder go.
        root_info_folder: parent of the per-model info folder.
        tensorboard_logs_folder: forwarded to the training-parameter helper.
    """
    logg = logging.getLogger(f"c.{__name__}.train_transfer")
    # logg.setLevel("INFO")
    logg.debug("Start train_transfer")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_transfer_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            # Logger.warn is a deprecated alias of Logger.warning
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    if not model_info_folder.exists():
        model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    # grab a few hypas
    words_type = hypa["words_type"]
    datasets_type = hypa["datasets_type"]

    # get the partition of the data
    partition, ids2labels = prepare_partitions(words_type)

    # get the word list
    words = words_types[words_type]
    num_labels = len(words)

    # get the dataset name list
    datasets_types, datasets_shapes = get_datasets_types()
    dataset_names = datasets_types[datasets_type]
    dataset_shape = datasets_shapes[datasets_type]

    # the shape of each sample
    input_shape = (*dataset_shape, 3)

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_transfer(
        hypa, use_validation, tensorboard_logs_folder, model_path
    )

    # load datasets
    processed_folder = Path("data_split")
    data_split_paths = [processed_folder / f"{dn}" for dn in dataset_names]
    # data, labels = load_triple(data_paths, words)

    # assemble the gen_param for the generators
    gen_param = {
        "dim": dataset_shape,
        "batch_size": training_param["batch_sizes"][0],
        "shuffle": True,
        "label_names": words,
        "data_split_paths": data_split_paths,
    }

    # maybe concatenate the validation and training lists
    val_generator: ty.Optional[AudioGenerator] = None
    if use_validation:
        val_generator = AudioGenerator(partition["validation"], ids2labels, **gen_param)
        logg.debug("Using validation data")
    else:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    # create the training generator with the modified (maybe) list of IDs
    training_generator = AudioGenerator(partition["training"], ids2labels, **gen_param)
    logg.debug(f"len(training_generator): {len(training_generator)}")

    ###### always create the test generator
    # do not shuffle the test data
    gen_param["shuffle"] = False
    # do not batch it, no loss of stray data at the end
    gen_param["batch_size"] = 1
    testing_generator = AudioGenerator(partition["testing"], ids2labels, **gen_param)

    ##########################################################
    #   Setup model
    ##########################################################

    # from hypa extract model param
    model_param = get_model_param_transfer(hypa, num_labels, input_shape)

    # get mean and var to normalize the data
    data_mean, data_variance = get_generator_mean_var_cached(
        training_generator, words_type, datasets_type, processed_folder
    )

    # get the model
    model, base_model = TRAmodel(
        data_mean=data_mean, data_variance=data_variance, **model_param
    )
    model.summary()

    # a dict to recreate this training
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["batch_sizes"] = training_param["batch_sizes"]
    recap["epoch_num"] = training_param["epoch_num"]
    recap["version"] = "003"

    # logg.debug(f"recap: {recap}")
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    ##########################################################
    #   Compile and fit model the first time
    ##########################################################

    model.compile(
        optimizer=training_param["opt"][0],
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][0],
    )

    results_freeze = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][0],
        callbacks=training_param["callbacks"][0],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    # results_freeze_recap
    results_freeze_recap: ty.Dict[str, ty.Any] = {}
    results_freeze_recap["model_name"] = model_name
    results_freeze_recap["results_recap_version"] = "001"

    # save the histories
    results_freeze_recap["history_train"] = {
        mn: results_freeze.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_freeze_recap["history_val"] = {
            f"val_{mn}": results_freeze.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_freeze_recap.json"
    res_recap_path.write_text(json.dumps(results_freeze_recap, indent=4))

    ##########################################################
    #   Compile and fit model the second time
    ##########################################################

    # Unfreeze the base_model. Note that it keeps running in inference mode
    # since we passed `training=False` when calling it. This means that
    # the batchnorm layers will not update their batch statistics.
    # This prevents the batchnorm layers from undoing all the training
    # we've done so far.
    base_model.trainable = True
    model.summary()

    model.compile(
        optimizer=training_param["opt"][1],  # Low learning rate
        loss=tf_losses.CategoricalCrossentropy(),
        metrics=training_param["metrics"][1],
    )

    results_full = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epoch_num"][1],
        callbacks=training_param["callbacks"][1],
    )

    # reload the best weights saved by the ModelCheckpoint
    model.load_weights(str(model_path))

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    results_full_recap: ty.Dict[str, ty.Any] = {}
    results_full_recap["model_name"] = model_name
    results_full_recap["results_recap_version"] = "001"

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_full_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    # cm = pred_hot_2_cm(y_true, y_pred, words)
    # NOTE(review): no `labels=` is passed, so the row / column order is the
    # one sklearn derives from the data; confirm it matches `words`.
    cm = confusion_matrix(y_true, y_pred_labels)
    results_full_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_full_recap["fscore"] = fscore

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_full_recap["history_train"] = {
        mn: results_full.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_full_recap["history_val"] = {
            f"val_{mn}": results_full.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_full_recap.json"
    res_recap_path.write_text(json.dumps(results_full_recap, indent=4))

    # save the trained model
    model.save(model_path)

    # save the placeholder that marks this model as trained
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def recompute_fscore_cnn() -> None:
    """Recompute the confusion matrix and fscore of every trained CNN.

    Every folder in ``info/`` whose name starts with ``CNN`` is processed if
    a model file larger than 100 bytes exists in ``trained_models/`` and the
    folder contains a ``results_recap.json``. For such a model the test
    predictions are recomputed, the confusion matrix and fscore in the
    results recap are overwritten, both recap versions are set to ``"002"``
    and the confusion-matrix plot is saved in the model's own info folder.
    """
    logg = logging.getLogger(f"c.{__name__}.recompute_fscore_cnn")
    # logg.setLevel("INFO")
    logg.debug("Start recompute_fscore_cnn")

    info_folder = Path("info")
    trained_folder = Path("trained_models")

    for model_folder in info_folder.iterdir():
        # logg.debug(f"model_folder: {model_folder}")

        # check that it is a CNN
        model_name = model_folder.name
        if not model_name.startswith("CNN"):
            continue

        # check that the model is trained and not a placeholder
        model_path = trained_folder / f"{model_name}.h5"
        if not (model_path.exists() and model_path.stat().st_size > 100):
            continue

        # nothing to update without saved results; checked before the
        # (expensive) model load
        res_recap_path = model_folder / "results_recap.json"
        if not res_recap_path.exists():
            continue

        # load it
        model = models.load_model(model_path)

        results_recap = json.loads(res_recap_path.read_text())
        # logg.debug(f"results_recap['cm']: {results_recap['cm']}")

        recap_path = model_folder / "recap.json"
        recap = json.loads(recap_path.read_text())
        # logg.debug(f"recap['words']: {recap['words']}")

        words = recap["words"]
        hypa = recap["hypa"]

        # check that the data is available
        dn = hypa["dataset"]
        wt = hypa["words"]
        if dn.startswith("mel") or dn.startswith("mfcc"):
            preprocess_spec(dn, wt)
        elif dn.startswith("aug"):
            do_augmentation(dn, wt)

        processed_path = Path("data_proc") / f"{hypa['dataset']}"
        data, labels = load_processed(processed_path, words)

        y_pred = model.predict(data["testing"])
        cm = pred_hot_2_cm(labels["testing"], y_pred, words)
        fscore = analyze_confusion(cm, words)
        # logg.debug(f"fscore: {fscore}")

        # overwrite the cm
        results_recap["cm"] = cm.tolist()
        # add the fscore
        results_recap["fscore"] = fscore
        # increase the version
        results_recap["results_recap_version"] = "002"
        # write the new results
        res_recap_path.write_text(json.dumps(results_recap, indent=4))

        # increase the recap version (shows that it is after this debacle)
        recap["version"] = "002"
        recap_path.write_text(json.dumps(recap, indent=4))

        # save the new plots in the folder of THIS model; writing to the
        # root info folder made every model overwrite the same file
        fig, ax = plt.subplots(figsize=(12, 12))
        plot_confusion_matrix(cm, ax, model_name, words, fscore)
        plot_cm_path = model_folder / "test_confusion_matrix.png"
        fig.savefig(plot_cm_path)
        plt.close(fig)
def make_plots(true_id, true_p4, pred_id, pred_p4, out):
    """Draw the classification and regression diagnostic plots.

    The class index of each element is taken as the argmax over the last
    axis of ``true_id`` / ``pred_id``. Confusion matrices (raw and
    row-normalised) are plotted first; then, for the elements where both
    the true and the predicted class are non-zero, regression and
    distribution plots of selected ``p4`` columns are produced with file
    names prefixed by ``out``; finally the particle plots for pid 1 and 2.
    """
    class_indices = list(range(len(class_labels)))

    true_id = torch.max(true_id, -1)[1]
    pred_id = torch.max(pred_id, -1)[1]

    cm = sklearn.metrics.confusion_matrix(
        true_id, pred_id, labels=class_indices)
    cm_normed = sklearn.metrics.confusion_matrix(
        true_id, pred_id, labels=list(class_indices), normalize="true")
    plot_confusion_matrix(cm)
    plot_confusion_matrix(cm_normed)

    msk = (pred_id != 0) & (true_id != 0)

    def _column(p4, index):
        # masked column of the p4 tensor as a flat numpy array
        return p4[msk, index].flatten().detach().numpy()

    # (quantity label, p4 column); extracted up-front, in this order
    quantities = {}
    for label, index in (("charge", 0), ("pt", 1), ("E", 5),
                         ("eta", 2), ("sin phi", 3), ("cos phi", 4)):
        quantities[label] = (_column(true_p4, index), _column(pred_p4, index))

    # (label, bin range, regression file, distribution file or None)
    plot_specs = (
        ("charge", (-2, 2), 'charge_regression', None),
        ("pt", (0, 5), 'pt_regression', 'pt_distribution'),
        ("E", (-1, 5), 'energy_regression', 'energy_distribution'),
        ("eta", (-5, 5), 'eta_regression', 'eta_distribution'),
        ("sin phi", (-2, 2), 'sphi_regression', 'sphi_distribution'),
        ("cos phi", (-2, 2), 'cphi_regression', 'cphi_distribution'),
    )
    for label, (low, high), regression_name, distribution_name in plot_specs:
        true_values, pred_values = quantities[label]
        plot_regression(true_values, pred_values, label,
                        np.linspace(low, high, 100),
                        fname=out + regression_name)
        if distribution_name is not None:
            plot_distributions(true_values, pred_values, label,
                               np.linspace(low, high, 100),
                               fname=out + distribution_name)

    for pid in (1, 2):
        plot_particles(out + 'particleID' + str(pid),
                       true_id, true_p4, pred_id, pred_p4, pid=pid)
# =========================================================================== # Evaluate the model # =========================================================================== # ====== evaluate the train data ====== # y_pred_probas = model.predict(X_train, batch_size=BATCH_SIZE) y_pred = np.argmax(y_pred_probas, axis=-1) train_report = classification_report(y_true=np.argmax(y_train, axis=-1), y_pred=y_pred) train_cm = confusion_matrix(y_true=np.argmax(y_train, axis=-1), y_pred=y_pred) # ====== evaluate the test data ====== # y_pred_probas = model.predict(X_score, batch_size=BATCH_SIZE) y_pred = np.argmax(y_pred_probas, axis=-1) score_report = classification_report(y_true=np.argmax(y_score, axis=-1), y_pred=y_pred) score_cm = confusion_matrix(y_true=np.argmax(y_score, axis=-1), y_pred=y_pred) # ====== plotting the results ====== # plt.figure(figsize=(16, 8)) # (ncol, nrow) plot_confusion_matrix(train_cm, ax=(1, 2, 1), labels=digits, fontsize=8, title="Train") plot_confusion_matrix(score_cm, ax=(1, 2, 2), labels=digits, fontsize=8, title="Score") # plt.show(block=True) plot_save(FIG_PATH)
if vote_window > 0: y_pred = predictions_vote(y_pred, vote_window) # Accuracy acc = accuracy_score(y_true, y_pred) print 'Accuracy on test set:', acc label_list = sorted(df.y_true.unique()) # Plot normalized confusion matrix cnf_matrix = confusion_matrix(y_true, y_pred) output_file = os.path.join(output_dir, 'confusion_matrix.png') _ = plot_confusion_matrix(cnf_matrix, output_file, classes=label_list, normalize=True, title='Confusion matrix (accuracy=%.2f)' % acc) print 'Plot saved:', output_file # Classification report (F1 score, etc.) clf_report = classification_report(y_true, y_pred) output_file = os.path.join(output_dir, 'classification_report.png') plot_classification_report(clf_report, output_file) print 'Plot saved:', output_file # Plot predictions output_file = os.path.join(output_dir, 'predictions.html') title = 'Predictions (accuracy=%s)' % acc plot_predictions(t, X_values, y_true, y_pred, output_file, title) print 'Plot saved:', output_file
def train_img(
    hypa: ty.Dict[str, str],
    force_retrain: bool,
    use_validation: bool,
    trained_folder: Path,
    root_info_folder: Path,
) -> None:
    """Train an image classification model and save the model and its reports.

    The model name is derived from ``hypa`` and ``use_validation``. A
    ``<model_name>.txt`` placeholder in ``trained_folder`` marks a finished
    run: when it exists the function returns immediately unless
    ``force_retrain`` is set.

    Files written under ``root_info_folder / model_name``: ``recap.json``,
    ``model_summary.txt``, ``results_recap.json`` and
    ``test_confusion_matrix.png``. The trained model is saved as
    ``trained_folder / <model_name>.h5`` and the placeholder is written last.

    Args:
        hypa: Hyperparameters. This function reads the ``words_type``,
            ``dataset_name`` and ``net_type`` keys and forwards the whole
            dict to the name/parameter builders.
        force_retrain: Train again even if the placeholder file exists.
        use_validation: Fit with a validation generator; otherwise the
            validation samples are appended to the training partition.
        trained_folder: Folder for the saved model and the placeholder file.
        root_info_folder: Parent folder of the per-model info folder.

    Raises:
        ValueError: If ``hypa["net_type"]`` is not one of ``ARN``, ``AAN``,
            ``VAN``, ``SIM`` or ``SI2``.
    """
    logg = logging.getLogger(f"c.{__name__}.train_img")
    # logg.setLevel("INFO")
    logg.debug("Start train_img")

    ##########################################################
    #   Setup folders
    ##########################################################

    # name the model
    model_name = build_img_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_path = trained_folder / f"{model_name}.h5"
    placeholder_path = trained_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            # Logger.warn is a deprecated alias; use Logger.warning
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    model_info_folder = root_info_folder / model_name
    model_info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    ##########################################################
    #   Load data
    ##########################################################

    label_type = hypa["words_type"]
    label_list = get_label_list(label_type)
    num_labels = len(label_list)

    dataset_raw_folder = Path.home() / "datasets" / "imagenet" / "imagenet_images"
    dataset_proc_base_folder = Path.home() / "datasets" / "imagenet"

    # get the partition of the data
    partition, ids2labels = prepare_partitions(label_list, dataset_raw_folder)

    # NOTE: counted before the validation samples are (optionally) merged
    # into the training partition below
    num_samples = len(partition["training"])

    # from hypa extract training param (epochs, batch, opt, ...)
    training_param = get_training_param_img(
        hypa, use_validation, model_path, num_samples
    )

    preprocess_type = hypa["dataset_name"]
    dataset_proc_folder = dataset_proc_base_folder / preprocess_type

    val_generator: ty.Optional[ImageNetGenerator] = None
    if use_validation:
        val_generator = ImageNetGenerator(
            partition["validation"],
            ids2labels,
            label_list,
            dataset_proc_folder=dataset_proc_folder,
            dataset_raw_folder=dataset_raw_folder,
            preprocess_type=preprocess_type,
            save_processed=True,
            batch_size=training_param["batch_size"],
            shuffle=True,
        )
        logg.debug("Using validation data")
    else:
        partition["training"].extend(partition["validation"])
        logg.debug("NOT using validation data")

    training_generator = ImageNetGenerator(
        partition["training"],
        ids2labels,
        label_list,
        dataset_proc_folder=dataset_proc_folder,
        dataset_raw_folder=dataset_raw_folder,
        preprocess_type=preprocess_type,
        save_processed=True,
        batch_size=training_param["batch_size"],
        shuffle=True,
    )

    # the test data is neither shuffled nor batched
    testing_generator = ImageNetGenerator(
        partition["testing"],
        ids2labels,
        label_list,
        dataset_proc_folder=dataset_proc_folder,
        dataset_raw_folder=dataset_raw_folder,
        preprocess_type=preprocess_type,
        save_processed=True,
        batch_size=1,
        shuffle=False,
    )

    ##########################################################
    #   Setup model
    ##########################################################

    input_shape = training_generator.get_img_shape()

    # from hypa extract model param
    model_param = get_model_param_img(hypa, num_labels, input_shape)

    # get the model with the chosen params
    net_type = hypa["net_type"]
    if net_type == "ARN":
        model = AreaNet.build(**model_param)
    elif net_type == "AAN":
        model = ActualAreaNet.build(**model_param)
    elif net_type == "VAN":
        model = VerticalAreaNet.build(**model_param)
    elif net_type == "SIM":
        model = SimpleNet.build(sim_type="1", **model_param)
    elif net_type == "SI2":
        model = SimpleNet.build(sim_type="2", **model_param)
    else:
        # fail with a clear message instead of an unbound `model`/`sim_type`
        raise ValueError(f"Unknown net_type: {net_type!r}")

    # a few metrics to track
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    # compile the model
    model.compile(
        optimizer=training_param["opt"],
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # recap
    recap: ty.Dict[str, ty.Any] = {}
    recap["model_name"] = model_name
    recap["words"] = label_list
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["batch_size"] = training_param["batch_size"]
    recap["epochs"] = training_param["epochs"]
    recap["lr_name"] = training_param["lr_name"]
    recap["version"] = "002"
    # logg.debug(f"recap: {recap}")
    recap_path = model_info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    # https://stackoverflow.com/a/45546663/2237151
    model_summary_path = model_info_folder / "model_summary.txt"
    with model_summary_path.open("w") as msf:
        model.summary(line_length=150, print_fn=lambda x: msf.write(x + "\n"))

    ##########################################################
    #   Fit model
    ##########################################################

    results = model.fit(
        training_generator,
        validation_data=val_generator,
        epochs=training_param["epochs"],
        batch_size=training_param["batch_size"],
        callbacks=training_param["callbacks"],
    )

    ##########################################################
    #   Save results, history, performance
    ##########################################################

    # results_recap
    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "001"

    # evaluate performance
    eval_testing = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # confusion matrix
    y_pred = model.predict(testing_generator)
    y_pred_labels = testing_generator.pred2labelnames(y_pred)
    y_true = testing_generator.get_true_labels()
    cm = confusion_matrix(y_true, y_pred_labels)
    results_recap["cm"] = cm.tolist()

    # fscore
    fscore = analyze_confusion(cm, label_list)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names
        }

    # save the results
    res_recap_path = model_info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, label_list, fscore)
    plot_cm_path = model_info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the trained model
    model.save(model_path)

    # save the placeholder (written last: it marks the run as complete)
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
def train_loop():
    """Run the training/validation epochs with early stopping.

    Relies on module-level state: ``args`` (``n_epochs``, ``alpha``,
    ``target``), ``patience``, ``model``, ``optimizer``, ``train_loader``,
    ``valid_loader``, ``device`` and ``outpath``.

    For every epoch the model weights, the normalized train/validation
    confusion-matrix plots and their pickles are written under ``outpath``.
    Training stops early once the validation loss has failed to improve for
    more than ``patience`` consecutive epochs. The loss and accuracy curves
    are plotted when the loop ends.
    """
    class_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]
    cm_prefix = outpath + '/confusion_matrix_plots/'

    t0_initial = time.time()

    losses_1_train, losses_2_train, losses_tot_train = [], [], []
    losses_1_valid, losses_2_valid, losses_tot_valid = [], [], []

    accuracies_train, accuracies_msk_train = [], []
    accuracies_valid, accuracies_msk_valid = [], []

    # any finite validation loss counts as an improvement on the first epoch
    best_val_loss = float("inf")
    stale_epochs = 0

    print("Training over {} epochs".format(args.n_epochs))

    for epoch in range(args.n_epochs):
        t0 = time.time()

        if stale_epochs > patience:
            print("breaking due to stale epochs")
            break

        # training epoch (the unnormalised confusion matrix is not used)
        model.train()
        losses_tot, losses_1, losses_2, acc, acc_msk, _, conf_matrix_norm = train(
            model, train_loader, epoch, optimizer, args.alpha, args.target, device)

        losses_tot_train.append(losses_tot)
        losses_1_train.append(losses_1)
        losses_2_train.append(losses_2)

        accuracies_train.append(acc)
        accuracies_msk_train.append(acc_msk)

        # validation step
        model.eval()
        losses_tot_v, losses_1_v, losses_2_v, acc_v, acc_msk_v, _, conf_matrix_norm_v = test(
            model, valid_loader, epoch, args.alpha, args.target, device)

        losses_tot_valid.append(losses_tot_v)
        losses_1_valid.append(losses_1_v)
        losses_2_valid.append(losses_2_v)

        accuracies_valid.append(acc_v)
        accuracies_msk_valid.append(acc_msk_v)

        # early-stopping
        if losses_tot_v < best_val_loss:
            best_val_loss = losses_tot_v
            stale_epochs = 0
        else:
            stale_epochs += 1

        # ETA from the average wall time of the epochs completed so far
        t1 = time.time()
        epochs_remaining = args.n_epochs - (epoch+1)
        time_per_epoch = (t1 - t0_initial)/(epoch + 1)
        eta = epochs_remaining*time_per_epoch/60

        print("epoch={}/{} dt={:.2f}min train_loss={:.5f} valid_loss={:.5f} train_acc={:.5f} valid_acc={:.5f} train_acc_msk={:.5f} valid_acc_msk={:.5f} stale={} eta={:.1f}m".format(
            epoch+1, args.n_epochs,
            (t1-t0)/60, losses_tot_train[epoch], losses_tot_valid[epoch], accuracies_train[epoch], accuracies_valid[epoch],
            accuracies_msk_train[epoch], accuracies_msk_valid[epoch], stale_epochs, eta))

        torch.save(model.state_dict(), "{0}/epoch_{1}_weights.pth".format(outpath, epoch))

        # per-epoch confusion matrices: cmT = training, cmV = validation
        epoch_cms = (('cmT', conf_matrix_norm), ('cmV', conf_matrix_norm_v))
        for tag, matrix in epoch_cms:
            plot_confusion_matrix(matrix, class_labels,
                                  fname=cm_prefix + tag + '_normed_epoch_' + str(epoch),
                                  epoch=epoch)
        for tag, matrix in epoch_cms:
            with open(cm_prefix + tag + '_normed_epoch_' + str(epoch) + '.pkl', 'wb') as f:
                pickle.dump(matrix, f)

    # NOTE(review): the curves are plotted once, after the epoch loop; the
    # source layout did not preserve indentation - confirm this placement.
    curves = [
        (losses_tot_train, 'train loss_tot', 'Loss', 'losses_tot_train'),
        (losses_1_train, 'train loss_1', 'Loss', 'losses_1_train'),
        (losses_2_train, 'train loss_2', 'Loss', 'losses_2_train'),
        (losses_tot_valid, 'valid loss_tot', 'Loss', 'losses_tot_valid'),
        (losses_1_valid, 'valid loss_1', 'Loss', 'losses_1_valid'),
        (losses_2_valid, 'valid loss_2', 'Loss', 'losses_2_valid'),
        (accuracies_train, 'train accuracy', 'Accuracy', 'accuracies_train'),
        (accuracies_msk_train, 'train accuracy_msk', 'Accuracy', 'accuracies_msk_train'),
        (accuracies_valid, 'valid accuracy', 'Accuracy', 'accuracies_valid'),
        (accuracies_msk_valid, 'valid accuracy_msk', 'Accuracy', 'accuracies_msk_valid'),
    ]
    for values, title, ylabel, fname in curves:
        make_plot_from_list(values, title, 'Epochs', ylabel, outpath, fname)

    print('Done with training.')
# Post-process the predictions with `predictions_vote`.
y_pred = predictions_vote(y_pred, VOTE_WINDOW)

# Accuracy
# NOTE: the messages below use a single pre-formatted argument so that
# `print(...)` emits the same text as the former Python 2 `print a, b`
# statement and is valid syntax on Python 3 as well.
acc = accuracy_score(y_true, y_pred)
print('Accuracy on test set: %s' % acc)

# Find labels in use and map them to their display names
label_list = sorted(df.y_true.unique())
label_list = [LABELS[label] for label in label_list]

# Plot normalized confusion matrix
cnf_matrix = confusion_matrix(y_true, y_pred)
output_file = os.path.join(OUTPUT_DIR, 'confusion_matrix.png')
_ = plot_confusion_matrix(cnf_matrix,
                          output_file,
                          classes=label_list,
                          normalize=True,
                          title='Confusion matrix (accuracy=%.2f)' % acc)
print('Plot saved: %s' % output_file)

# Classification report (F1 score, etc.)
clf_report = classification_report(y_true, y_pred, target_names=label_list)
output_file = os.path.join(OUTPUT_DIR, 'classification_report.png')
plot_classification_report(clf_report, output_file)
print('Plot saved: %s' % output_file)

# Plot predictions
output_file = os.path.join(OUTPUT_DIR, 'predictions.html')
title = 'Predictions (accuracy=%s)' % acc
plot_predictions(t, X_values, y_true, y_pred, output_file, title)
print('Plot saved: %s' % output_file)
def evaluate_model_cnn(which_dataset: str, train_words_type: str,
                       test_words_type: str) -> None:
    """Load a trained CNN and show its confusion matrix on the test split.

    The model file is ``trained_models/cnn/<model_name>.h5``, where the name
    is built from a fixed set of hyperparameters plus ``which_dataset`` and
    ``train_words_type``. The data is loaded from ``data_proc/<which_dataset>``
    for the words selected by ``test_words_type``.

    Args:
        which_dataset: Name of the processed dataset; used both in the model
            name and as the data folder name.
        train_words_type: Words type the model was trained on (part of the
            model name).
        test_words_type: Key into ``words_types`` selecting the words to
            evaluate on.

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_cnn")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_model_cnn")

    # magic to fix the GPUs
    setup_gpus()

    # hyperparameters identifying the trained model to load
    hypa: ty.Dict[str, ty.Union[str, int]] = {
        "base_dense_width": 32,
        "base_filters": 32,
        "batch_size": 32,
        "dropout_type": "01",
        "epoch_num": 15,
        "kernel_size_type": "02",
        "learning_rate_type": "04",
        "optimizer_type": "a1",
        "pool_size_type": "01",
    }
    hypa["dataset"] = which_dataset
    hypa["words"] = train_words_type

    # get the words
    test_words = words_types[test_words_type]

    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    model_folder = Path("trained_models") / "cnn"
    model_path = model_folder / f"{model_name}.h5"
    if not model_path.exists():
        logg.error(f"Model not found at: {model_path}")
        # carry the path in the exception too, not only in the log
        raise FileNotFoundError(f"Model not found at: {model_path}")

    model = tf.keras.models.load_model(model_path)
    model.summary()

    # input data
    processed_path = Path("data_proc") / f"{which_dataset}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the test split
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore)

    plt.show()
metrics=[keras.metrics.mean_squared_error]) model.build(input_shape=input_shape) print("Input shape:", input_shape) model.summary() # ====== start the training ====== # records = keras.callbacks.History() model.fit(X_train, y_train, callbacks=[records], epochs=NUM_EPOCH, batch_size=BATCH_SIZE, validation_split=0.1) # ====== plot the learning curve ====== # # TODO: ploting mean squared error metrics extracted from # training history plt.figure(figsize=(8, 3)) # (ncol, nrow) # =========================================================================== # Evaluate the model # =========================================================================== # ====== evaluate the test data ====== # y_pred_probas = model.predict(X_score, batch_size=BATCH_SIZE) # TODO: we have `y_pred_probas` is the predicted probabilities for # each classes, and `y_pred` is the predicted labels, replace `None` # with appropriate value y_pred = None score_cm = confusion_matrix(y_true=np.argmax(y_score, axis=-1), y_pred=y_pred) # ====== plotting the results ====== # plt.figure(figsize=(16, 8)) # (ncol, nrow) plot_confusion_matrix(score_cm, labels=digits, fontsize=8, title="Score Set") plot_save(FIG_PATH)
def test_audio_generator(words_type: str) -> None:
    """Smoke-test ``AudioGenerator`` by training a small CNN on its batches.

    Builds training, validation and testing generators for the words selected
    by ``words_type``, fits a ``CNNmodel`` for a few epochs, evaluates it on
    the testing generator and shows the resulting confusion matrix.
    """
    logg = logging.getLogger(f"c.{__name__}.test_audio_generator")
    # logg.setLevel("INFO")
    logg.debug("Start test_audio_generator")

    partition, ids2labels = prepare_partitions(words_type)

    # peek at the first few IDs of every fold
    for fold in partition:
        logg.debug(f"partition[{fold}][:4]: {partition[fold][:4]}")

    logg.debug(f"\nlen(ids2labels): {len(ids2labels)}")
    # log a single ID -> label pair as a sample
    for ID in ids2labels:
        logg.debug(f"ids2labels[{ID}]: {ids2labels[ID]}")
        break

    words = words_types[words_type]

    # settings common to all three generators
    shared_params = {
        "dim": (64, 64),
        "label_names": words,
        "data_split_paths": [Path("data_split") / "mel04"],
    }
    fit_params = dict(shared_params, batch_size=32, shuffle=True)
    # do not shuffle the test data
    # do not batch it, no loss of stray data at the end
    test_params = dict(shared_params, batch_size=1, shuffle=False)

    training_generator = AudioGenerator(
        partition["training"], ids2labels, **fit_params
    )
    logg.debug(f"len(training_generator): {len(training_generator)}")

    val_generator = AudioGenerator(
        partition["validation"], ids2labels, **fit_params
    )
    logg.debug(f"len(val_generator): {len(val_generator)}")

    testing_generator = AudioGenerator(
        partition["testing"], ids2labels, **test_params
    )
    logg.debug(f"len(testing_generator): {len(testing_generator)}")

    X, y = training_generator[0]
    logg.debug(f"X.shape: {X.shape} y.shape: {y.shape}")

    model_param: ty.Dict[str, ty.Any] = {
        "num_labels": len(words),
        "input_shape": (64, 64, 1),
        "base_dense_width": 32,
        "base_filters": 20,
        "dropouts": [0.03, 0.01],
        "kernel_sizes": [(5, 1), (3, 3), (3, 3)],
        "pool_sizes": [(2, 1), (2, 2), (2, 2)],
    }
    model = CNNmodel(**model_param)
    # model.summary()

    tracked_metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=tracked_metrics,
    )

    epoch_num = 5
    model.fit(training_generator, validation_data=val_generator, epochs=epoch_num)

    test_scores = model.evaluate(testing_generator)
    for metrics_name, value in zip(model.metrics_names, test_scores):
        logg.debug(f"{metrics_name}: {value}")

    predictions = model.predict(testing_generator)
    predicted_names = testing_generator.pred2labelnames(predictions)
    true_names = testing_generator.get_true_labels()

    cm = confusion_matrix(true_names, predicted_names)
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, "Test generator", words, fscore)
    fig.tight_layout()
    plt.show()
def validation_epoch_end(self, outputs):
    """Log a confusion-matrix figure for the whole validation epoch.

    Concatenates the ``'preds'`` and ``'target'`` entries of every item in
    ``outputs``, plots their confusion matrix and adds the figure to the
    logger's experiment under the current epoch.
    """
    epoch_preds = torch.cat([step_out['preds'] for step_out in outputs])
    epoch_targets = torch.cat([step_out['target'] for step_out in outputs])

    cm_figure = plot_confusion_matrix(epoch_preds, epoch_targets, n_classes=10)

    writer = self.logger.experiment
    writer.add_figure("Confusion matrix", cm_figure, self.current_epoch)
def make_plots(true_id, true_p4, pred_id, pred_p4, target, epoch, outpath):
    """Write the test-time confusion matrix, regression and particle plots.

    Args:
        true_id: Tensor of true class scores; the class index is taken with
            ``torch.max(..., -1)``.
        true_p4: Tensor of true per-particle features; columns 0-5 are read
            as charge, pt, eta, sin phi, cos phi and E respectively.
        pred_id: Tensor of predicted class scores.
        pred_p4: Tensor of predicted per-particle features, same column
            layout as ``true_p4``.
        target: Passed through to the plotting helpers.
        epoch: Epoch number, used in the confusion-matrix file names.
        outpath: Output path prefix; file names are appended to it directly.
    """
    class_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]

    # Move both label arrays to host memory before handing them to sklearn;
    # `.numpy()` is not available on tensors that live on an accelerator.
    conf_matrix_norm = sklearn.metrics.confusion_matrix(
        torch.max(true_id, -1)[1].detach().cpu().numpy(),
        np.argmax(pred_id.detach().cpu().numpy(), axis=1),
        labels=range(6),
        normalize="true")

    plot_confusion_matrix(conf_matrix_norm, class_labels,
                          fname=outpath + 'conf_matrix_norm_test' + str(epoch),
                          epoch=epoch)
    with open(outpath + '/conf_matrix_norm_test' + str(epoch) + '.pkl', 'wb') as f:
        pickle.dump(conf_matrix_norm, f)

    _, true_id = torch.max(true_id, -1)
    _, pred_id = torch.max(pred_id, -1)

    # keep only entries where both the truth and the prediction are not class 0
    msk = (pred_id != 0) & (true_id != 0)

    def _masked_column(p4, column):
        """Return one masked feature column as a flat host-side array."""
        return p4[msk, column].flatten().detach().cpu().numpy()

    # (axis label, p4 column, bins, file stem, also plot the distribution?)
    quantities = [
        ("charge", 0, np.linspace(-2, 2, 100), 'charge', False),
        ("pt", 1, np.linspace(0, 5, 100), 'pt', True),
        ("E", 5, np.linspace(-1, 5, 100), 'energy', True),
        ("eta", 2, np.linspace(-5, 5, 100), 'eta', True),
        ("sin phi", 3, np.linspace(-2, 2, 100), 'sphi', True),
        ("cos phi", 4, np.linspace(-2, 2, 100), 'cphi', True),
    ]
    for label, column, bins, stem, with_distribution in quantities:
        true_vals = _masked_column(true_p4, column)
        pred_vals = _masked_column(pred_p4, column)
        plot_regression(true_vals, pred_vals, label, bins, target,
                        fname=outpath + stem + '_regression')
        if with_distribution:
            plot_distributions(true_vals, pred_vals, label, bins, target,
                               fname=outpath + stem + '_distribution')

    plot_particles(outpath + 'particleID1', true_id, true_p4, pred_id, pred_p4, pid=1)
    plot_particles(outpath + 'particleID2', true_id, true_p4, pred_id, pred_p4, pid=2)
def main():
    """Train and evaluate every configured algorithm on the chosen dataset.

    Behaviour is driven by module-level settings: ``USE_DATASET`` (``'spam'``
    or ``'aps'``), ``BALANCE_METHOD``, ``PRETRAINED_MODEL_FILEPATH``,
    ``N_CV``, ``N_LC_CHUNKS``, ``SAVE_MODELS`` and ``PLOT_ACTION``.

    When ``PRETRAINED_MODEL_FILEPATH`` is set, only that model is loaded and
    evaluated. Otherwise each algorithm of each family is trained, optionally
    saved, evaluated, and the per-family learning-curve and confusion-matrix
    plots are produced when ``PLOT_ACTION`` is set.

    Raises:
        ValueError: If ``USE_DATASET`` is not a supported dataset name.
        Exception: If the pretrained model cannot be loaded or a trained
            model cannot be evaluated (the original error is chained).
    """
    # set ID for one run, so all the algos have the same ID
    algo_batch_id = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))

    algo_family_generator_dict = {
        'DecisionTree': {
            'DT-MostPruning': generate_decision_tree_most_pruning,
            'DT-MiddlePruning': generate_decision_tree_middle_pruning,
            'DT-LeastPruning': generate_decision_tree_least_pruning
        },
        'SVM-RBF': {
            'SVM-RBF-C1': generate_svm_rbf_c1,
            'SVM-RBF-C5': generate_svm_rbf_c5,
            'SVM-RBF-C100': generate_svm_rbf_c100
        },
        'SVM-Poly': {
            'SVM-Poly-C5-D2': generate_svm_poly_c5_degree2,
            'SVM-Poly-C5-D3': generate_svm_poly_c5_degree3,
            'SVM-Poly-C5-D4': generate_svm_poly_c5_degree4
        },
        'KNN': {
            'KNN-K10-P1': generate_knn_10_p1,
            'KNN-K10-P2': generate_knn_10_p2,
            'KNN-K5-P2': generate_knn_5_p2
        },
        'GradientBoostingTree': {
            'GBT-LR.01': generate_gradient_boosting_tree_lr01,
            'GBT-LR.05': generate_gradient_boosting_tree_lr05,
            'GBT-LR.1': generate_gradient_boosting_tree_lr1
        },
        'NeuralNetwork': {
            'MLP-LR.001': generate_sk_mlp_classifier_lr001,
            'MLP-LR.01': generate_sk_mlp_classifier_lr01,
            'MLP-LR.1': generate_sk_mlp_classifier_lr1
        }
    }

    # load dataset
    if USE_DATASET == 'spam':
        df = pd.read_csv('data/spam/spambasedata.csv', sep=',')
        print('using the dataset stored in ./data/spam')

        # shuffle data before splitting to train and test
        df = df.sample(frac=1).reset_index(drop=True)
        train_frac = 0.8
        train_samples = int(round(df.shape[0] * train_frac))
        dirty_train_df = df.iloc[:train_samples, :]
        dirty_test_df = df.iloc[train_samples:, :]
        class_col = 'class'

    elif USE_DATASET == 'aps':
        dirty_train_df = pd.read_csv('data/aps/aps_failure_training_set.csv',
                                     na_values=['na'])
        dirty_test_df = pd.read_csv('data/aps/aps_failure_test_set.csv',
                                    na_values=['na'])
        print('using the dataset stored in ./data/aps')
        class_col = 'class'

    else:
        # fail early instead of hitting unbound dataframes further down
        raise ValueError('unsupported USE_DATASET: {!r}'.format(USE_DATASET))

    # clean both datasets
    scaler = preprocessing.MinMaxScaler()
    [train_df, test_df] = clean_and_scale_dataset(
        {
            'train': dirty_train_df,
            'test': dirty_test_df
        },
        scaler=scaler,
        na_action=-1)

    # prep the datasets
    [train_dataset, test_dataset], label_encoder = prep_data(
        {
            'train': train_df,
            'test': test_df
        },
        shuffle_data=True,
        balance_method=BALANCE_METHOD,
        class_col=class_col)

    print('\nTRAINING DATA INFORMATION')
    print('{} maps to {}'.format(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_)))
    print('size of training dataset:', train_dataset.data.shape)
    print('class counts:\n', train_dataset.df[class_col].value_counts())

    if PRETRAINED_MODEL_FILEPATH:
        # Only the load is guarded, so an evaluation failure is not reported
        # as a load failure; the original cause is chained.
        # NOTE(review): the helper name suggests pickle is used - only point
        # PRETRAINED_MODEL_FILEPATH at trusted files.
        try:
            algo = pickle_load_model(model_path=PRETRAINED_MODEL_FILEPATH)
        except Exception as err:
            raise Exception(
                'failed to load specified model, aborting') from err
        print('loaded algo: {} - {}'.format(algo.model_type, algo.id))
        evaluate_model(algo,
                       test_dataset,
                       classes_list=label_encoder.classes_)
        # break out of main function early if a single file is specified
        return None

    detail_columns = [
        'Model Name', 'Precision', 'Recall', 'F1', 'ROC-AUC', 'Accuracy',
        'Balanced Accuracy', 'Training Time'
    ]
    # one dict per trained model; turned into a DataFrame after the loops
    # (DataFrame.append was removed in pandas 2.0)
    detail_rows = []

    for algo_family, algo_generator_dict in algo_family_generator_dict.items():
        print('\n\nalgorithm family:', algo_family)
        print('algorithms to test:', list(algo_generator_dict))

        algo_list = []
        for algo_key, algo_generator in algo_generator_dict.items():
            print('\nmodel name: {}'.format(algo_key))

            algo = algo_generator(id=algo_batch_id)
            algo = train_model(algo,
                               train_dataset,
                               n_folds=N_CV,
                               n_chunks=N_LC_CHUNKS)

            if SAVE_MODELS:
                pickle_save_model(algo,
                                  model_folder='output/' + str(algo.id) +
                                  '/models')

            try:
                evaluate_model(algo,
                               test_dataset,
                               classes_list=label_encoder.classes_)
            except Exception as err:
                raise Exception('unable to evaluate model') from err

            algo_list.append(algo)

            # store algo details
            detail_rows.append({
                'Model Name': algo.model_type,
                'Precision': algo.precision,
                'Recall': algo.recall,
                'F1': algo.f1,
                'ROC-AUC': algo.roc_auc,
                'Accuracy': algo.accuracy,
                'Balanced Accuracy': algo.balanced_accuracy,
                'Training Time': algo.training_time
            })

        if PLOT_ACTION:
            plot_model_family_learning_curves(
                algo_family,
                algo_list,
                figure_action=PLOT_ACTION,
                figure_path='output/' + str(algo_batch_id) + '/figures/lc',
                file_name=(str(algo_family)))
            plot_confusion_matrix(
                algo_family,
                algo_list,
                label_encoder.classes_,
                figure_action=PLOT_ACTION,
                figure_path='output/' + str(algo_batch_id) + '/figures/cm',
                file_name=(str(algo_family)))

    detail_df = pd.DataFrame(detail_rows, columns=detail_columns)
def evaluate_model_area(model_name: str, test_words_type: str) -> None:
    r"""Load a trained area model and plot its confusion matrix on test data.

    The dataset name and the training words type are parsed from
    ``model_name``: the dataset is the text between ``_ds`` and the next
    ``_``; the words type is the text after ``_w`` up to the next ``_`` or
    ``.`` (or the end of the name). The model is loaded from
    ``trained_models/area/<model_name>.h5`` and the confusion matrix is saved
    as PNG and PDF under ``plot_results/cm`` before being shown.

    Args:
        model_name: Name of the model file, without the ``.h5`` extension.
        test_words_type: Key into ``words_types`` selecting the words to
            evaluate on.

    Raises:
        ValueError: If the dataset name or the training words type cannot be
            parsed from ``model_name``.
    """
    logg = logging.getLogger(f"c.{__name__}.evaluate_model_area")
    # logg.setLevel("INFO")
    logg.debug("Start evaluate_model_area")

    # magic to fix the GPUs
    setup_gpus()

    logg.debug(f"model_name: {model_name}")

    # dataset name: "..._ds<dataset>_..."
    match = re.search(r"_ds(.*?)_", model_name)
    if match is None:
        raise ValueError(f"No dataset name found in model name: {model_name}")
    logg.debug(f"match[1]: {match[1]}")
    dataset_name = match[1]

    # train words type: "..._w<type>" followed by "_", "." or the end of the
    # name (the words type may be the last field of the model name)
    match = re.search(r"_w(.*?)(?:[_.]|$)", model_name)
    if match is None:
        raise ValueError(f"No words type found in model name: {model_name}")
    logg.debug(f"match[1]: {match[1]}")
    train_words_type = match[1]

    # load the model
    model_folder = Path("trained_models") / "area"
    model_path = model_folder / f"{model_name}.h5"
    model = tf_models.load_model(model_path)
    # model.summary()

    train_words = words_types[train_words_type]
    logg.debug(f"train_words: {train_words}")
    test_words = words_types[test_words_type]
    logg.debug(f"test_words: {test_words}")

    # input data
    processed_path = Path("data_proc") / f"{dataset_name}"
    data, labels = load_processed(processed_path, test_words)
    logg.debug(f"list(data.keys()): {list(data.keys())}")
    logg.debug(f"data['testing'].shape: {data['testing'].shape}")

    # evaluate on the test split
    logg.debug("Evaluate on test data:")
    model.evaluate(data["testing"], labels["testing"])

    # predict labels/cm/fscore
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, test_words)
    fscore = analyze_confusion(cm, test_words)
    logg.debug(f"fscore: {fscore}")

    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, test_words, fscore, train_words)

    # "{{}}" leaves a "{}" placeholder for the file extension
    fig_name = f"{model_name}_test{test_words_type}_cm.{{}}"
    cm_folder = Path("plot_results") / "cm"
    cm_folder.mkdir(parents=True, exist_ok=True)

    plot_cm_path = cm_folder / fig_name.format("png")
    fig.savefig(plot_cm_path)
    plot_cm_path = cm_folder / fig_name.format("pdf")
    fig.savefig(plot_cm_path)

    plt.show()
def climbing_game():
    """
    Plotting for experiments with the climbing game payoffs.

    Loads the pickled ``partial_configs`` and ``output<i>`` files of the main
    and the baseline experiment folders and draws four figures:

    * normalized reward for all algorithms (``ax11``),
    * percentage of optimal actions for all algorithms (``ax12``),
    * the same two quantities restricted to IQL / InfoQ / InfoPolicy plus the
      baseline runs (``ax112`` and ``ax122``).

    It then plots confusion matrices and Venn diagrams for the first and the
    second-to-last main run. Nothing is returned or saved (the ``savefig``
    calls are disabled).
    """
    exp_dir = "Experiments/PaperExperiments/{}/".format("1591036032")
    baseline_dir = "Experiments/PaperExperiments/{}/".format("1598035516")

    with open(exp_dir + "partial_configs", "rb") as f:
        partial_configs = pickle.load(f)
    with open(baseline_dir + "partial_configs", "rb") as f:
        baseline_partial_configs = pickle.load(f)

    fig11, ax11 = init_fig()
    fig112, ax112 = init_fig()
    fig12, ax12 = init_fig()
    fig122, ax122 = init_fig()

    cps = []
    ns = []

    legends = [
        "IQL",
        "IQ",
        "ModelS",
        "ModelR",
        "Hysteretic-Q",
        "Lenience",
        "Info-Q",
        "Info-Policy",
        "Comm-Bias",
    ]
    baseline_legends = [
        "IQL",
        "Info-Q",
        "Info-Policy",
        "Fixed messages",
        "Fixed actions",
    ]

    def _plot_band(ax, xs, mean, sem, color, label, scale=1):
        """Plot ``mean`` with a +/- ``sem`` band, both multiplied by ``scale``."""
        ax.plot(xs, mean * scale, color=color, label=label)
        ax.fill_between(
            xs, (mean - sem) * scale, (mean + sem) * scale, alpha=0.5, color=color
        )

    def _annotate(ax, xs, ys, labels):
        """Write each label at its hand-placed position, in its curve colour."""
        for i, (x, y, label) in enumerate(zip(xs, ys, labels)):
            # curve 5 is labelled in black instead of its own colour
            color = color_sequence[i] if i != 5 else "#000000"
            ax.text(x, y, label, color=color)

    # position in the baseline figures; keeps counting across both loops
    baseline_i = 0

    for i in range(len(partial_configs)):
        with open(exp_dir + "output{}".format(i), "rb") as f:
            op = pickle.load(f)

        rs_mean, rs_sem = split_mean(0, op, 10)
        opts_mean, opts_sem = split_mean(2, op, 10)
        cps.append(op[0][-2])
        ns.append(op[0][-1])

        xs = np.arange(len(rs_mean)) * 10
        _plot_band(ax11, xs, rs_mean, rs_sem, color_sequence[i], legends[i])
        _plot_band(
            ax12, xs, opts_mean, opts_sem, color_sequence[i], legends[i], scale=100
        )

        if partial_configs[i]["algorithm"] in ["IQL", "InfoQ", "InfoPolicy"]:
            # The band uses the same colour as its line (baseline_i); the
            # ax122 band previously used color_sequence[i] by mistake.
            color = color_sequence[baseline_i]
            label = baseline_legends[baseline_i]
            _plot_band(ax112, xs, rs_mean, rs_sem, color, label)
            _plot_band(ax122, xs, opts_mean, opts_sem, color, label, scale=100)
            baseline_i += 1

    for i in range(len(baseline_partial_configs)):
        with open(baseline_dir + "output{}".format(i), "rb") as f:
            op = pickle.load(f)

        rs_mean, rs_sem = split_mean(0, op, 10)
        opts_mean, opts_sem = split_mean(2, op, 10)

        xs = np.arange(len(rs_mean)) * 10
        color = color_sequence[baseline_i]
        label = baseline_legends[baseline_i]
        _plot_band(ax112, xs, rs_mean, rs_sem, color, label)
        _plot_band(ax122, xs, opts_mean, opts_sem, color, label, scale=100)
        baseline_i += 1

    # hand-placed label positions, in reward units
    label_xs = [0, 1000, 800, 800, 280, 800, 200, -30, 0]
    label_ys = np.array([0, 0.885, 0.93, 1.9, 1.945, 0.82, 1, 0.95, 1.9])

    _annotate(ax11, label_xs, label_ys, legends)
    # the "% of optimal actions" axes are plotted on a 0-100 scale
    _annotate(ax12, label_xs, label_ys * 100, legends)

    ax11.set_xlabel("Episodes")
    ax11.set_ylabel("Normalized reward")
    ax12.set_xlabel("Episodes")
    ax12.set_ylabel("% of optimal actions")

    # zip() stops at the five baseline legends
    _annotate(ax112, label_xs, label_ys, baseline_legends)
    ax112.set_xlabel("Episodes")
    ax112.set_ylabel("Normalized reward")

    # scaled by 100 like ax12, since ax122 plots percentages too
    _annotate(ax122, label_xs, label_ys * 100, baseline_legends)
    ax122.set_xlabel("Episodes")
    ax122.set_ylabel("% of optimal actions")

    # fig11.savefig(exp_dir + "norm_reward.pdf", format="pdf")
    # fig12.savefig(exp_dir + "opts.pdf", format="pdf")

    # np.int was only an alias of the builtin int and has been removed
    fig21, ax21 = plot_confusion_matrix(cps[0].astype(int))
    fig22, ax22 = plot_confusion_matrix(cps[-2].astype(int))
    # fig21.savefig(exp_dir + "cp_iql.pdf", format="pdf", bbox_inches="tight")
    # fig22.savefig(exp_dir + "cp_infoq.pdf", format="pdf", bbox_inches="tight")

    fig31, ax31 = plot_venn(ns[0])
    fig32, ax32 = plot_venn(ns[-2])
def main():
    """Build, train and evaluate the baseline CNN described by ``model_config.cnn_baseline``.

    The network is one convolution + max-pooling stage followed by two
    fully connected ReLU layers with dropout and a linear output layer.
    It is trained with Adam on a softmax cross-entropy loss plus a squared
    weight penalty on the three dense weight matrices. After every epoch the
    mean minibatch error on the train, validation and test sets is recorded.

    Side effects: saves a checkpoint to ``./models/<MODEL_NAME>_final.ckpt``,
    prints/writes the error summaries via ``print_utils`` and produces plots
    via ``plot_utils``.
    """
    # reset tf graph
    tf.reset_default_graph()

    # get model configuration
    model_configs = model_config.cnn_baseline

    # load data
    train, valid, test = data.load_data_with_augmentation(
        n_train_samples_per_class=model_configs['n_train_samples_per_class'],
        classes=np.asarray(model_configs['classes']))

    # get number of samples per dataset
    n_train_samples = train.images.shape[0]
    n_valid_samples = valid.images.shape[0]
    n_test_samples = test.images.shape[0]

    # define input and output
    input_width = model_configs['input_width']
    n_input = model_configs['n_input']
    n_classes = np.asarray(model_configs['classes']).shape[0]

    # define training hyper-parameters
    n_epochs = model_configs['n_epochs']
    minibatch_size = model_configs['minibatch_size']
    learning_rate = model_configs['learning_rate']
    regularization_term = model_configs['regularization_term']
    keep_probability = model_configs['keep_probability']

    # define conv layer architecture
    filter_size = model_configs['filter_size']
    num_filters = model_configs['num_filters']
    conv_stride = model_configs['conv_stride']
    max_pool_stride = model_configs['max_pool_stride']
    pool_size = model_configs['pool_size']
    padding = model_configs['padding']

    # define FC NN architecture
    fc1_size = model_configs['fc1_size']
    fc2_size = model_configs['fc2_size']

    # define visualization parameters
    vis_layers = np.arange(0, 8)  # selected filter visualization layers

    # define placeholders
    X = tf.placeholder(tf.float32, shape=(None, n_input), name="X")
    y = tf.placeholder(tf.int32, shape=(None, n_classes), name="y")
    keep_prob = tf.placeholder(tf.float32)

    # input reshaping: flat vectors -> square single-channel images
    X_image = tf.reshape(X, [-1, input_width, input_width, 1])

    # convolutional layer 1
    with tf.variable_scope("conv_1"):
        W_conv1 = weight_variable([filter_size, filter_size, 1, num_filters])
        b_conv1 = bias_variable([num_filters])
        h_conv1 = tf.nn.relu(
            conv2d(X_image, W_conv1, stride=conv_stride, padding=padding)
            + b_conv1)

    # NOTE(review): both formulas below are the output size of an un-padded
    # ("VALID") window operation; confirm model_configs['padding'] matches.
    # convolutional output dimension (builtin int: np.int was removed from NumPy)
    conv_out_dim = int(
        np.floor((input_width - filter_size) / conv_stride + 1))

    # max pooling output dimension
    max_pool_out_dim = int(
        np.floor((conv_out_dim - pool_size) / max_pool_stride + 1))

    # number of features fed to the first dense layer
    flat_dim = max_pool_out_dim * max_pool_out_dim * num_filters

    # max pooling layer 1
    with tf.variable_scope("pool_1"):
        h_pool1 = max_pool(h_conv1,
                           stride=max_pool_stride,
                           pool_size=pool_size,
                           padding=padding)
        h_pool1_flat = tf.reshape(h_pool1, [-1, flat_dim])

    # fully connected layer 1
    with tf.variable_scope("fc_1"):
        W_fc1 = weight_variable([flat_dim, fc1_size])
        b_fc1 = bias_variable([fc1_size])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
        h_fc1_dropout = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # fully connected layer 2
    with tf.variable_scope("fc_2"):
        W_fc2 = weight_variable([fc1_size, fc2_size])
        b_fc2 = bias_variable([fc2_size])
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1_dropout, W_fc2) + b_fc2)
        h_fc2_dropout = tf.nn.dropout(h_fc2, keep_prob=keep_prob)

    # output layer
    with tf.variable_scope("output_1"):
        W_fc3 = weight_variable([fc2_size, n_classes])
        b_fc3 = bias_variable([n_classes])
        y_conv = tf.matmul(h_fc2_dropout, W_fc3) + b_fc3

    # compute losses
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv))
    # the dense weights are fetched back from the graph by name
    W1 = tf.get_default_graph().get_tensor_by_name("fc_1/weight:0")
    W2 = tf.get_default_graph().get_tensor_by_name("fc_2/weight:0")
    W3 = tf.get_default_graph().get_tensor_by_name("output_1/weight:0")
    reg_loss = tf.reduce_sum(tf.pow(tf.abs(W1), 2)) + \
        tf.reduce_sum(tf.pow(tf.abs(W2), 2)) + \
        tf.reduce_sum(tf.pow(tf.abs(W3), 2))
    cost = cross_entropy + (reg_loss * regularization_term)

    # compute predictions and error
    prediction = tf.argmax(y_conv, axis=1)
    correct = tf.equal(tf.argmax(y_conv, axis=1), tf.argmax(y, axis=1))
    error = 1 - tf.reduce_mean(tf.cast(correct, tf.float32))

    # compute confusion matrix
    confusion_matrix = tf.confusion_matrix(tf.argmax(y, axis=1),
                                           prediction,
                                           num_classes=n_classes)

    # training op
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # initialize variables and session
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # number of full minibatches per pass; constant across epochs
    train_iterations = n_train_samples // minibatch_size
    valid_iterations = n_valid_samples // minibatch_size
    test_iterations = n_test_samples // minibatch_size

    def _mean_minibatch_error(dataset, n_iterations):
        """Average ``error`` (dropout disabled) over ``n_iterations`` minibatches.

        Must be called while a default session is active.
        """
        total_error = 0.
        for _ in range(n_iterations):
            data_mb, label_mb = dataset.next_batch(minibatch_size)
            total_error += error.eval(feed_dict={
                X: data_mb,
                y: label_mb,
                keep_prob: 1.0
            })
        return total_error / n_iterations

    with tf.Session() as sess:
        init.run()

        # initialize cost and error variables
        train_iteration_errors = []
        train_errors = []
        valid_errors = []
        test_errors = []

        start_time = time()
        for epoch in range(n_epochs):
            if epoch % 10 == 0:
                print("--- epoch: {}".format(epoch))

            # reset error each epoch
            epoch_train_error = 0.

            for _ in range(train_iterations):
                # Get next batch of training data and labels
                train_data_mb, train_label_mb = train.next_batch(
                    minibatch_size)

                # compute error on the batch before updating on it
                train_mb_error = error.eval(feed_dict={
                    X: train_data_mb,
                    y: train_label_mb,
                    keep_prob: 1.0
                })
                epoch_train_error += train_mb_error
                train_iteration_errors.append(train_mb_error)

                # training operation (dropout active)
                sess.run(optimizer,
                         feed_dict={
                             X: train_data_mb,
                             y: train_label_mb,
                             keep_prob: keep_probability
                         })

            # compute average train epoch error
            train_errors.append(epoch_train_error / train_iterations)

            # compute valid epoch error through mini-batches
            valid_errors.append(
                _mean_minibatch_error(valid, valid_iterations))

            # compute test epoch error through mini-batches
            test_errors.append(_mean_minibatch_error(test, test_iterations))
        end_time = time()

        # print training time
        print("training time: {0:.2f} secs".format(end_time - start_time))

        # save final model
        saver.save(sess, "./models/{}_final.ckpt".format(MODEL_NAME))

        # plot confusion matrix on the full test set
        confusion_mat = confusion_matrix.eval(feed_dict={
            X: test.images,
            y: test.labels,
            keep_prob: 1.0
        })
        plot_utils.plot_confusion_matrix(confusion_mat)

        # print final errors
        print_utils.print_final_error(train_errors[-1], valid_errors[-1],
                                      test_errors[-1])

        # print test error based on best valid epoch
        print_utils.print_best_valid_epoch(train_errors, valid_errors,
                                           test_errors)
        print_utils.write_errors_to_file(train_errors, valid_errors,
                                         test_errors, model_configs,
                                         MODEL_NAME)

        # plot error vs. epoch
        plot_utils.plot_epoch_errors(train_errors,
                                     valid_errors,
                                     prefix=MODEL_NAME)
        plot_utils.plot_train_iteration_errors(train_iteration_errors,
                                               prefix=MODEL_NAME)
        plot_utils.plot_cnn_kernels(vis_layers, W_conv1, prefix=MODEL_NAME)
def train_attention(hypa: ty.Dict[str, str], force_retrain: bool, use_validation: bool) -> None:
    """Train, evaluate and save the attention model described by ``hypa``.

    Args:
        hypa: hyper-parameter codes. This function reads the keys
            ``words_type``, ``dataset_name``, ``batch_size_type``,
            ``epoch_num_type``, ``learning_rate_type`` and
            ``optimizer_type``; the whole dict is also passed to
            ``build_attention_name`` and ``get_model_param_attention``.
        force_retrain: retrain even if the placeholder file of a previous
            run of the same model exists.
        use_validation: if True, train on the training split and validate on
            the validation split; otherwise train on both concatenated.

    Raises:
        KeyError: if one of the type codes in ``hypa`` is not a key of the
            corresponding lookup table.

    Side effects: writes ``recap.json``, ``results_recap.json``,
    ``test_confusion_matrix.png`` (and ``clr_recap.json`` when a cyclic
    learning rate is used) under ``info/attention/<model_name>``, and the
    model plus a ``.txt`` placeholder under ``trained_models/attention``.
    """
    logg = logging.getLogger(f"c.{__name__}.train_attention")
    # logg.setLevel("INFO")
    logg.debug("Start train_attention")

    # build the model name
    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "attention"
    model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "attention" / model_name
    info_folder.mkdir(parents=True, exist_ok=True)

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_attention(hypa, num_labels, input_shape)

    batch_size_types = {"01": 32, "02": 16}
    batch_size = batch_size_types[hypa["batch_size_type"]]

    epoch_num_types = {"01": 15, "02": 30, "03": 2, "04": 4}
    epoch_num = epoch_num_types[hypa["epoch_num_type"]]

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "exp_decay_step_01",
        "04": "exp_decay_smooth_01",
        "05": "clr_triangular2_01",
        "06": "clr_triangular2_02",
        "07": "clr_triangular2_03",
        "08": "clr_triangular2_04",
        "09": "clr_triangular2_05",
        "10": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values; the non-fixed schedules start from 1e-3 and
    # are then driven by the callbacks configured below
    if lr_value == "fixed02":
        lr = 1e-4
    else:
        lr = 1e-3

    # map to the classes so that only the selected optimizer is instantiated
    optimizer_types = {"a1": Adam, "r1": RMSprop}
    opt = optimizer_types[hypa["optimizer_type"]](learning_rate=lr)

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    if lr_value.startswith("exp_decay"):
        if lr_value == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_02":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2)
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # setup cyclic learning rate
    if lr_value.startswith("clr_triangular2"):
        base_lr = 1e-5
        max_lr = 1e-3

        # training iteration per epoch = num samples // batch size
        # step size suggested = 2~8 * iterations
        if lr_value == "clr_triangular2_01":
            step_factor = 8
            step_size = step_factor * x.shape[0] // batch_size

        elif lr_value == "clr_triangular2_02":
            step_factor = 2
            step_size = step_factor * x.shape[0] // batch_size

        # for the following schedules:
        # target_cycles = the number of cycles we want in those epochs
        # it_per_epoch = num_samples // batch_size
        # total_iterations = it_per_epoch * epoch_num
        # step_size = total_iterations // target_cycles
        elif lr_value == "clr_triangular2_03":
            # the number of cycles we want in those epochs
            target_cycles = 4
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_04":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_05":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)
            # set bigger starting value
            max_lr = 1e-2

        logg.debug(f"x.shape[0]: {x.shape[0]}")
        logg.debug(f"CLR is using step_size: {step_size}")

        mode = "triangular2"
        cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode)
        callbacks.append(cyclic_lr)

    # setup early stopping
    if learning_rate_type in ["01", "02", "03", "04"]:
        # without validation data there is no val_loss to monitor
        metric_to_monitor = "val_loss" if use_validation else "loss"
        early_stop = EarlyStopping(
            monitor=metric_to_monitor,
            patience=4,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks.append(early_stop)

    # model_checkpoint = ModelCheckpoint(
    #     model_name,
    #     monitor="val_loss",
    #     save_best_only=True,
    # )

    # a dict to recreate this training
    # FIXME this should be right before fit and have epoch_num/batch_size/lr info
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["version"] = "001"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=epoch_num,
        batch_size=batch_size,
        callbacks=callbacks,
    )

    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "002"

    # eval performance on the various metrics
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn] for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names
        }

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # if cyclic_lr was used save the history
    if lr_value.startswith("clr_triangular2"):
        logg.debug(f"cyclic_lr.history.keys(): {cyclic_lr.history.keys()}")
        # cast every value to a builtin float so that json can serialize it
        clr_recap = {
            metric_name: [float(v) for v in values]
            for metric_name, values in cyclic_lr.history.items()
        }
        clr_recap_path = info_folder / "clr_recap.json"
        clr_recap_path.write_text(json.dumps(clr_recap, indent=4))

    # save the trained model
    model.save(model_path)

    # the placeholder marks this configuration as trained for later calls
    placeholder_path.write_text(f"Trained. F-score: {fscore}")
# Drop dev examples rejected by the typechecker before training.
invalids = dataset.dev.remove_invalid_examples(typechecker)
print('removed', len(invalids), 'invalid dev examples')

# Build the model, train it and keep the weights of the final model.
model = get_model(config, dataset.featurizer.vocab, typechecker)
trainer = Trainer(todir, model, typechecker, scoring_labels)
best_scores = trainer.train(dataset.train, dataset.dev, max_epoch=config.max_epoch)

model.save_weights(os.path.join(todir, 'best_weights'), overwrite=True)

# The report is text, so the file is opened in text mode (writing a str to a
# binary-mode handle raises TypeError on Python 3).
report = classification_report(best_scores['targs'], best_scores['preds'],
                               target_names=dataset.featurizer.vocab['rel'].index2word)
with open(os.path.join(todir, 'classification_report.txt'), 'w') as f:
    f.write(report)
print(report)

from plot_utils import plot_confusion_matrix, plot_histogram, get_sorted_labels
order, labels, counts = get_sorted_labels(best_scores['targs'], dataset.featurizer.vocab)
fig = plot_confusion_matrix(best_scores['targs'], best_scores['preds'], order, labels)
fig.savefig(os.path.join(todir, 'confusion_matrix.png'))

fig = plot_histogram(labels, counts)
fig.savefig(os.path.join(todir, 'relation_histogram.png'))

# Remove the per-example entries before dumping the scores; json.dump emits
# str, hence text mode here as well.
with open(os.path.join(todir, 'best_scores.json'), 'w') as f:
    del best_scores['preds']
    del best_scores['targs']
    del best_scores['ids']
    json.dump(best_scores, f, sort_keys=True)

print('best scores')
pprint(best_scores)
# NOTE(review): unpickling can execute arbitrary code; args.pkl comes from the
# command line, so only point this at trusted files.
big_df = pandas.read_pickle(args.pkl)

# Wrap the predicted phi into [-pi, pi].
big_df["pred_phi"] = np.arctan2(np.sin(big_df["pred_phi"]), np.cos(big_df["pred_phi"]))

target_pid_col = "{}_pid".format(args.target)
pkl_dir = osp.dirname(args.pkl)  # all outputs are written next to the input pickle

# Currently every row is kept; the commented mask would drop rows where the
# target or the prediction has pid 0.
# msk = (big_df[target_pid_col] != 0) & (big_df["pred_pid"] != 0)
# builtin bool: the np.bool alias was removed from NumPy
msk = np.ones(len(big_df), dtype=bool)
true_pid_msk = big_df[target_pid_col][msk]
pred_pid_msk = big_df["pred_pid"][msk]

confusion2 = sklearn.metrics.confusion_matrix(
    true_pid_msk, pred_pid_msk,
    labels=class_labels
)

print(class_labels)
fig, ax = plot_confusion_matrix(
    cm=confusion2, target_names=[int(x) for x in class_labels], normalize=True
)

acc = sklearn.metrics.accuracy_score(true_pid_msk, pred_pid_msk)
plt.title("")
# plt.title("ML-PF, accuracy={:.2f}".format(acc))
plt.ylabel("{} PF candidate PID\nassociated to input PFElement".format(args.target))
plt.xlabel("predicted PID\nML-PF candidate,\naccuracy: {:.2f}".format(acc))
cms_label(x0=0.20, x1=0.26, y=0.95)
sample_label(ax, y=0.995)
plt.savefig(osp.join(pkl_dir, "confusion_mlpf.pdf"), bbox_inches="tight")

# One set of resolution plots per PID code, each with its own binning.
for pid in (211, 130, 11, 13):
    prepare_resolution_plots(big_df, pid, bins[pid], target=args.target, outpath=pkl_dir)