def plot_multiple_xy_results(predictions, y_test, target_cols, ind, folder_name="nn"):
    """
    Plot the network results for every target column, one plot per column.

    Parameters
    ----------
    predictions : array
        Predicted values; indexed as ``predictions[:, i]``, one column per
        target.
    y_test : array
        Real values, indexed the same way as ``predictions``.
    target_cols : list
        Names of the target columns; ``target_cols[i]`` labels column ``i``.
    ind : object
        Forwarded unchanged to ``plot_xy_results`` as ``index``.
    folder_name : string, optional
        Sub-folder of ``results`` handed to ``plot_xy_results``.
        The default is "nn".

    Returns
    -------
    None.
    """
    # Make sure the output directories exist before any plot is produced.
    for directory in ("results", f"results/{folder_name}"):
        try_create_folder(directory)

    n_outputs = predictions.shape[1]
    for column in range(n_outputs):
        target_name = target_cols[column]
        print("Resultados:", target_name, "...")
        plot_xy_results(
            predictions[:, column],
            y_test[:, column],
            index=ind,
            name=target_name,
            folder_name=folder_name,
        )
def plot_sequence(predictions, real, fechas, indice, folder_name="nn"):
    """
    Plot the predicted sequence against the real one and save the figure.

    The figure is written to ``results/{folder_name}/{indice}_results.png``
    and then displayed.

    Parameters
    ----------
    predictions : array
        Predicted values, plotted against the dates.
    real : array
        Real values, plotted against the dates.
    fechas : array
        Dates; each element is sliced with ``[0:10]`` before plotting.
    indice : object
        Column identifier, used in the plot title and in the file name.
    folder_name : string, optional
        Sub-folder of ``results`` where the image is saved.
        The default is "nn".

    Returns
    -------
    None.
    """
    letter_size = 20

    # Keep only the first 10 characters of each date
    # (presumably the YYYY-MM-DD part of a timestamp string - confirm).
    new_fechas = [fecha[0:10] for fecha in fechas]

    plt.style.use('dark_background')
    fig, ax = plt.subplots(1, figsize=(20, 12))
    ax.plot(new_fechas, real, 'gold', linewidth=2)
    ax.plot(new_fechas, predictions, 'orangered', linewidth=2)

    ax.set_xlabel('Tiempo', fontname="Arial", fontsize=letter_size)
    ax.set_ylabel('Predicción vs Real', fontname="Arial",
                  fontsize=letter_size + 2)
    ax.set_title(f"Predicciones vs real {str(indice)}", fontname="Arial",
                 fontsize=letter_size + 10)
    ax.legend(['real', 'predicción'], loc='upper left',
              prop={'size': letter_size + 5})

    # Tick label size and rotation are set through tick_params so they also
    # apply to ticks that are (re)created when the figure is drawn.
    ax.tick_params(axis='both', labelsize=letter_size)
    ax.tick_params(axis='x', labelrotation=75)

    # Create the parent folder first, like the sibling helpers do, so the
    # nested folder can be created on a fresh working directory.
    try_create_folder("results")
    try_create_folder(f"results/{folder_name}")

    # Save before showing: on blocking backends the figure may already be
    # closed once plt.show() returns.
    fig.savefig(f"results/{folder_name}/{indice}_results.png")
    plt.show()
def mae_evaluation(predictions, real, names, nn, folder_name="nn_architectures", filename="algo"):
    """
    Compute the mean absolute error per target column and store the results.

    Writes the table to ``results/{folder_name}/{filename}.csv`` and passes
    ``nn`` to ``plot_model`` to write ``results/{folder_name}/{filename}.png``.

    Parameters
    ----------
    predictions : array
        Predicted values; indexed as ``predictions[:, i]``, one column per
        target.
    real : array
        Real values, indexed the same way as ``predictions``.
    names : list
        Names of the target columns; ``names[i]`` labels column ``i``.
    nn : model
        Model object handed to ``plot_model``.
    folder_name : string, optional
        Sub-folder of ``results`` for the outputs.
        The default is "nn_architectures".
    filename : string, optional
        Base name of the output files; also stored in the "architecture"
        column. The default is "algo".

    Returns
    -------
    output : DataFrame
        One row per target with columns "variable", "mae" and
        "architecture".
    """
    for directory in ("results", f"results/{folder_name}"):
        try_create_folder(directory)

    rows = []
    n_targets = predictions.shape[1]
    for column in range(n_targets):
        variable = names[column]
        print("Resultados:", variable, "...")
        abs_error = np.abs(predictions[:, column] - real[:, column])
        # MAE rounded to 4 decimals, as stored in the CSV.
        rows.append([variable, round(abs_error.mean(), 4), filename])

    output = pd.DataFrame(rows, columns=["variable", "mae", "architecture"])
    base_path = f"results/{folder_name}/{filename}"
    output.to_csv(f"{base_path}.csv", index=False)
    plot_model(nn, to_file=f"{base_path}.png",
               show_shapes=True, show_layer_names=True)
    return output
def training_history(history, model_name="NN", filename="NN"):
    """
    Plot the training history over progressively shorter trailing windows.

    ``plot_instance_training`` is called for the full history and then for
    3/4, 1/2, 1/3 and 1/4 of it (each truncated to an integer number of
    epochs). The figure of the 3/4 window is saved to
    ``results/{model_name}_training.png``.

    Parameters
    ----------
    history : object
        Training history; ``history.history['val_loss']`` is read to get
        the number of recorded epochs.
    model_name : string, optional
        Model name, forwarded to ``plot_instance_training`` and used in the
        saved file name. The default is "NN".
    filename : string, optional
        Prefix of the label passed to ``plot_instance_training``.
        The default is "NN".

    Returns
    -------
    None.
    """
    size_training = len(history.history['val_loss'])

    def plot_last(window):
        # Label with the integer window that is actually plotted, so the
        # suffix matches the plot (previously e.g. "7.5epocas" for 7 epochs).
        # NOTE(review): the ":" in the label is not a valid file-name
        # character on Windows if the helper uses it as a path - confirm.
        window = int(window)
        return plot_instance_training(
            history, window, model_name,
            f"{filename}_ultimas:{window}epocas")

    plot_last(size_training)
    fig = plot_last(1.5 * size_training / 2)

    # Save the training result; this is the figure of the 3/4 window.
    print(os.getcwd())
    try_create_folder("results")
    fig.savefig(f"results/{model_name}_training.png")

    for divisor in (2, 3, 4):
        fig = plot_last(size_training / divisor)

    print(fig)
validation_split=args.validation_size, batch_size=args.batch_size, epochs=args.epochs, shuffle=False, verbose=1, callbacks=callbacks) # ver resutados de entrenamiento training_history(history, model_name="cnn", filename="cnn") # hacer predicciones en el test predictions = cnn.predict(x_test) # gráficar los resultados folder_name = file.replace(".csv", "").replace("featured", "cnn_corr") # visualizaciones plot_multiple_xy_results(predictions, y_test, target_cols, indice, folder_name=folder_name) # arquitectura usada en string architecture = get_model_summary(cnn) print(architecture) # dataframe de salida output = mae_evaluation(predictions, y_test, target_cols, cnn, folder_name=folder_results, filename=f"{indice}-{folder_name}") print(output) path_nn = "results/models" try_create_folder(path_nn) cnn.save(path_nn+f"/{indice}_model_cnn.h5")
# Feature columns: every column except "year" and the targets.
features = list(df.columns)
for excluded in ["year", *target_cols]:
    features.remove(excluded)

# Dates and targets, re-indexed from zero.
date = df[["year"]].reset_index(drop=True)
targets = df[target_cols].reset_index(drop=True)

# Output folder for the reduced datasets.
try_create_folder(path + "featured")

# Drop variables by correlation for thresholds 0.50, 0.55, ..., 0.90 and
# save one dataset per threshold so each can be tried later.
for step in range(10, 19):
    threshold = step / 20
    print("featured dataset para un threshold de:", threshold)

    # Keep only the feature columns, then remove the correlated ones.
    reduced = selection_by_correlation(df[features], threshold=threshold)

    # Append the date column and then the targets, column-wise.
    with_date = pd.concat([reduced, date], axis=1)
    dataset = pd.concat([with_date, targets], axis=1)
    print("shape dataset:", dataset.shape)

    # The threshold goes into the file name with "." replaced by "_".
    tag = str(threshold).replace(".", "_")
    dataset.to_csv(f"{path}featured/featured_{tag}.csv", index=False)