# ===== Example 1 =====
    # ================ Split definition =================
    [train_ids, val_ids, test_ids] = utilsNN.split_train_validation_and_test(tot_examples,
                                                                             val_percentage=val_perc,
                                                                             test_percentage=test_perc)

    print("Train examples (total:{}) :{}".format(len(train_ids), folders_to_read[train_ids]))
    print("Validation examples (total:{}) :{}:".format(len(val_ids), folders_to_read[val_ids]))
    print("Test examples (total:{}) :{}".format(len(test_ids), folders_to_read[test_ids]))

    print("Selecting and generating the model....")
    now = datetime.utcnow().strftime("%Y_%m_%d_%H_%M")
    model_name = F'{model_name_user}_{now}'

    # ******************* Selecting the model **********************
    model = select_2d_model(config)
    plot_model(model, to_file=join(output_folder,F'{model_name}.png'), show_shapes=True)

    print("Saving split information...")
    file_name_splits = join(split_info_folder, F'{model_name}.txt')
    utilsNN.save_splits(file_name=file_name_splits, folders_to_read=folders_to_read,
                        train_idx=train_ids, val_idx=val_ids, test_idx=test_ids)

    print("Compiling model ...")
    model.compile(loss=loss_func, optimizer=optimizer, metrics=eval_metrics)

    print("Getting callbacks ...")

    [logger, save_callback, stop_callback] = utilsNN.get_all_callbacks(model_name=model_name,
                                                                       early_stopping_func=F'val_{eval_metrics[0].__name__}',
                                                                       weights_folder=weights_folder,
# ===== Example 2 =====
def test_model(config):
    """Evaluate a trained 2D network patch-by-patch over every daily model file.

    For each ``model_<year>_<day>.nc`` file in the input folder the matching
    observation and increment files are read, the whole domain is scanned in
    (rows x cols) patches, each patch prediction is denormalized and stitched
    into whole-domain arrays, per-patch comparison plots are generated, and a
    whole-domain CNN-vs-TSIS RMSE plot is produced.

    Args:
        config: dict-like configuration indexed by PredictionParams /
            ProjTrainingParams / TrainingParams keys (folders, field names,
            patch size, normalization type, network type, weights file).
    """
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')
    net_type = config[ProjTrainingParams.network_type]
    # Exclusive dispatch: the previous chain of independent 'if's left 'model'
    # unbound (a late NameError) for an unrecognized type; fail fast instead.
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        raise ValueError(F"Unknown network type: {net_type}")

    plot_model(model,
               to_file=join(output_folder, F'running.png'),
               show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]  # only the surface layer
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    # Whole-domain grid size (full model output, in grid cells)
    tot_rows = 891
    tot_cols = 1401

    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []

    for id_file, c_file in enumerate(model_files):
        # File names encode the date as model_<year>_<day_of_year>.nc
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        # TODO(review): debug filter — only day-of-year 5 is processed;
        # remove this guard to evaluate every available day.
        if day_of_year != 5:
            continue

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        whole_cnn = np.zeros((tot_rows, tot_cols))  # stitched CNN prediction
        whole_y = np.zeros((tot_rows, tot_cols))    # stitched desired output

        this_file_times = []  # per-patch prediction times for this file

        start_row = 0
        donerow = False
        while not donerow:
            donecol = False
            start_col = 0
            while not donecol:
                # Generate the proper inputs for the NN. generateXandY raises
                # when the patch has less ocean than perc_ocean (land patch).
                try:
                    perc_ocean = .05
                    input_data, y_data = generateXandY(
                        input_fields_model, input_fields_obs, input_fields_std,
                        output_field_increment, field_names_model,
                        field_names_obs, field_names_std, output_fields,
                        start_row, start_col, rows, cols,
                        norm_type=norm_type, perc_ocean=perc_ocean)
                except Exception:
                    print(F"Land for {c_file} row:{start_row} col:{start_col}")
                    start_col, donecol = verifyBoundaries(start_col, cols,
                                                          tot_cols)
                    continue

                # ******************* Replacing nan values *********
                # Land is nan in the files; use 0 in X and -0.5 in Y so a
                # land-aware loss function can ignore those pixels.
                input_data_nans = np.isnan(input_data)
                input_data = np.nan_to_num(input_data, nan=0)
                y_data = np.nan_to_num(y_data, nan=-0.5)

                X = np.expand_dims(input_data, axis=0)
                Y = np.expand_dims(y_data, axis=0)

                # Make the prediction of the network (timed)
                start = time.time()
                output_nn_original = model.predict(X, verbose=1)
                this_file_times.append(time.time() - start)

                # Make nan all values inside the land
                land_indexes = Y == -0.5
                output_nn_original[land_indexes] = np.nan

                # ==== Denormalizing all inputs and outputs ====
                denorm_cnn_output = denormalizeData(output_nn_original,
                                                    output_fields,
                                                    PreprocParams.type_inc,
                                                    norm_type)
                denorm_y = denormalizeData(Y, output_fields,
                                           PreprocParams.type_inc, norm_type)
                input_types = (
                    [PreprocParams.type_model for _ in input_fields_model] +
                    [PreprocParams.type_obs for _ in input_fields_obs] +
                    [PreprocParams.type_std for _ in input_fields_std])
                denorm_input = denormalizeData(
                    input_data,
                    field_names_model + field_names_obs + field_names_std,
                    input_types, norm_type)

                # Recover the original land areas, they are lost after denormalization
                denorm_input[input_data_nans] = np.nan
                denorm_y[land_indexes] = np.nan

                # Remove the 'extra' batch dimension and stitch this patch
                # into the whole-domain arrays
                denorm_cnn_output = np.squeeze(denorm_cnn_output)
                denorm_y = np.squeeze(denorm_y)
                whole_cnn[start_row:start_row + rows,
                          start_col:start_col + cols] = denorm_cnn_output
                whole_y[start_row:start_row + rows,
                        start_col:start_col + cols] = denorm_y

                # TODO(review): debug override — every patch is plotted;
                # restore sampling (e.g. np.random.random() > .99) for speed.
                if True:
                    if len(denorm_cnn_output.shape) == 2:
                        # Single output field: add a channel axis so the code
                        # below can always index [:, :, i]
                        denorm_cnn_output = np.expand_dims(denorm_cnn_output,
                                                           axis=2)
                        denorm_y = np.expand_dims(denorm_y, axis=2)

                    # Compute RMSE per output field over ocean pixels only
                    rmse_cnn = np.zeros(len(output_fields))
                    for i in range(len(output_fields)):
                        ocean_indexes = np.logical_not(
                            np.isnan(denorm_y[:, :, i]))
                        rmse_cnn[i] = np.sqrt(mean_squared_error(
                            denorm_cnn_output[:, :, i][ocean_indexes],
                            denorm_y[:, :, i][ocean_indexes]))

                    viz_obj = EOAImageVisualizer(
                        output_folder=output_imgs_folder, disp_images=False)

                    # ========== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ==========
                    viz_obj.plot_2d_data_np_raw(
                        np.concatenate((denorm_input.swapaxes(0, 2),
                                        denorm_y.swapaxes(0, 2),
                                        denorm_cnn_output.swapaxes(0, 2))),
                        var_names=[F"in_model_{x}" for x in field_names_model] +
                                  [F"in_obs_{x}" for x in field_names_obs] +
                                  [F"in_var_{x}" for x in field_names_std] +
                                  [F"out_inc_{x}" for x in output_fields] +
                                  [F"cnn_{x}" for x in output_fields],
                        file_name=F"Input_and_CNN_{c_file}_{start_row:03d}_{start_col:03d}",
                        cmap=cmap_model + cmap_obs + cmap_std + cmap_out + cmap_out,
                        rot_90=True,
                        cols_per_row=len(field_names_model),
                        title=F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}")

                    # ==== Same color range for the desired output and the NN output ====
                    mincbar = [np.nanmin(denorm_y[:, :, x])
                               for x in range(denorm_cnn_output.shape[-1])]
                    maxcbar = [np.nanmax(denorm_y[:, :, x])
                               for x in range(denorm_cnn_output.shape[-1])]
                    error = (denorm_y - denorm_cnn_output).swapaxes(0, 2)
                    mincbarerror = [np.nanmin(error[i, :, :])
                                    for i in range(len(output_fields))]
                    maxcbarerror = [np.nanmax(error[i, :, :])
                                    for i in range(len(output_fields))]
                    viz_obj = EOAImageVisualizer(
                        output_folder=output_imgs_folder,
                        disp_images=False,
                        mincbar=mincbar + mincbar + mincbarerror,
                        maxcbar=maxcbar + maxcbar + maxcbarerror)

                    # ================== Displays CNN and TSIS with RMSE ================
                    viz_obj.output_folder = join(output_imgs_folder,
                                                 'JoinedErrrorCNN')
                    cmap = chooseCMAP(output_fields)
                    error_cmap = cmocean.cm.diff
                    viz_obj.plot_2d_data_np_raw(
                        np.concatenate((denorm_cnn_output.swapaxes(0, 2),
                                        denorm_y.swapaxes(0, 2), error),
                                       axis=0),
                        var_names=[F"CNN INC {x}" for x in output_fields] +
                                  [F"TSIS INC {x}" for x in output_fields] +
                                  [F'RMSE {c_rmse_cnn:0.4f}' for c_rmse_cnn in rmse_cnn],
                        file_name=F"AllError_{c_file}_{start_row:03d}_{start_col:03d}",
                        rot_90=True,
                        cmap=cmap + cmap + [error_cmap],
                        cols_per_row=len(output_fields),
                        title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}")

                # Advance to the next patch column / row
                start_col, donecol = verifyBoundaries(start_col, cols,
                                                      tot_cols)
            start_row, donerow = verifyBoundaries(start_row, rows, tot_rows)

        # ======= Plots whole output with RMSE =======
        mincbar = np.nanmin(whole_y) / 2
        maxcbar = np.nanmax(whole_y) / 2
        error = whole_y - whole_cnn
        mincbarerror = np.nanmin(error) / 2
        maxcbarerror = np.nanmax(error) / 2
        no_zero_ids = np.count_nonzero(whole_cnn)

        # RMSE over pixels the CNN actually filled (untouched cells stay 0)
        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)
        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        if np.random.random() > .9 or day_of_year == 353:  # Plot ~10% of the days
            # NOTE(review): mincbar/maxcbar/mincbarerror are scalars here, so
            # '+' adds them arithmetically instead of concatenating per-image
            # limits as in the patch plots above — confirm the intended
            # EOAImageVisualizer contract (likely [mincbar, mincbar, mincbarerror]).
            viz_obj = EOAImageVisualizer(
                output_folder=output_imgs_folder,
                disp_images=False,
                mincbar=mincbar + mincbar + mincbarerror,
                maxcbar=maxcbar + maxcbar + maxcbarerror)

            # ================== Displays CNN and TSIS with RMSE ================
            viz_obj.output_folder = join(output_imgs_folder,
                                         'WholeOutput_CNN_TSIS')
            viz_obj.plot_2d_data_np_raw(
                [np.flip(whole_cnn, axis=0),
                 np.flip(whole_y, axis=0),
                 np.flip(error, axis=0)],
                var_names=[F"CNN INC {x}" for x in output_fields] +
                          [F"TSIS INC {x}" for x in output_fields] +
                          [F'RMSE {rmse_cnn:0.4f}'],
                file_name=F"WholeOutput_CNN_TSIS_{c_file}",
                rot_90=False,
                cols_per_row=3,
                cmap=cmocean.cm.algae,
                title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}")
# ===== Example 3 =====
def singleModel(config):
    """Visualize the intermediate conv-layer activations of a trained 2D network.

    Loads the configured network and its weights, prints parameter counts and
    layer names, then for randomly ordered daily files crops (rows x cols)
    patches, runs the network, and plots the feature maps of every conv layer
    together with the input, desired output, and NN output.

    Args:
        config: dict-like configuration indexed by PredictionParams /
            ProjTrainingParams / TrainingParams keys (folders, field names,
            patch size, normalization type, network type, weights file).
    """
    input_folder = config[PredictionParams.input_folder]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    model_field_names = config[ProjTrainingParams.fields_names]
    obs_field_names = config[ProjTrainingParams.fields_names_obs]
    output_fields = config[ProjTrainingParams.output_fields]
    run_name = config[TrainingParams.config_name]
    output_folder = join(config[PredictionParams.output_imgs_folder],
                         'MODEL_VISUALIZATION', run_name)
    norm_type = config[ProjTrainingParams.norm_type]

    model_weights_file = config[PredictionParams.model_weights_file]

    net_type = config[ProjTrainingParams.network_type]
    # Exclusive dispatch: the previous chain of independent 'if's left 'model'
    # unbound (a late NameError) for an unrecognized type; fail fast instead.
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        raise ValueError(F"Unknown network type: {net_type}")

    create_folder(output_folder)
    plot_model(model,
               to_file=join(output_folder, F'running.png'),
               show_shapes=True)

    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # Parameter counts: total and first CNN layer (the hard-coded layer
    # index 1 is project-specific)
    print(F' Number of parameters: {model.count_params()}')
    print(F' Number of parameters first CNN: {model.layers[1].count_params()}')

    print("Printing layer names:")
    print_layer_names(model)

    # *********** Read files to predict***********
    # ========= Here you need to build your test input different in each project ====
    # (duplicated listdir/sort block removed)
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]  # only the surface layer
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    var_field_names = config[ProjTrainingParams.fields_names_var]
    if len(var_field_names) > 0:
        input_fields_var = read_netcdf(var_file, var_field_names, z_layers)
    else:
        input_fields_var = []

    np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # File names encode the date as model_<year>_<day_of_year>.nc
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, model_field_names,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, obs_field_names, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        # Scan the 891 x 1401 domain in non-overlapping (rows x cols) patches
        for start_row in np.arange(0, 891 - rows, rows):
            for start_col in np.arange(0, 1401 - cols, cols):
                try:
                    input_data, y_data = generateXandY(
                        input_fields_model, input_fields_obs, input_fields_var,
                        output_field_increment, model_field_names,
                        obs_field_names, var_field_names, output_fields,
                        start_row, start_col, rows, cols, norm_type=norm_type)
                except Exception:
                    # generateXandY raises for patches without enough ocean
                    print(
                        F"Failed for {c_file} row:{start_row} col:{start_col}")
                    continue

                X_nan = np.expand_dims(input_data, axis=0)
                Y_nan = np.expand_dims(y_data, axis=0)

                # ******************* Replacing nan values *********
                # Land is nan in the inputs; feed 0 to the network instead
                X = np.nan_to_num(X_nan, nan=0)

                output_nn = model.predict(X, verbose=1)
                output_nn[np.isnan(Y_nan)] = np.nan  # restore the land mask

                print("Evaluating all intermediate layers")
                inp = model.input  # input placeholder
                # Displaying only conv layers
                outputs = [layer.output for layer in model.layers[1:]
                           if layer.name.find("conv") != -1]
                # Evaluation functions (call the model up to each conv layer)
                functors = [K.function([inp], [out]) for out in outputs]
                # Outputs for every intermediate conv layer
                layer_outs = [func([X]) for func in functors]

                for layer_to_plot in range(0, len(outputs)):
                    title = F'Layer {layer_to_plot}_{outputs[layer_to_plot].name}. {c_file}_{start_row:03d}_{start_col:03d}'
                    file_name = F'{c_file}_{start_row:03d}_{start_col:03d}_lay_{layer_to_plot}'
                    plot_intermediate_2dcnn_feature_map(
                        layer_outs[layer_to_plot][0],
                        input_data=X_nan,
                        desired_output_data=Y_nan,
                        nn_output_data=output_nn,
                        input_fields=model_field_names + obs_field_names +
                        var_field_names,
                        title=title,
                        output_folder=output_folder,
                        file_name=file_name,
                        disp_images=False)
# ===== Example 4 =====
def test_model(config):
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')

    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    if net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    if net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    if net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    if net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)

    plot_model(model,
               to_file=join(output_folder, F'running.png'),
               show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    tot_rows = 891
    tot_cols = 1401

    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []

    # np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        this_file_times = []

        try:
            perc_ocean = .01
            input_data, y_data = generateXandY(input_fields_model,
                                               input_fields_obs,
                                               input_fields_std,
                                               output_field_increment,
                                               field_names_model,
                                               field_names_obs,
                                               field_names_std,
                                               output_fields,
                                               0,
                                               0,
                                               grows,
                                               gcols,
                                               norm_type=norm_type,
                                               perc_ocean=perc_ocean)
        except Exception as e:
            print(F"Exception {e}")

        # ******************* Replacing nan values *********
        # We set a value of 0.5 on the land. Trying a new loss function that do not takes into account land
        input_data_nans = np.isnan(input_data)
        input_data = np.nan_to_num(input_data, nan=0)
        y_data = np.nan_to_num(y_data, nan=-0.5)

        X = np.expand_dims(input_data, axis=0)
        Y = np.expand_dims(y_data, axis=0)

        # Make the prediction of the network
        start = time.time()
        output_nn_original = model.predict(X, verbose=1)
        toc = time.time() - start
        this_file_times.append(toc)

        # Make nan all values inside the land
        land_indexes = Y == -0.5
        output_nn_original[land_indexes] = np.nan

        # ==== Denormalizingallinput and outputs
        denorm_cnn_output = denormalizeData(output_nn_original, output_fields,
                                            PreprocParams.type_inc, norm_type)
        denorm_y = denormalizeData(Y, output_fields, PreprocParams.type_inc,
                                   norm_type)
        input_types = [PreprocParams.type_model
                       for i in input_fields_model] + [
                           PreprocParams.type_obs for i in input_fields_obs
                       ] + [PreprocParams.type_std for i in input_fields_std]
        denorm_input = denormalizeData(
            input_data, field_names_model + field_names_obs + field_names_std,
            input_types, norm_type)

        # Recover the original land areas, they are lost after denormalization
        denorm_y[land_indexes] = np.nan

        # Remove the 'extra dimension'
        denorm_cnn_output = np.squeeze(denorm_cnn_output)
        denorm_y = np.squeeze(denorm_y)
        whole_cnn = denorm_cnn_output  # Add the the 'whole prediction'
        whole_y = denorm_y  # Add the the 'whole prediction'

        if len(
                denorm_cnn_output.shape
        ) == 2:  # In this case we only had one output and we need to make it 'array' to plot
            denorm_cnn_output = np.expand_dims(denorm_cnn_output, axis=2)
            denorm_y = np.expand_dims(denorm_y, axis=2)

        # Compute RMSE
        # rmse_cnn = np.zeros(len(output_fields))
        # for i in range(len(output_fields)):
        #     ocean_indexes = np.logical_not(np.isnan(denorm_y[:,:,i]))
        #     rmse_cnn[i] = np.sqrt(mean_squared_error(denorm_cnn_output[:,:,i][ocean_indexes], denorm_y[:,:,i][ocean_indexes]))

        # ================== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ===================
        # Adding back mask to all the input variables
        denorm_input[input_data_nans] = np.nan

        # ======= Plots whole output with RMSE
        mincbar = np.nanmin(whole_y)
        maxcbar = np.nanmax(whole_y)
        error = whole_y - whole_cnn
        mincbarerror = np.nanmin(error)
        maxcbarerror = np.nanmax(error)
        no_zero_ids = np.count_nonzero(whole_cnn)

        if output_fields[
                0] == 'srfhgt':  # This should only be for SSH to adjust the units
            whole_cnn /= 9.81
            whole_y = np.array(whole_y) / 9.81

        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)

        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        # if day_of_year == 353: # Plot 10% of the times
        if True:  # Plot 10% of the times

            # viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False, mincbar=mincbar, maxcbar=maxcbar)
            viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder,
                                         disp_images=False)

            # viz_obj.plot_2d_data_np_raw(np.concatenate((input_data.swapaxes(0,2), Y[0,:,:,:].swapaxes(0,2), output_nn_original[0,:,:,:].swapaxes(0,2))),
            viz_obj.plot_2d_data_np_raw(
                np.concatenate(
                    (denorm_input.swapaxes(0, 2), denorm_y.swapaxes(0, 2),
                     denorm_cnn_output.swapaxes(0, 2))),
                var_names=[F"in_model_{x}" for x in field_names_model] +
                [F"in_obs_{x}" for x in field_names_obs] +
                [F"in_var_{x}" for x in field_names_std] +
                [F"out_inc_{x}"
                 for x in output_fields] + [F"cnn_{x}" for x in output_fields],
                file_name=F"Global_Input_and_CNN_{c_file}",
                rot_90=True,
                cmap=cmap_model + cmap_obs + cmap_std + cmap_out + cmap_out,
                cols_per_row=len(field_names_model),
                title=
                F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}"
            )

            minmax = getMinMaxPlot(output_fields)[0]
            viz_obj = EOAImageVisualizer(
                output_folder=output_imgs_folder,
                disp_images=False,
                # mincbar=mincbar + mincbar + mincbarerror,
                # maxcbar=maxcbar + maxcbar + maxcbarerror)
                # mincbar=[minmax[0], minmax[0], max(minmax[0],-1)],
                # maxcbar=[minmax[1], minmax[1], min(minmax[1],1)])
                mincbar=[minmax[0], minmax[0], -1],
                maxcbar=[minmax[1], minmax[1], 1])

            # ================== Displays CNN and TSIS with RMSE ================
            error_cmap = cmocean.cm.diff
            viz_obj.output_folder = join(output_imgs_folder,
                                         'WholeOutput_CNN_TSIS')
            viz_obj.plot_2d_data_np_raw(
                [
                    np.flip(whole_cnn, axis=0),
                    np.flip(whole_y, axis=0),
                    np.flip(error, axis=0)
                ],
                # var_names=[F"CNN INC {x}" for x in output_fields] + [F"TSIS INC {x}" for x in output_fields] + [F'TSIS - CNN (Mean RMSE {rmse_cnn:0.4f} m)'],
                var_names=[F"CNN increment SSH" for x in output_fields] +
                [F"TSIS increment SSH" for x in output_fields] +
                [F'TSIS - CNN \n (Mean RMSE {rmse_cnn:0.4f} m)'],
                file_name=F"Global_WholeOutput_CNN_TSIS_{c_file}",
                rot_90=False,
                cmap=cmap_out + cmap_out + [error_cmap],
                cols_per_row=3,
                # title=F"{output_fields[0]} RMSE: {np.mean(rmse_cnn):0.5f} m.")
                title=F"SSH RMSE: {np.mean(rmse_cnn):0.5f} m.")

            print("DONE ALL FILES!!!!!!!!!!!!!")
    dic_summary = {
        "File": model_files,
        "rmse": all_whole_rmse,
        "times mean": all_whole_mean_times,
        "times sum": all_whole_sum_times,
    }
    df = pd.DataFrame.from_dict(dic_summary)
    df.to_csv(join(output_imgs_folder, "Global_RMSE_and_times.csv"))
# Esempio n. 5
# 0
def main():
    """Run 2D segmentation inference over the configured cases and score them.

    Loads a trained 2D segmentation network, then for every selected case:
    reads the preprocessed images and contours, predicts a lesion mask,
    zeroes the prediction outside the lungs, binarizes at a 0.5 threshold,
    optionally saves the mask and plots, and accumulates the configured
    metrics (e.g. 2D DSC) into a CSV summary in the output folder.
    All paths and settings come from ``get_segmentation_2d_config()``.
    """
    config = get_segmentation_2d_config()
    cases = config[ClassificationParams.cases]
    save_segmented_ctrs = config[ClassificationParams.save_segmented_ctrs]

    input_folder = config[ClassificationParams.input_folder]
    input_img_names = config[ClassificationParams.input_img_file_names]
    output_folder = config[ClassificationParams.output_folder]
    output_imgs_folder = config[ClassificationParams.output_imgs_folder]
    output_file_name = config[ClassificationParams.output_file_name]
    model_weights_file = config[ClassificationParams.model_weights_file]

    save_imgs = config[ClassificationParams.save_imgs]

    # Builds the visualization object
    viz_obj = MedicalImageVisualizer(
        disp_images=config[ClassificationParams.show_imgs],
        output_folder=output_imgs_folder)

    output_ctr_file_names = config[ClassificationParams.output_ctr_file_names]
    # *********** Chooses the proper model ***********
    print('Reading model ....')
    model = select_2d_model(config)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    examples = select_cases_from_folder(input_folder, cases)
    create_folder(output_imgs_folder)

    # *********** Makes a dataframe to contain the DSC information **********
    metrics_params = config[ClassificationParams.metrics]
    metrics_dict = {met.name: met.value for met in metrics_params}

    # BUGFIX: bind data_columns unconditionally; it was only assigned in the
    # 'else' branch but is used later (plot_intermediate_results), which raised
    # a NameError whenever a previous CSV existed.
    data_columns = list(metrics_dict.values())

    # Resume from a previous run if the summary CSV already exists.
    # BUGFIX: the existence check looked in output_imgs_folder while the CSV
    # is written to output_folder below, so previous results were never found.
    if os.path.exists(join(output_folder, output_file_name)):
        data = pd.read_csv(join(output_folder, output_file_name), index_col=0)
    else:
        data = DataFrame(index=examples, columns=data_columns)

    # *********** Iterates over each case *********
    for id_folder, current_folder in enumerate(examples):
        print(F'******* Computing folder {current_folder} ************')
        t0 = time.time()
        try:
            # -------------------- Reading data -------------
            print('\t Reading data....')
            # All these names are predefined, for any other 3d segmentation we will need to create a different configuration
            all_imgs, all_ctrs, _, _ = read_preproc_imgs_and_ctrs_png(
                input_folder,
                folders_to_read=[current_folder],
                img_names=input_img_names,
                ctr_names=output_ctr_file_names)

            imgs_np = all_imgs[0]
            # VERIFY THE ORDER IS THE SAME IN THE CONFIG FILE
            ctrs_lungs_np = all_ctrs[0][0].copy()
            ctrs_lesion_np = all_ctrs[0][1].copy()
            # If we want to visualize the input images
            # viz_obj.plot_imgs_and_ctrs_itk(img_np[0], ctrs_itk=ctrs_itk[0])

            # ------------------- Making prediction -----------
            print('\t Making prediction....')
            input_array = format_for_nn_classification(imgs_np)
            output_nn_all = model.predict(input_array, verbose=1)
            output_nn_np = output_nn_all[0, :, :, 0]
            # Making the prediction 0 outside the lungs
            output_nn_np[ctrs_lungs_np == 0] = 0
            # For visualizing the output of the network
            # viz_obj.plot_img_and_ctrs_np_2d(output_nn_np, np_ctrs=[], file_name_prefix=id_folder)

            # ------------------- Postprocessing -----------
            print('\t Postprocessing prediction....')
            threshold = .5
            print(F'\t\t Threshold NN output to {threshold} ....')
            # Binarize the probability map at the threshold
            output_nn_np[output_nn_np <= threshold] = 0
            output_nn_np[output_nn_np > threshold] = 1

            if save_segmented_ctrs:
                print('\t Saving Prediction...')
                create_folder(join(output_folder, current_folder))
                # Scale the {0,1} mask to {0,255} for an 8-bit PNG
                cv2.imwrite(
                    join(output_folder, current_folder,
                         output_ctr_file_names[0]),
                    cv2.convertScaleAbs(output_nn_np, alpha=(255.0)))

            # Compute metrics
            print('\t Computing metrics....')
            for c_metric in metrics_params:  # Here we can add more metrics
                if c_metric == ClassificationMetrics.DSC_2D:
                    metric_value = numpy_dice(output_nn_np, ctrs_lesion_np)
                    # BUGFIX: single .loc[row, col] assignment; the previous
                    # chained data.loc[row][col] = ... writes through a
                    # possibly-temporary copy and can silently not update
                    # the dataframe (pandas SettingWithCopy).
                    data.loc[current_folder, c_metric.value] = metric_value
                    print(F'\t\t ----- DSC: {metric_value:.3f} -----')

            # Saving the results every 10 steps
            if id_folder % 10 == 0:
                save_metrics_images(data,
                                    metric_names=list(metrics_dict.values()),
                                    viz_obj=viz_obj)
                data.to_csv(join(output_folder, output_file_name))

            if save_imgs:
                print('\t Plotting images....')
                plot_intermediate_results(current_folder,
                                          data_columns,
                                          img_np=imgs_np[0],
                                          gt_ctr_np=ctrs_lesion_np,
                                          nn_ctr_np=output_nn_np,
                                          data=data,
                                          viz_obj=viz_obj)

        except Exception as e:
            # Best-effort: a failing case is reported but does not stop the run
            print(
                "---------------------------- Failed {} error: {} ----------------"
                .format(current_folder, e))
        print(F'\t Done! Elapsed time {time.time()-t0:0.2f} seg')

    # Final save of metrics plots and the summary CSV
    save_metrics_images(data,
                        metric_names=list(metrics_dict.values()),
                        viz_obj=viz_obj)
    data.to_csv(join(output_folder, output_file_name))
# Esempio n. 6
# 0
def doTraining(conf):
    """Train a 2D network to predict assimilation increments.

    Builds the run folder layout (splits/parameters/models/logs), splits the
    preprocessed files into train/val/test, selects the network architecture
    from the configuration, compiles it and trains it with generators over
    the preprocessed data.

    Args:
        conf: configuration dictionary keyed by ProjTrainingParams /
            TrainingParams entries (paths, field names, hyper-parameters).
    """
    # BUGFIX: the body referenced a module-level `config` instead of the
    # `conf` parameter, so the passed configuration was ignored. Bind it
    # explicitly (keeps the rest of the body, which uses `config`, working).
    config = conf

    input_folder_preproc = config[ProjTrainingParams.input_folder_preproc]
    # input_folder_obs = config[ProjTrainingParams.input_folder_obs]
    years = config[ProjTrainingParams.YEARS]
    fields = config[ProjTrainingParams.fields_names]
    fields_obs = config[ProjTrainingParams.fields_names_obs]
    output_field = config[ProjTrainingParams.output_fields]
    # day_to_predict = config[ProjTrainingParams.prediction_time]

    output_folder = config[TrainingParams.output_folder]
    val_perc = config[TrainingParams.validation_percentage]
    test_perc = config[TrainingParams.test_percentage]
    eval_metrics = config[TrainingParams.evaluation_metrics]
    loss_func = config[TrainingParams.loss_function]
    batch_size = config[TrainingParams.batch_size]
    epochs = config[TrainingParams.epochs]
    run_name = config[TrainingParams.config_name]
    optimizer = config[TrainingParams.optimizer]

    # Run folder layout: one subfolder per artifact type
    output_folder = join(output_folder, run_name)
    split_info_folder = join(output_folder, 'Splits')
    parameters_folder = join(output_folder, 'Parameters')
    weights_folder = join(output_folder, 'models')
    logs_folder = join(output_folder, 'logs')
    create_folder(split_info_folder)
    create_folder(parameters_folder)
    create_folder(weights_folder)
    create_folder(logs_folder)

    # Compute how many cases
    files_to_read, paths_to_read = get_preproc_increment_files(
        input_folder_preproc)
    tot_examples = len(files_to_read)

    # ================ Split definition =================
    [train_ids, val_ids, test_ids
     ] = utilsNN.split_train_validation_and_test(tot_examples,
                                                 val_percentage=val_perc,
                                                 test_percentage=test_perc,
                                                 shuffle_ids=False)

    print(
        F"Train examples (total:{len(train_ids)}) :{files_to_read[train_ids]}")
    print(
        F"Validation examples (total:{len(val_ids)}) :{files_to_read[val_ids]}:"
    )
    print(F"Test examples (total:{len(test_ids)}) :{files_to_read[test_ids]}")

    print("Selecting and generating the model....")
    now = datetime.utcnow().strftime("%Y_%m_%d_%H_%M")
    model_name = F'{run_name}_{now}'

    # ******************* Selecting the model **********************
    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        # BUGFIX: fail loudly on an unknown type instead of hitting a
        # NameError on `model` at plot_model below.
        raise ValueError(F"Unknown network type: {net_type}")

    plot_model(model,
               to_file=join(output_folder, F'{model_name}.png'),
               show_shapes=True)

    print("Saving split information...")
    file_name_splits = join(split_info_folder, F'{model_name}.txt')
    utilsNN.save_splits(file_name=file_name_splits,
                        train_ids=train_ids,
                        val_ids=val_ids,
                        test_ids=test_ids)

    print("Compiling model ...")
    model.compile(loss=loss_func, optimizer=optimizer, metrics=eval_metrics)

    print("Getting callbacks ...")

    # Early stopping monitors the validation value of the first metric
    [logger, save_callback, stop_callback] = utilsNN.get_all_callbacks(
        model_name=model_name,
        early_stopping_func=F'val_{eval_metrics[0].__name__}',
        weights_folder=weights_folder,
        logs_folder=logs_folder)

    print("Training ...")
    # ----------- Using preprocessed data -------------------
    generator_train = data_gen_from_preproc(input_folder_preproc, config,
                                            train_ids, fields, fields_obs,
                                            output_field)
    generator_val = data_gen_from_preproc(input_folder_preproc, config,
                                          val_ids, fields, fields_obs,
                                          output_field)

    # Decide which generator to use
    data_augmentation = config[TrainingParams.data_augmentation]

    # NOTE(review): fit_generator is deprecated in TF2 in favor of Model.fit,
    # which accepts generators directly — kept as-is for compatibility with
    # the Keras version this project pins.
    model.fit_generator(
        generator_train,
        steps_per_epoch=1000,
        validation_data=generator_val,
        # validation_steps=min(100, len(val_ids)),
        validation_steps=100,
        use_multiprocessing=False,
        workers=1,
        # validation_freq=10, # How often to compute the validation loss
        epochs=epochs,
        callbacks=[logger, save_callback, stop_callback])