Example #1
def visualizeBackgroundIncrementAnalysis():
    input_folder = '/data/HYCOM/DA_HYCOM_TSIS/preproc'
    output_folder = '/data/HYCOM/DA_HYCOM_TSIS/SUMMARY/AssimilatedData'
    years = [2009]  # Currently unused in this function
    fields = ['srfhgt']  # Currently unused in this function
    all_files = os.listdir(input_folder)
    model_files = [
        join(input_folder, x) for x in all_files if x.find('model') != -1
    ]
    obs_files = [
        join(input_folder, x) for x in all_files if x.find('obs') != -1
    ]
    inc_files = [
        join(input_folder, x) for x in all_files if x.find('increment') != -1
    ]
    # Sort all of them or they won't match
    inc_files.sort()
    model_files.sort()
    obs_files.sort()

    mvar = "temp"  #"srfhgt"
    ovar = "sst"  #"ssh"

    var_file = "/data/HYCOM/DA_HYCOM_TSIS/preproc/cov_mat/tops_ias_std.nc"
    data_lat_lon = read_netcdf(var_file, ['xc', 'yc'])
    lat = data_lat_lon['yc'][:]
    lon = data_lat_lon['xc'][:]

    for i in range(len(model_files)):
        model_data = read_netcdf(model_files[i], [mvar])
        obs_data_m = read_netcdf(obs_files[i], [ovar])
        inc_data = read_netcdf(inc_files[i], [mvar])
        # Start from the background field and overwrite the points where
        # observations are available (unmasked in the obs file)
        obs_data = model_data[mvar][:].copy()
        no_mask_at = np.logical_not(obs_data_m[ovar][:].mask)
        obs_data[no_mask_at] = obs_data_m[ovar][:][no_mask_at]

        data = np.array([model_data[mvar][:], obs_data, inc_data[mvar][:]])
        plotMaps1(data, ["Background", "Observation", "Increment"], "Title",
                  lat, lon)
        plt.savefig(join(output_folder, F"{i:04d}.png"), bbox_inches='tight')
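
All of these examples rely on a read_netcdf(file_name, field_names, z_layers) helper that is not shown. The sketch below is a minimal stand-in, assuming the netCDF4 library and the calling conventions visible in these examples (an empty field list in Example #2 reads every variable); the real project helper may differ.

def read_netcdf_sketch(file_name, field_names, z_layers=None):
    """Hypothetical stand-in for the project's read_netcdf helper."""
    from netCDF4 import Dataset
    ds = Dataset(file_name, 'r')
    # An empty list is treated as "read all variables" (see Example #2)
    names = field_names if len(field_names) > 0 else list(ds.variables.keys())
    data = {}
    for name in names:
        var = ds.variables[name]
        # Keep only the requested vertical layers for 3D fields
        if z_layers is not None and var.ndim == 3:
            data[name] = var[z_layers, :, :]
        else:
            data[name] = var[:]
    return data
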
Example #2
def ComputeMinMaxSTDFields(file_name, fields_names, output_file):

    data = read_netcdf(file_name, [], [0])
    out_fields = []
    out_mins = []
    out_maxs = []
    out_vars = []
    out_means = []

    for field_name in fields_names:
        if len(data[field_name].shape) == 2:
            field = data[field_name][:]
        elif len(data[field_name].shape) == 3:
            field = data[field_name][0, :]
        else:
            raise ValueError(F"Unexpected shape for {field_name}: {data[field_name].shape}")

        # im = plt.imshow(np.flip(field, axis=0), cmap='gist_earth')
        # plt.colorbar(im)
        # plt.title(field_name)
        # plt.show()

        out_fields.append(field_name)
        out_mins.append(np.amin(field))
        out_maxs.append(np.amax(field))
        out_means.append(np.mean(field))
        out_vars.append(np.var(field))

    out_dic = {
        "Name": ["STD" for x in range(len(out_fields))],
        "Field": out_fields,
        "MIN": out_mins,
        "MAX": out_maxs,
        "MEAN": out_means,
        "VAR": out_vars,
    }

    df = pd.DataFrame.from_dict(out_dic)

    df.to_csv(output_file)
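
A minimal usage sketch, reusing the std-fields file referenced in Example #1 (the field list and output path below are illustrative, not from the project):

ComputeMinMaxSTDFields("/data/HYCOM/DA_HYCOM_TSIS/preproc/cov_mat/tops_ias_std.nc",
                       ["temp", "srfhgt"],  # Hypothetical field names
                       "/tmp/min_max_std_fields.csv")
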
Example #3
def data_gen_from_preproc(input_folder_preproc, config, ids, field_names, obs_field_names, output_fields, z_layers=[0]):
    """
    This generator should generate X and Y for a CNN
    :param path:
    :param file_names:
    :return:
    """
    ex_id = -1
    np.random.shuffle(ids)
    batch_size = config[TrainingParams.batch_size]

    all_files = os.listdir(input_folder_preproc)
    obs_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('obs')])
    increment_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('increment')])
    model_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('model')])
    var_file = join(input_folder_preproc, "cov_mat", "tops_ias_std.nc")
    obs_files.sort()
    increment_files.sort()
    model_files.sort()

    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    norm_type = config[ProjTrainingParams.norm_type]

    # Read the variance of selected
    var_field_names = config[ProjTrainingParams.fields_names_var]
    if len(var_field_names) > 0:
        input_fields_var = read_netcdf(var_file, var_field_names, z_layers)
    else:
        input_fields_var = []

    while True:
        # These lines are for sequential selection
        if ex_id < (len(ids) - 1): # We are not supporting batch processing right now
            ex_id += 1
        else:
            ex_id = 0
            np.random.shuffle(ids)  # Shuffle the examples once all of them have been used

        c_id = ids[ex_id]
        try:
            output_file_name = increment_files[c_id]
            obs_file_name = obs_files[c_id]
            model_file_name = model_files[c_id]

            # Validate that all the files correspond to the same date
            model_file_year, model_file_day = get_date_from_preproc_filename(model_file_name)
            obs_file_year, obs_file_day = get_date_from_preproc_filename(obs_file_name)
            output_file_year, output_file_day = get_date_from_preproc_filename(output_file_name)

            if (model_file_day != obs_file_day) or (model_file_day != output_file_day) or\
                    (model_file_year != obs_file_year) or (model_file_year != output_file_year):
                print(F"The year and day do not match across the files: {output_file_name}, {model_file_name}, {obs_file_name}")
                exit()

            # If any file doesn't exist, jump to the next example
            if not(exists(output_file_name)):
                print(F"File doesn't exist: {output_file_name}")
                continue

            # *********************** Reading files **************************
            input_fields_model = read_netcdf(model_file_name, field_names, z_layers)
            input_fields_obs = read_netcdf(obs_file_name, obs_field_names, z_layers)
            output_field_increment = read_netcdf(output_file_name, output_fields, z_layers)

            succ_attempts = 0
            while succ_attempts < batch_size:
                start_row = np.random.randint(0, 891 - rows)  # These hardcoded numbers come from the specific size of these files
                start_col = np.random.randint(0, 1401 - cols)

                try:
                    perc_ocean = 0.99
                    if config[ModelParams.MODEL] == AiModels.UNET_2D_MULTISTREAMS:
                        input_data, y_data = generateXandYMulti(input_fields_model, input_fields_obs, input_fields_var, output_field_increment,
                                                                field_names, obs_field_names, var_field_names, output_fields,
                                                           start_row, start_col, rows, cols, norm_type=norm_type)
                    else:
                        input_data, y_data = generateXandY(input_fields_model, input_fields_obs, input_fields_var, output_field_increment,
                                                           field_names, obs_field_names, var_field_names, output_fields,
                                                           start_row, start_col, rows, cols, norm_type=norm_type, perc_ocean=perc_ocean)

                except Exception as e:
                    # print(F"Failed for {model_file_name} row:{start_row} col:{start_col}: {e}")
                    continue

                succ_attempts += 1

                # Land is set to 0 in the inputs and -0.5 in the outputs. Trying a new loss function that does not take land into account
                input_data = np.nan_to_num(input_data, nan=0)
                y_data = np.nan_to_num(y_data, nan=-0.5)
                # input_data = np.nan_to_num(input_data, nan=-1000)
                # y_data = np.nan_to_num(y_data, nan=-1000)
                # input_data = np.nan_to_num(input_data, nan=0)
                # y_data = np.nan_to_num(y_data, nan=0)

                if config[ModelParams.MODEL] == AiModels.UNET_2D_MULTISTREAMS:
                    X = [np.expand_dims(x, axis=0) for x in input_data]
                else:
                    X = np.expand_dims(input_data, axis=0)
                Y = np.expand_dims(y_data, axis=0)

                # --------------- Just for debugging Plotting input and output---------------------------
                # import matplotlib.pyplot as plt
                # import pylab
                # # mincbar = np.nanmin(input_data)
                # # maxcbar = np.nanmax(input_data)
                #
                # # viz_obj = EOAImageVisualizer(output_folder=join(input_folder_preproc, "training_imgs"), disp_images=False, mincbar=mincbar, maxcbar=maxcbar)
                # viz_obj = EOAImageVisualizer(output_folder=join(input_folder_preproc, "training_imgs"), disp_images=False)
                # #
                # # viz_obj.plot_2d_data_np_raw(np.concatenate((input_data.swapaxes(0,2), y_data.swapaxes(0,2))),
                # viz_obj.plot_2d_data_np_raw(np.concatenate((X[0,:,:,:].swapaxes(0,2), Y[0,:,:,:].swapaxes(0,2))),
                #                             var_names=[F"in_model_{x}" for x in field_names] +
                #                                       [F"in_obs_{x}" for x in obs_field_names]+
                #                                       [F"out_inc_{x}" for x in output_fields],
                #                             rot_90=True,
                #                             file_name=F"{model_file_year}_{model_file_day}_{start_col}_{start_row}",
                #                             title=F"Input data: {field_names} and {obs_field_names}, output {output_fields}")

                yield X, Y
                # yield [np.zeros((1,160,160,1)) for x in range(7)], Y
        except Exception as e:
            print(F"----- Not able to generate an example for file id {c_id}. ERROR: ", str(e))
Example #4
def test_model(config):
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')
    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        raise ValueError(F"Unknown network type: {net_type}")

    plot_model(model,
               to_file=join(output_folder, 'running.png'),
               show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    tot_rows = 891
    tot_cols = 1401

    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []

    # np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        if day_of_year != 5:  # Only process day 5 of each year (debugging filter)
            continue

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        whole_cnn = np.zeros((tot_rows, tot_cols))
        whole_y = np.zeros((tot_rows, tot_cols))

        this_file_times = []

        start_row = 0
        donerow = False
        while not donerow:
            donecol = False
            start_col = 0
            while not donecol:
                # print(F"{start_row}-{start_row+rows} {start_col}-{start_col+cols}")
                # Generate the proper inputs for the NN
                try:
                    perc_ocean = .05
                    input_data, y_data = generateXandY(input_fields_model,
                                                       input_fields_obs,
                                                       input_fields_std,
                                                       output_field_increment,
                                                       field_names_model,
                                                       field_names_obs,
                                                       field_names_std,
                                                       output_fields,
                                                       start_row,
                                                       start_col,
                                                       rows,
                                                       cols,
                                                       norm_type=norm_type,
                                                       perc_ocean=perc_ocean)
                except Exception as e:
                    print(F"Land for {c_file} row:{start_row} col:{start_col}")
                    start_col, donecol = verifyBoundaries(
                        start_col, cols, tot_cols)
                    continue

                # ******************* Replacing nan values *********
                # Land is set to 0 in the inputs and -0.5 in the outputs, for a loss function that does not take land into account
                input_data_nans = np.isnan(input_data)
                input_data = np.nan_to_num(input_data, nan=0)
                y_data = np.nan_to_num(y_data, nan=-0.5)

                X = np.expand_dims(input_data, axis=0)
                Y = np.expand_dims(y_data, axis=0)

                # Make the prediction of the network
                start = time.time()
                output_nn_original = model.predict(X, verbose=1)
                toc = time.time() - start
                this_file_times.append(toc)
                # print(F"Time to get prediction {toc:0.3f} seconds")
                # PLOT RAW DATA
                # import matplotlib.pyplot as plt
                # plt.imshow(np.flip(output_nn_original[0,:,:,0], axis=0))
                # plt.imshow(np.flip(Y[0,:,:,0], axis=0))
                # plt.show()
                # Original MSE
                # print(F"MSE: {mean_squared_error(Y[0,:,:,0], output_nn_original[0,:,:,0])}")

                # Set to nan all values over land
                land_indexes = Y == -0.5
                output_nn_original[land_indexes] = np.nan

                # ====================== PLOTS RAW DATA  NOT NECESSARY =============================
                # viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False)
                # viz_obj.plot_2d_data_np_raw(np.concatenate((input_data.swapaxes(0,2), Y[0,:,:,:].swapaxes(0,2), output_nn_original[0,:,:,:].swapaxes(0,2))),
                #                             var_names=[F"in_model_{x}" for x in field_names_model] +
                #                                       [F"in_obs_{x}" for x in field_names_obs] +
                #                                       [F"in_var_{x}" for x in field_names_std] +
                #                                       [F"out_inc_{x}" for x in output_fields] +
                #                                       [F"cnn_{x}" for x in output_fields],
                #                             file_name=F"RAW_Input_and_CNN_{c_file}_{start_row:03d}_{start_col:03d}",
                #                             rot_90=True,
                #                             cols_per_row=len(field_names_model),
                #                             title=F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}")

                # Denormalize all inputs and outputs to the proper units of each field
                denorm_cnn_output = denormalizeData(output_nn_original,
                                                    output_fields,
                                                    PreprocParams.type_inc,
                                                    norm_type)
                denorm_y = denormalizeData(Y, output_fields,
                                           PreprocParams.type_inc, norm_type)
                input_types = [
                    PreprocParams.type_model for i in input_fields_model
                ] + [PreprocParams.type_obs for i in input_fields_obs
                     ] + [PreprocParams.type_std for i in input_fields_std]
                denorm_input = denormalizeData(
                    input_data,
                    field_names_model + field_names_obs + field_names_std,
                    input_types, norm_type)

                # Recover the original land areas, they are lost after denormalization
                denorm_input[input_data_nans] = np.nan
                denorm_y[land_indexes] = np.nan

                # Remove the 'extra dimension'
                denorm_cnn_output = np.squeeze(denorm_cnn_output)
                denorm_y = np.squeeze(denorm_y)
                whole_cnn[start_row:start_row + rows,
                          start_col:start_col + cols] = denorm_cnn_output  # Add to the 'whole' prediction
                whole_y[start_row:start_row + rows,
                        start_col:start_col + cols] = denorm_y  # Add to the 'whole' prediction

                # if np.random.random() > .99:  # Plot ~1% of the times
                if True:  # Currently plots every tile; restore the line above to plot ~1%
                    if len(denorm_cnn_output.shape) == 2:
                        # Only one output field: add a channel axis so it can be plotted as an array
                        denorm_cnn_output = np.expand_dims(denorm_cnn_output, axis=2)
                        denorm_y = np.expand_dims(denorm_y, axis=2)

                    # Compute RMSE
                    rmse_cnn = np.zeros(len(output_fields))
                    for i in range(len(output_fields)):
                        ocean_indexes = np.logical_not(
                            np.isnan(denorm_y[:, :, i]))
                        rmse_cnn[i] = np.sqrt(
                            mean_squared_error(
                                denorm_cnn_output[:, :, i][ocean_indexes],
                                denorm_y[:, :, i][ocean_indexes]))

                    # viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False, mincbar=mincbar, maxcbar=maxcbar)
                    viz_obj = EOAImageVisualizer(
                        output_folder=output_imgs_folder, disp_images=False)

                    # ================== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ===================
                    # viz_obj.plot_2d_data_np_raw(np.concatenate((input_data.swapaxes(0,2), Y[0,:,:,:].swapaxes(0,2), output_nn_original[0,:,:,:].swapaxes(0,2))),
                    viz_obj.plot_2d_data_np_raw(
                        np.concatenate(
                            (denorm_input.swapaxes(0,
                                                   2), denorm_y.swapaxes(0, 2),
                             denorm_cnn_output.swapaxes(0, 2))),
                        var_names=[F"in_model_{x}"
                                   for x in field_names_model] +
                        [F"in_obs_{x}" for x in field_names_obs] +
                        [F"in_var_{x}" for x in field_names_std] +
                        [F"out_inc_{x}" for x in output_fields] +
                        [F"cnn_{x}" for x in output_fields],
                        file_name=
                        F"Input_and_CNN_{c_file}_{start_row:03d}_{start_col:03d}",
                        cmap=cmap_model + cmap_obs + cmap_std + cmap_out +
                        cmap_out,
                        rot_90=True,
                        cols_per_row=len(field_names_model),
                        title=
                        F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}"
                    )

                    # =========== Making the same color bar for desired output and the NN =====================
                    mincbar = [
                        np.nanmin(denorm_y[:, :, x])
                        for x in range(denorm_cnn_output.shape[-1])
                    ]
                    maxcbar = [
                        np.nanmax(denorm_y[:, :, x])
                        for x in range(denorm_cnn_output.shape[-1])
                    ]
                    error = (denorm_y - denorm_cnn_output).swapaxes(0, 2)
                    mincbarerror = [
                        np.nanmin(error[i, :, :])
                        for i in range(len(output_fields))
                    ]
                    maxcbarerror = [
                        np.nanmax(error[i, :, :])
                        for i in range(len(output_fields))
                    ]
                    viz_obj = EOAImageVisualizer(
                        output_folder=output_imgs_folder,
                        disp_images=False,
                        mincbar=mincbar + mincbar + mincbarerror,
                        maxcbar=maxcbar + maxcbar + maxcbarerror)

                    # ================== Displays CNN and TSIS with RMSE ================
                    viz_obj.output_folder = join(output_imgs_folder,
                                                 'JoinedErrorCNN')
                    cmap = chooseCMAP(output_fields)
                    error_cmap = cmocean.cm.diff
                    viz_obj.plot_2d_data_np_raw(
                        np.concatenate((denorm_cnn_output.swapaxes(
                            0, 2), denorm_y.swapaxes(0, 2), error),
                                       axis=0),
                        var_names=[F"CNN INC {x}" for x in output_fields] +
                        [F"TSIS INC {x}" for x in output_fields] +
                        [F'RMSE {c_rmse_cnn:0.4f}' for c_rmse_cnn in rmse_cnn],
                        file_name=
                        F"AllError_{c_file}_{start_row:03d}_{start_col:03d}",
                        rot_90=True,
                        cmap=cmap + cmap + [error_cmap],
                        cols_per_row=len(output_fields),
                        title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}"
                    )

                start_col, donecol = verifyBoundaries(start_col, cols,
                                                      tot_cols)
                # Column for
            start_row, donerow = verifyBoundaries(start_row, rows, tot_rows)
            # Row for

        # ======= Plots whole output with RMSE
        mincbar = np.nanmin(whole_y) / 2
        maxcbar = np.nanmax(whole_y) / 2
        error = whole_y - whole_cnn
        mincbarerror = np.nanmin(error) / 2
        maxcbarerror = np.nanmax(error) / 2
        no_zero_ids = np.count_nonzero(whole_cnn)

        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)
        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        if np.random.random() > .9 or day_of_year == 353:  # Plot ~10% of the times
            viz_obj = EOAImageVisualizer(
                output_folder=output_imgs_folder,
                disp_images=False,
                # One colorbar limit per panel: CNN, TSIS, and their difference
                mincbar=[mincbar, mincbar, mincbarerror],
                maxcbar=[maxcbar, maxcbar, maxcbarerror])
            # mincbar=[-5, -5, -1],
            # maxcbar=[10, 10, 1])

            # ================== Displays CNN and TSIS with RMSE ================
            viz_obj.output_folder = join(output_imgs_folder,
                                         'WholeOutput_CNN_TSIS')
            viz_obj.plot_2d_data_np_raw(
                [
                    np.flip(whole_cnn, axis=0),
                    np.flip(whole_y, axis=0),
                    np.flip(error, axis=0)
                ],
                var_names=[F"CNN INC {x}" for x in output_fields] +
                [F"TSIS INC {x}"
                 for x in output_fields] + [F'RMSE {rmse_cnn:0.4f}'],
                file_name=F"WholeOutput_CNN_TSIS_{c_file}",
                rot_90=False,
                cols_per_row=3,
                cmap=cmocean.cm.algae,
                title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}")
Example #5
def singleModel(config):
    input_folder = config[PredictionParams.input_folder]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    model_field_names = config[ProjTrainingParams.fields_names]
    obs_field_names = config[ProjTrainingParams.fields_names_obs]
    output_fields = config[ProjTrainingParams.output_fields]
    run_name = config[TrainingParams.config_name]
    output_folder = join(config[PredictionParams.output_imgs_folder],
                         'MODEL_VISUALIZATION', run_name)
    norm_type = config[ProjTrainingParams.norm_type]

    model_weights_file = config[PredictionParams.model_weights_file]

    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        raise ValueError(F"Unknown network type: {net_type}")

    create_folder(output_folder)
    plot_model(model,
               to_file=join(output_folder, 'running.png'),
               show_shapes=True)

    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # Total number of parameters
    print(F' Number of parameters: {model.count_params()}')
    # Number of parameters of the first convolutional layer
    print(F' Number of parameters first CNN: {model.layers[1].count_params()}')

    # Example of plotting the filters of a single layer
    print("Printing layer names:")
    print_layer_names(model)
    # plot_cnn_filters_by_layer(model.layers[1], 'First set of filters')  # The hardcoded index 1 may change per project

    # *********** Read files to predict***********
    # ========= Here you need to build your test input differently in each project ====
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    var_field_names = config[ProjTrainingParams.fields_names_var]
    if len(var_field_names) > 0:
        input_fields_var = read_netcdf(var_file, var_field_names, z_layers)
    else:
        input_fields_var = []

    np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, model_field_names,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, obs_field_names, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        for start_row in np.arange(0, 891 - rows, rows):
            for start_col in np.arange(0, 1401 - cols, cols):
                try:
                    input_data, y_data = generateXandY(input_fields_model,
                                                       input_fields_obs,
                                                       input_fields_var,
                                                       output_field_increment,
                                                       model_field_names,
                                                       obs_field_names,
                                                       var_field_names,
                                                       output_fields,
                                                       start_row,
                                                       start_col,
                                                       rows,
                                                       cols,
                                                       norm_type=norm_type)
                except Exception as e:
                    print(
                        F"Failed for {c_file} row:{start_row} col:{start_col}")
                    continue

                X_nan = np.expand_dims(input_data, axis=0)
                Y_nan = np.expand_dims(y_data, axis=0)

                # ******************* Replacing nan values *********
                # Land is set to 0 in the inputs and -0.5 in the outputs, for a loss function that does not take land into account
                X = np.nan_to_num(X_nan, nan=0)
                Y = np.nan_to_num(Y_nan, nan=-0.5)

                output_nn = model.predict(X, verbose=1)
                output_nn[np.isnan(Y_nan)] = np.nan
                # =========== Outputs of intermediate layers (the last one should match output_nn) ===========
                print("Evaluating all intermediate layers")
                inp = model.input  # input placeholder
                outputs = [
                    layer.output for layer in model.layers[1:]
                    if layer.name.find("conv") != -1
                ]  # Displaying only conv layers
                # All evaluation functions (used to call the model up to each layer)
                functors = [K.function([inp], [out]) for out in outputs]
                # Outputs for every intermediate layer
                layer_outs = [func([X]) for func in functors]

                for layer_to_plot in range(0, len(outputs)):
                    title = F'Layer {layer_to_plot}_{outputs[layer_to_plot].name}. {c_file}_{start_row:03d}_{start_col:03d}'
                    file_name = F'{c_file}_{start_row:03d}_{start_col:03d}_lay_{layer_to_plot}'
                    plot_intermediate_2dcnn_feature_map(
                        layer_outs[layer_to_plot][0],
                        input_data=X_nan,
                        desired_output_data=Y_nan,
                        nn_output_data=output_nn,
                        input_fields=model_field_names + obs_field_names +
                        var_field_names,
                        title=title,
                        output_folder=output_folder,
                        file_name=file_name,
                        disp_images=False)
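
Example #5 also calls a print_layer_names helper that is not shown; a plausible one-liner version (the body is an assumption) is:

def print_layer_names_sketch(model):
    # Print each layer index and name, useful for picking the hardcoded layer indices above
    for idx, layer in enumerate(model.layers):
        print(F"{idx}: {layer.name}")
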
Example #6
def plot_raw_data_new(proc_id):
    """
    This code makes two plots: 1) model and increment 2) model, increment and observations
    Depending on which plot you want to make, it reads field_names and fields_names_obs from the PreprocConfig file
    :param proc_id:
    :return:
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    # Iterate current year
    for c_year in YEARS:
        # Iterate current month
        for c_month in MONTHS:
            try:
                days_of_month, days_of_year = get_days_from_month(c_month)
                # Reads the data (DA, Free run, and observations)
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month, day_idx=2)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                print(F"Failed to find any file for date {c_year}-{c_month}: {e}")
                continue

            # The day index is interleaved across processes so this loop can run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) == proc_id:
                    # Build the regular expressions for the current desired files (raw f-strings avoid escape warnings on \S)
                    re_tsis = RF'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                    re_hycom = RF'020_archv.{c_year}_{c_day_of_year:03d}\S*.a'
                    # re_hycom = RF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                    # re_obs = RF'tsis_obs_ias_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'
                    re_obs = RF'tsis_obs_gomb4_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                    try:
                        # Gets the proper index of the file for the three cases
                        increment_file_idx = [
                            i for i, file in enumerate(increment_files)
                            if re.search(re_tsis, file) is not None
                        ][0]
                        hycom_file_idx = [
                            i for i, file in enumerate(hycom_files)
                            if re.search(re_hycom, file) is not None
                        ][0]
                        obs_file_idx = [
                            i for i, file in enumerate(obs_files)
                            if re.search(re_obs, file) is not None
                        ][0]
                    except Exception as e:
                        print(
                            F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}"
                        )
                        continue

                    print(
                        F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                    )
                    print(
                        F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}"
                    )
                    print(
                        F"Available fields on model: {read_field_names(hycom_paths[hycom_file_idx])}"
                    )
                    ds = xr.open_dataset(obs_paths[obs_file_idx])
                    print(F"Available fields on observations: {list(ds.keys())}")

                    model_state_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)
                    increment_np_fields = read_hycom_fields(
                        increment_paths[increment_file_idx],
                        fields,
                        layers=layers)

                    # obs_np_fields = read_netcdf(obs_paths[obs_file_idx], fields_obs, rename_fields=fields)
                    obs_np_fields = read_netcdf(obs_paths[obs_file_idx],
                                                fields_obs)

                    # Iterate over the fields defined in PreprocConfig and plot them
                    for idx_field, c_field_name in enumerate(fields):
                        increment_np_c_field = increment_np_fields[
                            c_field_name]
                        nan_indx = increment_np_c_field == 0
                        increment_np_c_field[nan_indx] = np.nan
                        model_state_np_c_field = model_state_np_fields[
                            c_field_name]

                        # diff_increment_vs_fo = increment_np_c_field - model_state_np_c_field
                        # In these 2 cases, we only compute it for the surface layer
                        # diff_obs_vs_hycom = obs_np_c_field - model_state_np_c_field[0]
                        # obs_np_c_field[502,609] - model_state_np_c_field[0][502,609]
                        # diff_obs_vs_da = obs_np_c_field - increment_np_c_field[0]

                        # mse_hycom_vs_da = mse(increment_np_c_field, model_state_np_c_field)
                        # mse_obs_vs_hycom = mse(obs_np_c_field, model_state_np_c_field[0])
                        # mse_obs_vs_da = mse(obs_np_c_field, increment_np_c_field[0])

                        if c_field_name == "thknss":
                            divide = 9806
                            model_state_np_c_field = model_state_np_c_field / divide
                            increment_np_c_field = increment_np_c_field / divide
                        if c_field_name == "srfhgt":
                            inc = increment_np_c_field
                        else:
                            inc = (model_state_np_c_field -
                                   increment_np_c_field)

                        # ======================= Only Background state and TSIS increment ==================
                        try:
                            title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                            img_viz.plot_3d_data_np(
                                [model_state_np_c_field, inc],
                                # img_viz.plot_3d_data_np([model_state_np_c_field, increment_np_c_field],
                                var_names=['HYCOM', 'Increment (TSIS)'],
                                title=title,
                                file_name_prefix=
                                F'ModelAndIncrement_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                                z_lavels_names=layers,
                                flip_data=True,
                                plot_mode=plot_modes[idx_field])
                        except Exception as e:
                            print(F"Failed for field: {c_field_name}: {e}")
Example #7
def plot_raw_data(proc_id):
    """
    Makes images of the available data (Free run, DA and Observations)
    :param proc_id:
    :return:
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    # Iterate current year
    for c_year in YEARS:
        # Iterate current month
        for c_month in MONTHS:
            try:
                days_of_month, days_of_year = get_days_from_month(c_month)
                # Reads the data (DA, Free run, and observations)
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                print(F"Failed to find any file for date {c_year}-{c_month}")
                continue

            # The day index is interleaved across processes so this loop can run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) == proc_id:
                    # Build the regular expressions for the current desired files (raw f-strings avoid escape warnings on \S)
                    re_tsis = RF'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                    re_hycom = RF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                    re_obs = RF'tsis_obs_ias_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                    try:
                        # Gets the proper index of the file for the three cases
                        increment_file_idx = [
                            i for i, file in enumerate(increment_files)
                            if re.search(re_tsis, file) is not None
                        ][0]
                        hycom_file_idx = [
                            i for i, file in enumerate(hycom_files)
                            if re.search(re_hycom, file) is not None
                        ][0]
                        obs_file_idx = [
                            i for i, file in enumerate(obs_files)
                            if re.search(re_obs, file) is not None
                        ][0]
                    except Exception as e:
                        print(
                            F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}"
                        )
                        continue

                    print(
                        F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                    )
                    print(
                        F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}"
                    )
                    increment_np_fields = read_hycom_fields(
                        increment_paths[increment_file_idx],
                        fields,
                        layers=layers)
                    model_state_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)
                    obs_np_fields = read_netcdf(obs_paths[obs_file_idx],
                                                fields_obs,
                                                layers=[0],
                                                rename_fields=fields)

                    for idx_field, c_field_name in enumerate(fields):
                        increment_np_c_field = increment_np_fields[
                            c_field_name]
                        nan_indx = increment_np_c_field == 0
                        increment_np_c_field[nan_indx] = np.nan
                        model_state_np_c_field = model_state_np_fields[
                            c_field_name]
                        obs_np_c_field = obs_np_fields[c_field_name]

                        # diff_increment_vs_fo = increment_np_c_field - model_state_np_c_field
                        # In these 2 cases, we only compute it for the surface layer
                        # diff_obs_vs_hycom = obs_np_c_field - model_state_np_c_field[0]
                        # obs_np_c_field[502, 609] - model_state_np_c_field[0][502, 609]
                        # diff_obs_vs_da = obs_np_c_field - increment_np_c_field[0]

                        # mse_hycom_vs_da = mse(increment_np_c_field, model_state_np_c_field)
                        # mse_obs_vs_hycom = mse(obs_np_c_field, model_state_np_c_field[0])
                        # mse_obs_vs_da = mse(obs_np_c_field, increment_np_c_field[0])

                        title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                        # ======================= Only Free run HYCOM, TSIS, Observations ==================
                        img_viz.plot_3d_data_np(
                            [
                                np.expand_dims(obs_np_c_field, 0),
                                model_state_np_c_field, increment_np_c_field
                            ],
                            var_names=[
                                F'Observations', 'HYCOM', 'Increment (TSIS)'
                            ],
                            title=title,
                            file_name_prefix=
                            F'Summary_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                            z_lavels_names=layers,
                            flip_data=True,
                            plot_mode=plot_modes[idx_field])
Example #8
import matplotlib.pyplot as plt
from inout.io_netcdf import read_netcdf
import numpy as np
import matplotlib.patches as patches

from os.path import join
import os

input_folder = "/home/olmozavala/Dropbox/MyProjects/EOAS/COAPS/MURI_AI_Ocean/Data_Assimilation/HYCOM-TSIS/testdata"
input_file = "model_2009_205.nc"

data = read_netcdf(join(input_folder, input_file), ["temp"])

fig, ax = plt.subplots()
ax.imshow(np.flip(data["temp"], axis=0))
print(data["temp"].shape)

found = 0
while found < 20:
    row = np.random.randint(0, 891)
    col = np.random.randint(0, 1401)
    ldata = data["temp"][row:row + 160, col:col + 160]
    if len(ldata[ldata.mask]) == 0:  # Keep only patches that are fully ocean (no masked pixels)
        # Create a Rectangle patch
        print(F"Adding at: row:{row}-{row+160} and col:{col}-{col+160}")
        if row > 445:
            # The original snippet is truncated after edgecolor='r'; the remaining
            # arguments and the add_patch/counter lines are a plausible completion
            rect = patches.Rectangle((col, int(np.abs(445 - row))),
                                     160,
                                     160,
                                     linewidth=1,
                                     edgecolor='r',
                                     facecolor='none')
            ax.add_patch(rect)
        found += 1

plt.show()
Example #9
def test_model(config):
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')

    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        raise ValueError(F"Unknown network type: {net_type}")

    plot_model(model,
               to_file=join(output_folder, 'running.png'),
               show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    tot_rows = 891
    tot_cols = 1401

    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []

    # np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model,
                                         z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        this_file_times = []

        try:
            perc_ocean = .01
            # The crop covers the whole domain: origin (0, 0) with the full grid size
            input_data, y_data = generateXandY(input_fields_model,
                                               input_fields_obs,
                                               input_fields_std,
                                               output_field_increment,
                                               field_names_model,
                                               field_names_obs,
                                               field_names_std,
                                               output_fields,
                                               0,
                                               0,
                                               tot_rows,
                                               tot_cols,
                                               norm_type=norm_type,
                                               perc_ocean=perc_ocean)
        except Exception as e:
            print(F"Exception {e}")
            continue  # Without valid inputs there is nothing to predict for this file

        # ******************* Replacing nan values *********
        # Land is set to 0 in the inputs and -0.5 in the outputs, for a loss function that does not take land into account
        input_data_nans = np.isnan(input_data)
        input_data = np.nan_to_num(input_data, nan=0)
        y_data = np.nan_to_num(y_data, nan=-0.5)

        X = np.expand_dims(input_data, axis=0)
        Y = np.expand_dims(y_data, axis=0)

        # Make the prediction of the network
        start = time.time()
        output_nn_original = model.predict(X, verbose=1)
        toc = time.time() - start
        this_file_times.append(toc)

        # Make nan all values inside the land
        land_indexes = Y == -0.5
        output_nn_original[land_indexes] = np.nan

        # ==== Denormalizing all inputs and outputs
        denorm_cnn_output = denormalizeData(output_nn_original, output_fields,
                                            PreprocParams.type_inc, norm_type)
        denorm_y = denormalizeData(Y, output_fields, PreprocParams.type_inc,
                                   norm_type)
        input_types = [PreprocParams.type_model
                       for i in input_fields_model] + [
                           PreprocParams.type_obs for i in input_fields_obs
                       ] + [PreprocParams.type_std for i in input_fields_std]
        denorm_input = denormalizeData(
            input_data, field_names_model + field_names_obs + field_names_std,
            input_types, norm_type)

        # Recover the original land areas, they are lost after denormalization
        denorm_y[land_indexes] = np.nan

        # Remove the 'extra dimension'
        denorm_cnn_output = np.squeeze(denorm_cnn_output)
        denorm_y = np.squeeze(denorm_y)
        whole_cnn = denorm_cnn_output  # The 'whole' prediction (this version predicts the full domain at once)
        whole_y = denorm_y

        if len(denorm_cnn_output.shape) == 2:
            # Only one output field: add a channel axis so it can be plotted as an array
            denorm_cnn_output = np.expand_dims(denorm_cnn_output, axis=2)
            denorm_y = np.expand_dims(denorm_y, axis=2)

        # Compute RMSE
        # rmse_cnn = np.zeros(len(output_fields))
        # for i in range(len(output_fields)):
        #     ocean_indexes = np.logical_not(np.isnan(denorm_y[:,:,i]))
        #     rmse_cnn[i] = np.sqrt(mean_squared_error(denorm_cnn_output[:,:,i][ocean_indexes], denorm_y[:,:,i][ocean_indexes]))

        # ================== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ===================
        # Adding back mask to all the input variables
        denorm_input[input_data_nans] = np.nan

        # ======= Plots whole output with RMSE
        mincbar = np.nanmin(whole_y)
        maxcbar = np.nanmax(whole_y)
        error = whole_y - whole_cnn
        mincbarerror = np.nanmin(error)
        maxcbarerror = np.nanmax(error)
        no_zero_ids = np.count_nonzero(whole_cnn)

        if output_fields[0] == 'srfhgt':
            # Only for SSH: srfhgt is stored in geopotential units, so divide by g to get meters
            whole_cnn /= 9.81
            whole_y = np.array(whole_y) / 9.81

        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)

        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        # if day_of_year == 353:  # Plot only a specific day
        if True:  # Currently plots every file

            # viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False, mincbar=mincbar, maxcbar=maxcbar)
            viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder,
                                         disp_images=False)

            # viz_obj.plot_2d_data_np_raw(np.concatenate((input_data.swapaxes(0,2), Y[0,:,:,:].swapaxes(0,2), output_nn_original[0,:,:,:].swapaxes(0,2))),
            viz_obj.plot_2d_data_np_raw(
                np.concatenate((denorm_input.swapaxes(0, 2),
                                denorm_y.swapaxes(0, 2),
                                denorm_cnn_output.swapaxes(0, 2))),
                var_names=[F"in_model_{x}" for x in field_names_model] +
                          [F"in_obs_{x}" for x in field_names_obs] +
                          [F"in_var_{x}" for x in field_names_std] +
                          [F"out_inc_{x}" for x in output_fields] +
                          [F"cnn_{x}" for x in output_fields],
                file_name=F"Global_Input_and_CNN_{c_file}",
                rot_90=True,
                cmap=cmap_model + cmap_obs + cmap_std + cmap_out + cmap_out,
                cols_per_row=len(field_names_model),
                title=F"Input data: {field_names_model} and obs {field_names_obs}, "
                      F"increment {output_fields}, cnn {output_fields}")

            minmax = getMinMaxPlot(output_fields)[0]
            viz_obj = EOAImageVisualizer(
                output_folder=output_imgs_folder,
                disp_images=False,
                # mincbar=mincbar + mincbar + mincbarerror,
                # maxcbar=maxcbar + maxcbar + maxcbarerror)
                # mincbar=[minmax[0], minmax[0], max(minmax[0],-1)],
                # maxcbar=[minmax[1], minmax[1], min(minmax[1],1)])
                mincbar=[minmax[0], minmax[0], -1],
                maxcbar=[minmax[1], minmax[1], 1])

            # ================== Displays CNN and TSIS with RMSE ================
            error_cmap = cmocean.cm.diff
            viz_obj.output_folder = join(output_imgs_folder,
                                         'WholeOutput_CNN_TSIS')
            viz_obj.plot_2d_data_np_raw(
                [np.flip(whole_cnn, axis=0),
                 np.flip(whole_y, axis=0),
                 np.flip(error, axis=0)],
                # var_names=[F"CNN INC {x}" for x in output_fields] + [F"TSIS INC {x}" for x in output_fields] + [F'TSIS - CNN (Mean RMSE {rmse_cnn:0.4f} m)'],
                var_names=["CNN increment SSH" for _ in output_fields] +
                          ["TSIS increment SSH" for _ in output_fields] +
                          [F'TSIS - CNN \n (Mean RMSE {rmse_cnn:0.4f} m)'],
                file_name=F"Global_WholeOutput_CNN_TSIS_{c_file}",
                rot_90=False,
                cmap=cmap_out + cmap_out + [error_cmap],
                cols_per_row=3,
                # title=F"{output_fields[0]} RMSE: {np.mean(rmse_cnn):0.5f} m.")
                title=F"SSH RMSE: {np.mean(rmse_cnn):0.5f} m.")

            print("DONE ALL FILES!!!!!!!!!!!!!")
    dic_summary = {
        "File": model_files,
        "rmse": all_whole_rmse,
        "times mean": all_whole_mean_times,
        "times sum": all_whole_sum_times,
    }
    df = pd.DataFrame.from_dict(dic_summary)
    df.to_csv(join(output_imgs_folder, "Global_RMSE_and_times.csv"))
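
# --- Added sketch (not part of the original example) ---
# A minimal, self-contained illustration of the masked RMSE computed above:
# land pixels are NaN, so np.nansum skips them and the sum is divided by the
# count of valid pixels. The example above divides by the count of nonzero
# CNN outputs; counting non-NaN target pixels, as done here, is an
# equivalent choice when land is the only masked region. All names below
# are hypothetical.
import numpy as np

def masked_rmse(prediction, target):
    """RMSE restricted to pixels where the target is not NaN (i.e., ocean)."""
    valid = np.count_nonzero(np.logical_not(np.isnan(target)))
    return np.sqrt(np.nansum((target - prediction) ** 2) / valid)

target = np.array([[1.0, 2.0], [np.nan, 4.0]])  # NaN marks land
prediction = np.array([[1.5, 2.0], [0.0, 3.0]])
print(masked_rmse(prediction, target))  # sqrt(1.25 / 3) ~= 0.645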
Example #10
0
def data_gen_hycomtsis(paths,
                       file_names,
                       obs_path,
                       field_names,
                       obs_field_names,
                       output_field,
                       days_separation=1,
                       z_layers=[0]):
    """
    This generator should generate X and Y for a CNN
    :param path:
    :param file_names:
    :return:
    """
    ex_id = -1
    ids = np.arange(len(file_names))
    while True:
        # These lines are for sequential selection
        if ex_id < (len(ids) - 1):
            ex_id += 1
        else:
            ex_id = 0
            # Reshuffle the examples once all of them have been used
            np.random.shuffle(ids)

        file_name = join(paths[ex_id], file_names[ex_id])
        date_str = file_names[ex_id].split('.')[1]  # This should be the date
        date_array = date_str.split('_')

        year = int(date_array[0])
        day_of_year = int(date_array[1])
        month, day_month = get_month_and_day_of_month_from_day_of_year(
            day_of_year, year)

        # Verify that the file for the next time step exists
        cur_date = date(year, month, day_month)
        desired_date = date(year, month,
                            day_month) + timedelta(days=days_separation)
        desired_file_name = F'archv.{desired_date.year}_{get_day_of_year_from_month_and_day(desired_date.month, desired_date.day):03d}_00.a'

        if not exists(join(paths[ex_id], desired_file_name)):
            print(F"Warning! File {desired_file_name} doesn't exist")
            continue

        # *********************** Reading DA files **************************
        input_fields_da = read_hycom_output(file_name,
                                            field_names,
                                            layers=z_layers)
        output_field_da = read_hycom_output(join(paths[ex_id],
                                                 desired_file_name),
                                            [output_field],
                                            layers=z_layers)

        # *********************** Reading Obs file **************************
        # TODO Hardcoded text "WITH_PIES"
        obs_file_name = join(
            obs_path, "WITH_PIES",
            F"tsis_obs_ias_{desired_date.year}{desired_date.month:02d}{desired_date.day:02d}00.nc"
        )
        if not exists(obs_file_name):
            print(F"Warning! Observation file doesn't exist {obs_file_name}")
            continue

        # ******************* Normalizing and Cropping Data *******************
        # TODO hardcoded dimensions and cropping code
        input_fields_obs = read_netcdf(obs_file_name, obs_field_names,
                                       z_layers)
        # dims = input_fields_da[field_names[0]].shape
        rows = 888
        cols = 1400
        num_fields = 8

        data_cube = np.zeros((rows, cols, num_fields))

        id_field = 0
        for c_field in field_names:
            # data_cube[id_field, :, :] = input_fields_da[c_field][0, :, :]
            data_cube[:, :, id_field] = (input_fields_da[c_field][0, :rows, :cols] -
                                         MIN_DA[c_field]) / MAX_DA[c_field]
            id_field += 1

        for c_field in obs_field_names:
            # if len(input_fields_obs[c_field].shape) == 3:
            # data_cube[id_field, :, :] = input_fields_obs[c_field][0, :, :]
            # data_cube[:, :, id_field] = input_fields_obs[c_field][0, :rows, :cols]
            if len(input_fields_obs[c_field].shape) == 2:
                # data_cube[id_field, :, :] = input_fields_obs[c_field][:, :]
                data_cube[:, :, id_field] = (input_fields_obs[c_field][:rows, :cols] -
                                             MIN_OBS[c_field]) / MAX_OBS[c_field]
            id_field += 1

        # ******************* Replacing nan values *********

        # Add a batch axis to X and batch/channel axes to Y for the network
        X = np.expand_dims(data_cube, axis=0)
        Y = np.expand_dims(np.expand_dims(
            output_field_da[output_field][0, :rows, :cols], axis=2),
                           axis=0)

        X = np.nan_to_num(X, nan=-1)
        Y = np.nan_to_num(Y, nan=-1)

        # img_viz.plot_3d_data_singlevar_np(np.swapaxes(np.swapaxes(X[0],0,2), 1,2),
        #                               z_levels=range(len(field_names+obs_field_names)),
        #                                title='Input NN',
        #                                file_name_prefix=F'{year}_{month:02d}_{day_month:02d}',
        #                                   flip_data=True)
        #
        # img_viz.plot_3d_data_singlevar_np(np.swapaxes(np.swapaxes(Y[0],0,2), 1,2),
        #                               z_levels=[0],
        #                                title='Input NN',
        #                                file_name_prefix=F'output_{year}_{month:02d}_{day_month:02d}',
        #                                   flip_data=True)

        yield X, Y
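
# --- Added sketch (not part of the original example) ---
# One plausible way to consume the infinite generator above; the paths and
# field lists are hypothetical and must match your data layout. With
# tf.keras >= 2.1 a Python generator can be passed directly to Model.fit();
# older Keras versions used Model.fit_generator() instead.
gen = data_gen_hycomtsis(paths, file_names, obs_path,
                         field_names, obs_field_names, 'srfhgt')

# Sanity-check the shapes of one batch before training
X, Y = next(gen)
print(X.shape, Y.shape)  # Expected: (1, 888, 1400, 8) and (1, 888, 1400, 1)

# model.fit(gen, steps_per_epoch=len(file_names), epochs=10)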
Example #11
0
def ComputeOverallMinMaxVar():
    """
    Computes the mean, max and variance for all the fields in the files
    :return:
    """
    config = get_training_2d()
    input_folder = config[ProjTrainingParams.input_folder_preproc]
    fields = config[ProjTrainingParams.fields_names]
    fields_obs = config[ProjTrainingParams.fields_names_obs]

    # Initial extreme values assume each field's max is >= 0 and its min is < 1e5
    max_values_model = {field: 0 for field in fields}
    min_values_model = {field: 10**5 for field in fields}
    max_values_obs = {field: 0 for field in fields_obs}
    min_values_obs = {field: 10**5 for field in fields_obs}
    max_values_inc = {field: 0 for field in fields}
    min_values_inc = {field: 10**5 for field in fields}

    mean_values_model = {field: 0 for field in fields}
    mean_values_obs = {field: 0 for field in fields_obs}
    mean_values_inc = {field: 0 for field in fields}

    var_values_model = {field: 0 for field in fields}
    var_values_obs = {field: 0 for field in fields_obs}
    var_values_inc = {field: 0 for field in fields}

    # These are the data-assimilation files
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])
    model_files.sort()

    # model_files = model_files[55:58]
    tot_files = len(model_files)

    # Iterate over all the model files
    for id_file, c_file in enumerate(model_files):
        print(F"Working with {c_file}")
        # Parse the date from the file name
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        z_layers = [0]
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        # =============== Computing min/max/mean values for the model
        for idx_field, c_field_name in enumerate(fields):
            da_np_c_field = input_fields_model[c_field_name]
            # Computing mean also
            mean_values_model[c_field_name] += np.nanmean(
                da_np_c_field) / tot_files
            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_model[c_field_name]:
                max_values_model[c_field_name] = c_max
            if c_min <= min_values_model[c_field_name]:
                min_values_model[c_field_name] = c_min
        # print(F"Cur max for model: {max_values_model}")
        # print(F"Cur max for model: {min_values_model}")

        # =============== Computing min/max/mean values for the observations
        for idx_field, c_field_name in enumerate(fields_obs):
            da_np_c_field = input_fields_obs[c_field_name]
            # This try is needed because some days have no observations at all (e.g., day 245)
            try:
                mean_values_obs[c_field_name] += np.nanmean(
                    da_np_c_field) / tot_files
            except Exception:
                pass  # All-NaN field; leave the running mean unchanged
            print(F' {c_file}:{c_field_name}: {mean_values_obs[c_field_name]}')

            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_obs[c_field_name]:
                max_values_obs[c_field_name] = c_max
            if c_min <= min_values_obs[c_field_name]:
                min_values_obs[c_field_name] = c_min
        # print(F"Cur max for obs: {max_values_obs}")
        # print(F"Cur min for obs: {min_values_obs}")

        # =============== Computing min/max/mean values for the increment
        for idx_field, c_field_name in enumerate(fields):
            da_np_c_field = output_field_increment[c_field_name]
            # Computing mean also
            mean_values_inc[c_field_name] += np.nanmean(
                da_np_c_field) / tot_files
            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_inc[c_field_name]:
                max_values_inc[c_field_name] = c_max
            if c_min <= min_values_inc[c_field_name]:
                min_values_inc[c_field_name] = c_min
        # print(F"Cur max for inc: {max_values_inc}")
        # print(F"Cur min for inc: {min_values_inc}")

    # Computing STD
    print("=============================== Computing Variance....")
    for id_file, c_file in enumerate(model_files):
        print(F"Working with {c_file}")
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        z_layers = [0]
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        # =============== Computing the variance for the model
        for idx_field, c_field_name in enumerate(fields):
            da_np_c_field = input_fields_model[c_field_name]
            var_values_model[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_model[c_field_name])**
                2) / tot_files

        # =============== Computing the variance for the observations
        for idx_field, c_field_name in enumerate(fields_obs):
            da_np_c_field = input_fields_obs[c_field_name]
            data = (da_np_c_field[:].filled(np.nan) -
                    mean_values_obs[c_field_name])**2
            if (np.logical_not(np.isnan(data)).any()):
                var_values_obs[c_field_name] += np.nanmean(data) / tot_files
            # print(F' {c_file}:{c_field_name}: {var_values_obs[c_field_name]}')

        # =============== Computing the variance for the increment
        for idx_field, c_field_name in enumerate(fields):
            da_np_c_field = output_field_increment[c_field_name]
            var_values_inc[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_inc[c_field_name])**2) / tot_files

    print("----------------- Model --------------------")
    f = open("MIN_MAX_MEAN_STD.csv", 'w')
    text = F"TYPE,Field,MIN,MAX,MEAN,VARIANCE,STD\n"
    f.write(text)

    for c_field_name in fields:
        text = F"MODEL,{c_field_name},  {min_values_model[c_field_name]:0.6f},  {max_values_model[c_field_name]:0.6f}, " \
               F" {mean_values_model[c_field_name]:0.6f}, {var_values_model[c_field_name]: 0.6f}, {np.sqrt(var_values_model[c_field_name]): 0.6f}\n"
        f.write(text)
        print(text)

    print("----------------- Observations --------------------")
    for c_field_name in fields_obs:
        text = F"OBS,{c_field_name},  {min_values_obs[c_field_name]:0.6f},  {max_values_obs[c_field_name]:0.6f}, " \
            F" {mean_values_obs[c_field_name]:0.6f}, {var_values_obs[c_field_name]: 0.6f}, {np.sqrt(var_values_obs[c_field_name]): 0.6f}\n"
        f.write(text)
        print(text)

    print("----------------- Increment --------------------")
    for c_field_name in fields:
        text = F"INC,{c_field_name},  {min_values_inc[c_field_name]:0.6f},  {max_values_inc[c_field_name]:0.6f}," \
        F" {mean_values_inc[c_field_name]:0.6f}, {var_values_inc[c_field_name]: 0.6f}, {np.sqrt(var_values_inc[c_field_name]): 0.6f}\n"
        f.write(text)
        print(text)

    f.close()
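
# --- Added sketch (not part of the original example) ---
# ComputeOverallMinMaxVar above uses a two-pass scheme: pass 1 accumulates
# the mean as sum(nanmean(file)) / n_files, pass 2 the variance as
# sum(nanmean((file - mean)**2)) / n_files. Note this weights every file
# equally, which matches the true global statistics only when all files
# contain the same number of valid pixels. A minimal NumPy illustration
# with synthetic stand-ins for the NetCDF fields (all names hypothetical):
import numpy as np

files = [np.random.rand(4, 4) for _ in range(10)]
n_files = len(files)

mean = sum(np.nanmean(f) for f in files) / n_files               # pass 1
var = sum(np.nanmean((f - mean) ** 2) for f in files) / n_files  # pass 2
print(F"mean={mean:0.6f} var={var:0.6f} std={np.sqrt(var):0.6f}")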