Esempio n. 1
0
def img_generation_hycom(proc_id):
    """
    Makes one image per configured field for every daily HYCOM file found.

    Years, months, fields, layers and plot modes are read from the
    preprocessing config. Days are distributed among processes: a day is
    handled here only when ``day_index % NUM_PROC == proc_id`` (``day_index``
    starts at 0 for the first day of the month).

    :param proc_id: index of this process, compared against ``day_index % NUM_PROC``
    :return: None
    """
    config = get_preproc_config()
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # File names and paths returned by get_hycom_file_name for this year/month
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
            except Exception as e:
                print(
                    F"Failed to find any file for date {c_year}-{c_month}: {e}"
                )
                continue

            # The day index is compared with proc_id so that the days can be
            # split among several processes
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # enumerate() is zero-based; this is the calendar day used in
                # messages, titles and file names
                day_number = c_day_of_month + 1

                # Raw string so that '\S' reaches the regex engine untouched.
                # The pattern itself is unchanged (dots are still unescaped).
                re_hycom = rF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                try:
                    # Index of the first file matching the current day
                    hycom_file_idx = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_hycom, file_name) is not None
                    ][0]
                except Exception as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {day_number} doesn't exist: {e}"
                    )
                    continue

                c_hycom_path = hycom_paths[hycom_file_idx]
                print(
                    F" =============== Working with: {hycom_files[hycom_file_idx]} ============= "
                )
                print(F"Available fields: {read_field_names(c_hycom_path)}")
                model_state_np_fields = read_hycom_fields(c_hycom_path,
                                                          fields,
                                                          layers=layers)

                for idx_field, c_field_name in enumerate(fields):
                    model_state_np_c_field = model_state_np_fields[
                        c_field_name]
                    title = F"{c_field_name} {c_year}_{c_month:02d}_{day_number:02d}"
                    # ======================= Only HYCOM ==================
                    # The file name uses the same one-based day as the title
                    # (it used the zero-based index before, e.g. '_00').
                    img_viz.plot_3d_data_np(
                        [model_state_np_c_field],
                        var_names=['HYCOM'],
                        title=title,
                        file_name_prefix=
                        F'HYCOM_{c_field_name}_{c_year}_{c_month:02d}_{day_number:02d}',
                        z_lavels_names=layers,
                        flip_data=True,
                        plot_mode=plot_modes[idx_field])
Esempio n. 2
0
def plot_raw_data_new(proc_id):
    """
    Plots, for each configured field, the model state next to the TSIS increment.

    The increment, model and observation files of each day are located by
    regular expression; a day is skipped when any of the three is missing.
    Observation fields are read but are not plotted by this function.
    Days are distributed among processes: a day is handled here only when
    ``day_index % NUM_PROC == proc_id`` (``day_index`` starts at 0).

    :param proc_id: index of this process, compared against ``day_index % NUM_PROC``
    :return: None
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # File names and paths for the increment, model and observations
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month, day_idx=2)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                print(
                    F"Failed to find any file for date {c_year}-{c_month}: {e}"
                )
                continue

            # The day index is compared with proc_id so that the days can be
            # split among several processes
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # enumerate() is zero-based; this is the calendar day
                day_number = c_day_of_month + 1

                # Raw strings so that '\S' reaches the regex engine untouched;
                # the patterns themselves are unchanged.
                re_tsis = rF'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rF'020_archv.{c_year}_{c_day_of_year:03d}\S*.a'
                # re_hycom = rF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                # re_obs = rF'tsis_obs_ias_{c_year}{c_month:02d}{day_number:02d}\S*.nc'
                re_obs = rF'tsis_obs_gomb4_{c_year}{c_month:02d}{day_number:02d}\S*.nc'

                try:
                    # Index of the first matching file for each of the three cases
                    increment_file_idx = [
                        i for i, file_name in enumerate(increment_files)
                        if re.search(re_tsis, file_name) is not None
                    ][0]
                    hycom_file_idx = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_hycom, file_name) is not None
                    ][0]
                    obs_file_idx = [
                        i for i, file_name in enumerate(obs_files)
                        if re.search(re_obs, file_name) is not None
                    ][0]
                except Exception as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {day_number} doesn't exist: {e}"
                    )
                    continue

                c_increment_path = increment_paths[increment_file_idx]
                c_hycom_path = hycom_paths[hycom_file_idx]
                c_obs_path = obs_paths[obs_file_idx]

                print(
                    F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                )
                print(
                    F"Available fields on increment: {read_field_names(c_increment_path)}"
                )
                print(
                    F"Available fields on model: {read_field_names(c_hycom_path)}"
                )
                # The dataset is only opened to list its variables, so it is
                # closed right away. (The previous nested print() call showed
                # 'None' instead of the list inside this message.)
                with xr.open_dataset(c_obs_path) as ds:
                    print(
                        F"Available fields on observations: {list(ds.keys())}"
                    )

                model_state_np_fields = read_hycom_fields(c_hycom_path,
                                                          fields,
                                                          layers=layers)
                increment_np_fields = read_hycom_fields(c_increment_path,
                                                        fields,
                                                        layers=layers)

                # NOTE(review): the observations are read but not used in the
                # plot below; the call is kept so unreadable files still fail here.
                # obs_np_fields = read_netcdf(c_obs_path, fields_obs, rename_fields=fields)
                obs_np_fields = read_netcdf(c_obs_path, fields_obs)

                # Iterate over the fields defined in PreprocConfig and plot them
                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Exact zeros in the increment file are replaced by NaN
                    # (in place, on the array returned by read_hycom_fields)
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[
                        c_field_name]

                    if c_field_name == "thknss":
                        # presumably converts thickness from pressure units
                        # to meters — TODO confirm the 9806 factor
                        divide = 9806
                        model_state_np_c_field = model_state_np_c_field / divide
                        increment_np_c_field = increment_np_c_field / divide

                    if c_field_name == "srfhgt":
                        # For srfhgt the increment file value is plotted as is
                        inc = increment_np_c_field
                    else:
                        # For the other fields the plotted increment is the
                        # model state minus the increment file value
                        inc = model_state_np_c_field - increment_np_c_field

                    # ======================= Only Background state and TSIS increment ==================
                    try:
                        title = F"{c_field_name} {c_year}_{c_month:02d}_{day_number:02d}"
                        img_viz.plot_3d_data_np(
                            [model_state_np_c_field, inc],
                            var_names=['HYCOM', 'Increment (TSIS)'],
                            title=title,
                            file_name_prefix=
                            F'ModelAndIncrement_{c_field_name}_{c_year}_{c_month:02d}_{day_number:02d}',
                            z_lavels_names=layers,
                            flip_data=True,
                            plot_mode=plot_modes[idx_field])
                    except Exception as e:
                        print(F"Failed for field: {c_field_name}: {e}")
Esempio n. 3
0
def plot_raw_data(proc_id):
    """
    Plots, for each configured field, the observations, the model state and the TSIS increment.

    The increment, model and observation files of each day are located by
    regular expression; a day is skipped when any of the three is missing.
    Days are distributed among processes: a day is handled here only when
    ``day_index % NUM_PROC == proc_id`` (``day_index`` starts at 0).

    :param proc_id: index of this process, compared against ``day_index % NUM_PROC``
    :return: None
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # File names and paths for the increment, model and observations
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                print(
                    F"Failed to find any file for date {c_year}-{c_month}: {e}"
                )
                continue

            # The day index is compared with proc_id so that the days can be
            # split among several processes
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # enumerate() is zero-based; this is the calendar day
                day_number = c_day_of_month + 1

                # Raw strings so that '\S' reaches the regex engine untouched;
                # the patterns themselves are unchanged.
                re_tsis = rF'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rF'tsis_obs_ias_{c_year}{c_month:02d}{day_number:02d}\S*.nc'

                try:
                    # Index of the first matching file for each of the three cases
                    increment_file_idx = [
                        i for i, file_name in enumerate(increment_files)
                        if re.search(re_tsis, file_name) is not None
                    ][0]
                    hycom_file_idx = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_hycom, file_name) is not None
                    ][0]
                    obs_file_idx = [
                        i for i, file_name in enumerate(obs_files)
                        if re.search(re_obs, file_name) is not None
                    ][0]
                except Exception as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {day_number} doesn't exist: {e}"
                    )
                    continue

                c_increment_path = increment_paths[increment_file_idx]
                print(
                    F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                )
                print(
                    F"Available fields on increment: {read_field_names(c_increment_path)}"
                )
                increment_np_fields = read_hycom_fields(c_increment_path,
                                                        fields,
                                                        layers=layers)
                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)
                # Observation fields are renamed to the model field names so
                # that they can be indexed with the same key below
                obs_np_fields = read_netcdf(obs_paths[obs_file_idx],
                                            fields_obs,
                                            layers=[0],
                                            rename_fields=fields)

                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Exact zeros in the increment file are replaced by NaN
                    # (in place, on the array returned by read_hycom_fields)
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[
                        c_field_name]
                    obs_np_c_field = obs_np_fields[c_field_name]

                    # A leftover debugging expression that compared the
                    # observation and the model at the fixed grid cell
                    # [502, 609] was removed: its result was discarded and it
                    # raised IndexError on smaller grids.

                    title = F"{c_field_name} {c_year}_{c_month:02d}_{day_number:02d}"
                    # ======================= Observations, HYCOM and TSIS increment ==================
                    img_viz.plot_3d_data_np(
                        [
                            # A leading axis is added to the observations
                            # before they are plotted next to the model data
                            np.expand_dims(obs_np_c_field, 0),
                            model_state_np_c_field,
                            increment_np_c_field,
                        ],
                        var_names=['Observations', 'HYCOM', 'Increment (TSIS)'],
                        title=title,
                        file_name_prefix=
                        F'Summary_{c_field_name}_{c_year}_{c_month:02d}_{day_number:02d}',
                        z_lavels_names=layers,
                        flip_data=True,
                        plot_mode=plot_modes[idx_field])
Esempio n. 4
0
def compute_consecutive_days_difference():
    """
    Computes and plots the difference between consecutive days of the HYCOM files.

    For every day that has both its own file and the file of the previous day
    of the year, the value stored per field is
    ``abs(nanmean(previous_day - current_day))``. The values of each year are
    plotted once for all the fields together and once per field.

    :return: None
    """
    config = get_preproc_config()
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    for c_year in YEARS:
        # Accumulators are restarted for every year
        diff_per_field = {field: [] for field in fields}
        days_with_data = []
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # File names and paths returned by get_hycom_file_name for this year/month
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
            except Exception as e:
                print(
                    F"Failed to find any file for date {c_year}-{c_month}: {e}"
                )
                continue

            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                print(
                    F"---------- Year {c_year} day: {c_day_of_year} --------------"
                )
                # Raw strings so that '\S' reaches the regex engine untouched;
                # the patterns themselves are unchanged.
                re_hycom = rF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom_prev = rF'archv.{c_year}_{(c_day_of_year-1):03d}\S*.a'
                try:
                    # Index of the first file matching the current and the previous day.
                    # NOTE(review): both are searched in the list of the current
                    # month only — confirm whether the first day of a month can
                    # ever find its previous day there.
                    hycom_file_idx = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_hycom, file_name) is not None
                    ][0]
                    hycom_file_idx_prev = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_hycom_prev, file_name) is not None
                    ][0]
                except Exception as e:
                    # enumerate() is zero-based, so one is added to report the calendar day
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {c_day_of_month + 1} (and prev day) don't exist: {e}"
                    )
                    continue

                days_with_data.append(c_day_of_year)
                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)
                model_state_np_fields_prev = read_hycom_fields(
                    hycom_paths[hycom_file_idx_prev], fields, layers=layers)

                # One scalar per field: absolute value of the NaN-ignoring
                # mean of (previous day - current day)
                for c_field_name in fields:
                    c_diff = np.abs(
                        np.nanmean(model_state_np_fields_prev[c_field_name] -
                                   model_state_np_fields[c_field_name]))
                    diff_per_field[c_field_name].append(c_diff)

        # NOTE(review): the file name prefixes below do not include the year,
        # so every year in YEARS is plotted under the same names — confirm
        # whether later years are expected to replace earlier ones.

        # Plots the differences between consecutive days. For all the fields together.
        img_viz.plot_1d_data_np(
            days_with_data,
            list(diff_per_field.values()),
            title='Difference between days',
            labels=fields,
            file_name_prefix='HYCOM_Diff_Between_Days',
            wide_ratio=4)
        # Plots the differences between consecutive days. Separated by fields
        for field, field_diffs in diff_per_field.items():
            img_viz.plot_1d_data_np(
                days_with_data, [field_diffs],
                title=F'Difference between days {field}',
                labels=[field],
                file_name_prefix=F'HYCOM_Diff_Between_Days_{field}',
                wide_ratio=4)
Esempio n. 5
0
def preproc_data(proc_id):
    """
    Preprocesses the increment, model and observation data of each day.

    For every day assigned to this process (``day_index % NUM_PROC == proc_id``,
    with ``day_index`` starting at 0):
        1) Reads the increment and model fields and passes them to
           ``proc_increment_data`` with the target ``increment_<year>_<day of year>.nc``.
           If anything fails in this step, the whole day is skipped.
        2) Reads the model fields again and passes them to ``proc_model_data``
           with the target ``model_<year>_<day of year>.nc``.
        3) Saves the desired observation fields, plus two gridded fields built
           from the profiles (``sst_p`` and ``sss_p``), as
           ``obs_<year>_<day of year>.nc``.
    Failures in steps 2 and 3 are reported and do not stop the processing.

    :param proc_id: index of this process, compared against ``day_index % NUM_PROC``
    :return: None
    """
    print("Preprocessing data....")
    config = get_preproc_config()
    input_folder_increment = config[PreprocParams.input_folder_tsis]
    input_folder_model = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    obs_fields = config[PreprocParams.fields_names_obs]
    layers = config[PreprocParams.layers_to_plot]
    # NOTE(review): not used below; kept in case the constructor has a side
    # effect on output_folder that the writes rely on — confirm before removing.
    img_viz = EOAImageVisualizer(output_folder=output_folder,
                                 disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            print(
                F"=============== Year: {c_year}  Month: {c_month} ==========="
            )
            _, days_of_year = get_days_from_month(c_month)
            # File names and paths of this month for the three sources
            da_files, da_paths = get_hycom_file_name(input_folder_increment,
                                                     c_year, c_month)
            hycom_files, hycom_paths = get_hycom_file_name(
                input_folder_model, c_year, c_month)
            obs_files, obs_paths = get_obs_file_names(input_folder_obs, c_year,
                                                      c_month)

            # The day index is compared with proc_id so that the days can be
            # split among several processes
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # enumerate() is zero-based; this is the calendar day used in
                # the messages and in the observations file name
                day_number = c_day_of_month + 1

                # Raw strings so that '\S' reaches the regex engine untouched;
                # the patterns themselves are unchanged.
                re_increment = rF'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_model = rF'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rF'tsis_obs_ias_{c_year}{c_month:02d}{day_number:02d}\S*.nc'

                try:
                    da_file_idx = [
                        i for i, file_name in enumerate(da_files)
                        if re.search(re_increment, file_name) is not None
                    ][0]
                    print(
                        F" =============== Working with: {da_files[da_file_idx]} Proc_id={proc_id} ============= "
                    )
                    da_np_fields = read_hycom_fields(da_paths[da_file_idx],
                                                     fields,
                                                     layers=layers)

                    hycom_file_idx = [
                        i for i, file_name in enumerate(hycom_files)
                        if re.search(re_model, file_name) is not None
                    ][0]
                    hycom_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)

                    # --------- Preprocessing Increment (TSIS) -------------
                    proc_increment_data(
                        da_np_fields, hycom_np_fields, fields,
                        join(output_folder,
                             F"increment_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: Increment file for date {c_year}-{c_month}-{day_number} ({re_increment}) doesn't exist: {e}"
                    )
                    # Any failure in the increment step (including a missing
                    # model file) moves on to the next day.
                    continue

                try:
                    # hycom_file_idx is always defined here: the increment
                    # step above found it or skipped the day.
                    print(
                        F" --------------- Working with: {hycom_files[hycom_file_idx]} ------------- "
                    )
                    # The fields are read again rather than reused, in case
                    # proc_increment_data modified the arrays it received
                    # — TODO confirm whether reusing them is safe.
                    hycom_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)
                    # --------- Preprocessing HYCOM data -------------
                    proc_model_data(
                        hycom_np_fields, fields,
                        join(output_folder,
                             F"model_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: HYCOM file for date {c_year}-{c_month}-{day_number} ({re_model}) doesn't exist: {e}"
                    )

                try:
                    obs_file_idx = [
                        i for i, file_name in enumerate(obs_files)
                        if re.search(re_obs, file_name) is not None
                    ][0]
                    # --------- Preprocessing observed data -------------
                    # Reports the observations file (the model file name was
                    # printed here before).
                    print(
                        F" --------------- Working with: {obs_files[obs_file_idx]} ------------- "
                    )
                    obs_ds = xr.load_dataset(obs_paths[obs_file_idx])

                    # Dataset holding only the desired observation fields
                    preproc_obs_ds = None
                    last_obs_field = None
                    for c_obs_field in obs_fields:
                        c_field_ds = obs_ds[c_obs_field].to_dataset()
                        if preproc_obs_ds is None:
                            preproc_obs_ds = c_field_ds
                        else:
                            preproc_obs_ds = preproc_obs_ds.merge(c_field_ds)
                        last_obs_field = c_obs_field
                    if preproc_obs_ds is None:
                        raise ValueError(
                            "no observation fields are configured")

                    # --------------- Here we add the fields from the profiles as gridded data -----------
                    # Group ids that are compared against 'ob_grp_present'
                    temp_group = 0
                    saln_group = 1
                    # Both grids take the shape of the last desired field
                    sst_p = np.zeros(
                        preproc_obs_ds[last_obs_field].values.shape)
                    sss_p = np.zeros(sst_p.shape)

                    # Plain NumPy arrays are taken once, instead of indexing
                    # the xarray objects for every single profile
                    profiles = obs_ds.val.values
                    tot_profiles = profiles.shape[0]
                    obs_groups = obs_ds.ob_grp_present.values
                    lons_i = obs_ds.grdi.values[:, 0, 0]
                    lats_i = obs_ds.grdj.values[:, 0, 0]

                    for i_group, c_type in enumerate(obs_groups):
                        if c_type == saln_group:
                            target_grid = sss_p
                        elif c_type == temp_group:
                            target_grid = sst_p
                        else:
                            continue
                        for c_profile_i in range(tot_profiles):
                            # Value at the last index of the second axis of
                            # this profile, stored at its (grdj, grdi) cell
                            target_grid[
                                int(lats_i[c_profile_i]),
                                int(lons_i[c_profile_i])] = profiles[
                                    c_profile_i, -1, i_group]

                    print(F"Max value: {np.amax(sst_p)}")
                    print(F"Max value s: {np.amax(sss_p)}")
                    preproc_obs_ds['sst_p'] = xr.DataArray(
                        sst_p, dims=['yc', 'xc'])
                    preproc_obs_ds['sss_p'] = xr.DataArray(
                        sss_p, dims=['yc', 'xc'])
                    preproc_obs_ds.to_netcdf(
                        join(output_folder,
                             F"obs_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: OBS file for date {c_year}-{c_month}-{day_number} doesn't exist: {e}"
                    )