raw_series = dict()
        for station, obs in observations.iterrows():
            a = obs_data[obs['station']]
            try:
                kw = dict(k=10, max_dist=0.04, min_var=0.01)
                args = cube, tree, obs.lon, obs.lat
                series, dist, idx = get_nearest_water(*args, **kw)
            # A RuntimeError may occur here; if it does, just run it again!
            except ValueError as e:
                log.warning(e)
                continue
            if not series:
                status = "Found Land"
            else:
                raw_series.update({obs['station']: series})
                series = as_series(series)
                status = "Found Water"
                ax.plot(lon[idx], lat[idx], 'g.')

            log.info('[{}] {}'.format(status, obs.name))

        if raw_series:  # Save cube.
            for station, cube in raw_series.items():
                cube = standardize_fill_value(cube)
                cube = add_station(cube, station)
            try:
                cube = iris.cube.CubeList(raw_series.values()).merge_cube()
            except MergeError as e:
                log.warning(e)

            ensure_timeseries(cube)
Exemple #2
0
def save_results(regridded_profiles, tracer_columns, params):
    """
    Save (merged) results as netCDF files and as data tables.

    :param regridded_profiles: iterable of profile cubes. Each cube must
        have an 'altitude' coordinate and a 'name' attribute (plus a
        'no_udunits2' attribute when its units are unknown).
    :param tracer_columns: iterable of column cubes. Each cube must have a
        'time' coordinate and a 'name' attribute.
    :param params: dict read for the keys 'out_profiles_basename',
        'out_columns_basename', 'station_name', 'out_dir' and 'out_format'.
        Any "*" in the two basenames is replaced by the station name.
        When 'out_dir' is None it is set (in place) to the current working
        directory.
    :return: a 2-tuple ``([panel_profiles, dataframe_columns],
        [profiles_nc, columns_nc, profiles_table, columns_table])`` holding
        the pandas objects and the output file names. For the 'csv'
        format the profiles are actually written to one
        '<basepath>_<item>.csv' file per panel item; for an unrecognised
        'out_format' no table file is written at all.
    """

    # SET OUTPUT FILE NAMES
    # Use the ``str.replace`` method: the ``string.replace`` module function
    # only exists on Python 2.
    station_name = params['station_name']
    out_profiles_basename = params['out_profiles_basename'].replace(
        "*", station_name
    )
    out_columns_basename = params['out_columns_basename'].replace(
        "*", station_name
    )

    if params['out_dir'] is None:
        params['out_dir'] = os.path.abspath(os.getcwd())
    out_profiles_basepath = os.path.join(params['out_dir'],
                                         out_profiles_basename)
    out_columns_basepath = os.path.join(params['out_dir'],
                                        out_columns_basename)

    out_profiles_cubes_fn = out_profiles_basepath + '.nc'
    out_columns_cubes_fn = out_columns_basepath + '.nc'

    out_profiles_tables_fn = '.'.join([out_profiles_basepath,
                                       params['out_format']])
    out_columns_tables_fn = '.'.join([out_columns_basepath,
                                      params['out_format']])

    # SAVE IN NETCDF FILES (CUBES)
    # make altitude the dimension coordinate
    for profile_cube in regridded_profiles:
        if isinstance(profile_cube.coord('altitude'), iris.coords.AuxCoord):
            iris_tools.permute_dim_aux_coords(profile_cube,
                                              'model_level_number',
                                              'altitude')

    for cube in regridded_profiles:
        iris_tools.fix_cube_attributes_vals(cube)

    for cube in tracer_columns:
        iris_tools.fix_cube_attributes_vals(cube)

    iris.save(regridded_profiles, out_profiles_cubes_fn)
    iris.save(tracer_columns, out_columns_cubes_fn)

    # SAVE AS DATA TABLES (EXCEL, CSV...)
    dataframe_profiles = {}
    for profile in regridded_profiles:
        # Work on a copy so the cubes handed in by the caller keep their
        # auxiliary coordinates.
        profile_cube = profile.copy()

        # there must be only one defined dimension coordinate for each
        # cube dimension (no auxiliary coordinate) to convert iris to pandas
        z_dim = profile_cube.coord_dims(profile_cube.coord('altitude'))
        iris_tools.remove_dim_aux_coords(profile_cube, z_dim)

        dataframe_units = str(profile_cube.units).replace('/', '_')
        if dataframe_units == 'unknown':
            dataframe_units = profile_cube.attributes['no_udunits2']
        dataframe_name = "{tracer}_{units}".format(
            tracer=profile_cube.attributes['name'],
            units=dataframe_units
        )

        if profile_cube.ndim == 1:
            # scalar time coordinate: build a one-row frame indexed by date.
            # Convert the cleaned copy, as the dimensional branch does.
            series = ipandas.as_series(profile_cube)
            time_coord = profile_cube.coord('time')
            date = time_coord.units.num2date(time_coord.points[0])
            dataframe_profiles[dataframe_name] = pd.DataFrame(
                {date: series}
            ).transpose()
        else:
            # dimensional time coordinate
            dataframe_profiles[dataframe_name] = ipandas.as_data_frame(
                profile_cube
            )

    # NOTE(review): ``pd.Panel`` was removed from recent pandas releases.
    # It is kept here because the panel is part of the return value;
    # migrating needs a coordinated change in the callers.
    panel_profiles = pd.Panel(dataframe_profiles).astype(np.float64)

    series_columns = {}
    for column in tracer_columns:
        series_name = "{tracer}_{units}".format(
            tracer=column.attributes['name'],
            units='molec_cm-2'
        )
        column_series = ipandas.as_series(column)
        time_coord = column.coord('time')
        # Re-index the series with dates decoded from the time coordinate.
        column_series.index = time_coord.units.num2date(time_coord.points)
        series_columns[series_name] = column_series

    dataframe_columns = pd.DataFrame(series_columns).astype(np.float64)

    out_format = params['out_format']
    if out_format in ('hdf', 'hdf5'):
        panel_profiles.to_hdf(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_hdf(out_columns_tables_fn, 'columns')

    elif out_format in ('xls', 'xlsx'):
        panel_profiles.to_excel(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_excel(out_columns_tables_fn, 'columns')

    elif out_format == 'csv':
        # A panel cannot be written to a single CSV: one file per item.
        for pr in panel_profiles:
            panel_profiles[pr].to_csv('{0}_{1}.csv'
                                      .format(out_profiles_basepath, pr))
        dataframe_columns.to_csv(out_columns_tables_fn)

    return ([panel_profiles, dataframe_columns],
            [out_profiles_cubes_fn, out_columns_cubes_fn,
             out_profiles_tables_fn, out_columns_tables_fn])
def create_ts_both_graphs(calc, calc2, variable, analysis, save_out):
    """
    Used to plot the control and future run in one graph
    :param calc: control cube
    :param calc2: future cube
    :param variable: variable (used as the data column name)
    :param analysis: analysis string (used in the plot title)
    :param save_out: if set, save the figure in directories.ANALYSIS
    :return: None (the figure is drawn and optionally saved)
    """
    # Construct figure and axes
    sns.set(rc={'figure.figsize': (11, 4)})

    # Convert cubes to pandas series
    try:
        pd_data = as_series(calc)
        pd_data2 = as_series(calc2)
    except Exception:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
        )
        return None

    # Get x values in series
    indices, indices2 = pd_data.index, pd_data2.index
    if len(indices) == 1 or len(indices2) == 1:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries of one value."
        )
        return None

    # Convert cftime.datetime to datetime. Each index is converted on its
    # own so the two runs may have different lengths.
    indx = pd.DatetimeIndex([convert_cftime_datetime(d) for d in indices])
    indx2 = pd.DatetimeIndex([convert_cftime_datetime(d) for d in indices2])

    df = pd.DataFrame(data=pd_data.to_numpy(), index=indx, columns=[variable])
    df2 = pd.DataFrame(data=pd_data2.to_numpy(),
                       index=indx2,
                       columns=[variable])

    # Drop rows whose value is >= 1e+20, as well as missing values
    for frame in (df, df2):
        frame.drop(frame[frame[variable] >= 1e+20].index, inplace=True)
        frame.dropna(inplace=True)

    # Plot both timeseries on the same axes
    fig, axs = plt.subplots(1, 1)
    title_name = ("Control and future run of spatial " + analysis + " of " +
                  calc.name())

    df.plot(ax=axs, title=title_name, grid=True, alpha=0.7, color='b', ls='-')
    df2.plot(ax=axs, color='g', ls='-')
    axs.legend(["Control", "Future"])
    axs.set_xlabel("Time (month/year)")
    axs.set_ylabel(calc.name())
    # Lay out once everything is drawn so labels and title are accounted for
    fig.tight_layout()

    if save_out:
        file_name = make_into_file_name(title_name)
        fig.savefig(os.path.join(directories.ANALYSIS, file_name))
        print("Timeseries plot is saved in the " + directories.ANALYSIS +
              " folder as a png file.")
def create_timeseries_helper(cube, variable, start_date, end_date, time_str,
                             monthly, title_name, save_out, spatial,
                             second_date_given, plot, analysis, start2, end2):
    """
    Create timeseries helper function
    :param cube: cube to convert into a timeseries
    :param variable: name used for the data column
    :param start_date: sequence whose element [2] is used in the file name
    :param end_date: sequence whose element [2] is used in the file name
    :param time_str: string used in the file names
    :param monthly: if set, skip the monthly-mean line and use a 12-sample
        rolling window (365 samples otherwise)
    :param title_name: title of the timeseries plot
    :param save_out: if set, save the table and the plots in
        directories.ANALYSIS
    :param spatial: not used by this function
    :param second_date_given: if set, start2/end2 are added to the file name
    :param plot: if set, draw the timeseries plot and the monthly box plot
    :param analysis: analysis string used in the file names
    :param start2: sequence whose element [2] is used in the file name when
        second_date_given is set
    :param end2: sequence whose element [2] is used in the file name when
        second_date_given is set
    :return: None
    """

    # Convert cube to pandas series
    try:
        pd_data = as_series(cube)
    except Exception:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
        )
        return None

    # Get x values in series
    indices = pd_data.index
    if len(indices) == 1:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries of one value."
        )
        return None

    # Convert cftime.datetime to datetime
    indx = pd.DatetimeIndex([convert_cftime_datetime(d) for d in indices])
    data = np.asarray(cube.data)
    df = pd.DataFrame(data=data, index=indx, columns=[variable])

    # Drop rows whose value is >= 1e+20, as well as missing values
    df.drop(df[df[variable] >= 1e+20].index, inplace=True)
    df.dropna(inplace=True)

    # Save table to file for each variable
    # Construct file name
    if second_date_given:
        date_str = " " + time_str + "_multimodel"
    else:
        date_str = " " + time_str + " "

    file_name = "ts_" + analysis + '_' + cube.name() + date_str + "_" + str(
        start_date[2]) + "-" + str(end_date[2])
    if second_date_given:
        file_name = file_name + "_" + str(start2[2]) + '-' + str(
            end2[2]) + '_multi_model'
    file_name = make_into_file_name(file_name)
    if save_out:
        df.to_csv(os.path.join(directories.ANALYSIS, file_name + '.txt'),
                  sep=',',
                  index=True,
                  index_label='dates')
        print("Timeseries data is saved in the " + directories.ANALYSIS +
              " folder as a txt file.")

    if plot:
        # Construct figure and axes
        sns.set(rc={'figure.figsize': (11, 4)})
        num_years = len(np.unique(indx.year))
        # Plot the timeseries
        fig, axs = plt.subplots(1, 1)
        df.plot(ax=axs,
                title=title_name,
                grid=True,
                legend=False,
                alpha=0.7,
                color='m',
                ls='-')
        # Build the legend alongside the lines that are actually drawn, so
        # every case (including exactly 3 years of data) gets a legend that
        # matches the plotted lines.
        legend_labels = ['input']
        if not monthly:
            df.resample('BM').mean().plot(ax=axs, style='-')
            legend_labels.append('monthly mean')
        if num_years > 3:
            window = 12 if monthly else 365
            df.rolling(window).mean().plot(ax=axs, style='-')
            legend_labels.append('one-year rolling mean')
        axs.legend(legend_labels, loc='upper left')

        if second_date_given:
            axs.set_xlabel("Time (the control year)")
        else:
            axs.set_xlabel("Time (month/year)")
        axs.set_ylabel(cube.name())

        if save_out:
            plt.savefig(os.path.join(directories.ANALYSIS, file_name))
            print("Timeseries plot is saved in the " + directories.ANALYSIS +
                  " folder as a png file.")

        # Boxplot - Yearly seasonality
        fig, axs1 = plt.subplots(1, 1)
        df['Month'] = df.index.month
        sns.boxplot(ax=axs1, data=df, x='Month', y=variable)
        axs1.set_title("Yearly seasonality of " + cube.name())
        axs1.set_xlabel("Months")
        axs1.set_ylabel(cube.name())

        if save_out:
            file_name = "bp_" + analysis + '_' + cube.name() + date_str
            file_name = make_into_file_name(file_name)
            plt.savefig(os.path.join(directories.ANALYSIS, file_name))
            print("Box plot is saved in the " + directories.ANALYSIS +
                  " folder as a png file.")
def create_timeseries(list_ens,
                      start_date,
                      end_date,
                      variables,
                      monthly=False,
                      save_out=True,
                      ens_num=1,
                      func_name=None,
                      second_date_given=False,
                      plot=None):
    """
    Analyse the data given - in this case it computes the timeseries (assumes grid/sample point)
    :param list_ens: the list of ensembles (dicts) containing the data of the climate variables
    :param start_date: extract from start date from data, list [d, m, y]
    :param end_date: extract till end date from data, list [d, m, y]
    :param variables: a variable name or a list of variable names; one timeseries is produced per variable
    :param monthly: data is stored in monthly increments (time = 12) else assumed (time = 365)
    :param save_out: if set, then save output of the timeseries
    :param ens_num: selection of ensemble to use (1-based)
    :param func_name: if user function analysis used, this is the function name
    :param second_date_given: if set, multi model averages calculated
    :param plot: if set, draw the timeseries plot and the monthly box plot
    :return: None
    """

    # Make sure data structures are not empty
    assert list_ens is not None
    assert variables is not None

    # If variables is just one object, cast to list
    if not isinstance(variables, list):
        variables = [variables]

    # Daily or monthly
    time_str = "monthly" if monthly else "daily"

    # Construct figure and axes
    sns.set(rc={'figure.figsize': (11, 4)})
    # Convert pandas dates to matplotlib date format
    register_matplotlib_converters()

    for variable in variables:
        # Get cube from dictionary
        cube = list_ens[ens_num - 1][variable]
        # Construct title name
        title_name = cube.name() + " measured " + time_str + " between " + str(
            start_date[2]) + " and " + str(end_date[2])

        if func_name is not None:
            title_name = func_name + " of " + title_name
        if second_date_given and func_name is not None:
            title_name = "Multi model " + title_name

        # Convert cube to pandas series
        try:
            pd_data = as_series(cube)
        except Exception:
            print(
                "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
            )
            return None

        # Get x values in series
        indices = pd_data.index
        if len(indices) == 1:
            print(
                "WARNING in function create_timeseries: cannot construct timeseries of one value."
            )
            return None

        # Convert cftime.datetime to datetime
        indx = pd.DatetimeIndex([convert_cftime_datetime(d) for d in indices])
        data = np.asarray(cube.data)
        df = pd.DataFrame(data=data, index=indx, columns=[variable])

        # Save table to file for each variable
        # Construct file name
        date_str = " " + time_str + " " + str(start_date[2]) + "_" + str(
            end_date[2])
        file_name = make_into_file_name("ts_" + cube.name() + date_str)
        if save_out:
            df.to_csv(os.path.join(directories.ANALYSIS, file_name + '.txt'),
                      sep=',',
                      index=True,
                      index_label='dates')
            print("Timeseries data is saved in the " + directories.ANALYSIS +
                  " folder as a txt file.")

        if plot:
            num_years = len(np.unique(indx.year))
            # Plot the timeseries
            fig, axs = plt.subplots(1, 1)
            df.plot(ax=axs,
                    title=title_name,
                    grid=True,
                    legend=False,
                    alpha=0.7,
                    color='m',
                    ls='-')
            # Build the legend alongside the lines that are actually drawn,
            # so every case (including exactly 3 years of data) gets a
            # legend that matches the plotted lines.
            legend_labels = ['input']
            if not monthly:
                df.resample('BM').mean().plot(ax=axs, style='-')
                legend_labels.append('monthly mean')
            if num_years > 3:
                window = 12 if monthly else 365
                df.rolling(window).mean().plot(ax=axs, style='-')
                legend_labels.append('one-year rolling mean')
            axs.legend(legend_labels, loc='upper left')
            axs.set_xlabel("Time (month/year)")
            axs.set_ylabel(cube.name())

            if save_out:
                plt.savefig(os.path.join(directories.ANALYSIS, file_name))
                print("Timeseries plot is saved in the " +
                      directories.ANALYSIS + " folder as a png file.")

            # Boxplot - Yearly seasonality
            fig, axs1 = plt.subplots(1, 1)
            df['Month'] = df.index.month
            sns.boxplot(ax=axs1, data=df, x='Month', y=variable)
            axs1.set_title("Yearly seasonality of " + cube.name())
            axs1.set_xlabel("Months")
            axs1.set_ylabel(cube.name())
         kw = dict(k=10, max_dist=0.08, min_var=0.01)
         args = cube, tree, obs["lon"], obs["lat"]
         try:
             series, dist, idx = get_nearest_water(*args, **kw)
         except RuntimeError as e:
             print("Cannot download {!r}.\n{}".format(cube, e))
             series = None
     except ValueError:
         status = "No Data"
         print("[{}] {}".format(status, obs["station_name"]))
         continue
     if not series:
         status = "Land   "
     else:
         raw_series.update({station: series})
         series = as_series(series)
         status = "Water  "
     print("[{}] {}".format(status, obs["station_name"]))
 if raw_series:  # Save cube.
     for station, cube in raw_series.items():
         cube = add_station(cube, station)
     try:
         cube = iris.cube.CubeList(raw_series.values()).merge_cube()
     except MergeError as e:
         print(e)
     ensure_timeseries(cube)
     try:
         iris.save(cube, fname)
     except AttributeError:
         # FIXME: we should patch the bad attribute instead of removing everything.
         cube.attributes = {}
Exemple #7
0
# Function-call form of print: valid on both Python 2 and Python 3.
print(cl)

# <codecell>

fig, ax = plt.subplots(figsize=(12, 3.5))
qplt.plot(cl[2], label=cl[2].name())
plt.grid()

# <headingcell level=2>

# You can also convert an Iris cube object to a Pandas Series object

# <codecell>

from iris.pandas import as_cube, as_series, as_data_frame
# Despite its name, ``df`` holds the Series produced by ``as_series``.
df = as_series(cl[2])
df.head()

# <codecell>

df.plot(figsize=(12,3.5));

# <codecell>

df.describe()

# <codecell>