Example #1
def update_time_in_NetCDF2save(ds, convert_time2dt=False):
    """
    Update the time coordinate of monthly output to a NetCDF-saveable format

    Parameters
    -------
    ds (xr.Dataset): dataset with a monthly 'time' coordinate to update
    convert_time2dt (bool): convert the time into a datetime.datetime format
    """
    # Climate model time
    sdate = datetime.datetime(1985, 1, 1)
    # Convert / setup time dim?
    if convert_time2dt:
        months = np.arange(1, 13)
        ds['time'] = [AC.add_months(sdate, i - 1) for i in months]
    # Update to hours since X
    hours = [(AC.dt64_2_dt([i])[0] - sdate).days * 24.
             for i in ds['time'].values]
    ds['time'] = hours
    attrs_dict = {'units': 'hours since 1985-01-01 00:00:00'}
    ds['time'].attrs = attrs_dict
    return ds
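
A minimal, self-contained sketch of the same time-coordinate conversion without the AC_tools helpers (the variable name 'field' and the 12 monthly timestamps below are illustrative only):

# Sketch: convert a datetime64 time coordinate to "hours since 1985-01-01"
# with CF-style units, mirroring update_time_in_NetCDF2save without AC_tools.
import datetime
import numpy as np
import pandas as pd
import xarray as xr

sdate = datetime.datetime(1985, 1, 1)
times = pd.date_range('1985-01-01', periods=12, freq='MS')
ds = xr.Dataset({'field': (['time'], np.zeros(12))}, coords={'time': times})
# Hours elapsed since the reference date for each monthly timestamp
hours = [(pd.Timestamp(t) - sdate).days * 24. for t in ds['time'].values]
ds['time'] = hours
ds['time'].attrs = {'units': 'hours since 1985-01-01 00:00:00'}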
Example #2
def plot_up_df_data_by_yr(df=None, Datetime_var='datetime', TimeWindow=5,
                          start_from_last_obs=False, drop_bins_without_data=True,
                          target='Iodide', dpi=320, verbose=False):
    """
    Plot up # of obs. data (Y) binned by region against year (X)

    Parameters
    -------
    df (pd.DataFrame): DataFrame of data including a datetime variable
    Datetime_var (str): name of the datetime variable in the DataFrame
    target (str): Name of the target variable (e.g. iodide)
    TimeWindow (int): number of years to bin observations over
    start_from_last_obs (bool): start from the last observational date
    drop_bins_without_data (bool): exclude bins with no data from plotting
    dpi (int): resolution of figure (dots per sq inch)
    verbose (bool): print out extra information on the binning

    Returns
    -------
    (None)
    """
    # Sort the dataframe by date
    df.sort_values( by=Datetime_var, inplace=True )
    # Get the minimum and maximum dates
    min_date = df[Datetime_var].min()
    max_date = df[Datetime_var].max()
    # How many years of data are there?
    yrs_of_data = (max_date-min_date).total_seconds()/60/60/24/365
    nbins = AC.myround(yrs_of_data/TimeWindow, base=1 )
    # Start from last observation or from last block of time
    sdate_block = AC.myround(max_date.year, 5)
    sdate_block =  datetime.datetime(sdate_block, 1, 1)
    # Make sure the dates used are datetimes
    min_date, max_date = pd.to_datetime( [min_date, max_date] ).values
    min_date, max_date = AC.dt64_2_dt( [min_date, max_date])
    # Calculate the number of points for each bin by region
    dfs = {}
    for nbin in range(nbins+2):
        # Start from last observation or from last block of time?
        days2rm = int(nbin*365*TimeWindow)
        if start_from_last_obs:
            bin_start = AC.add_days( max_date, -int(days2rm+(365*TimeWindow)))
            bin_end = AC.add_days( max_date, -days2rm )
        else:
            bin_start = AC.add_days( sdate_block,-int(days2rm+(365*TimeWindow)))
            bin_end = AC.add_days( sdate_block, -days2rm )
        # Select the data within the observational dates
        bool1 = df[Datetime_var] > bin_start
        bool2 = df[Datetime_var] <= bin_end
        df_tmp = df.loc[bool1 & bool2, :]
        # Print the number of values in regions for bin
        if verbose:
            print(bin_start, bin_end, df_tmp.shape)
        # String to save data with
        if start_from_last_obs:
            bin_start_str = bin_start.strftime( '%Y/%m/%d')
            bin_end_str = bin_end.strftime( '%Y/%m/%d')
        else:
            bin_start_str = bin_start.strftime( '%Y')
            bin_end_str = bin_end.strftime( '%Y')
        str2use = '{}-{}'.format(bin_start_str, bin_end_str)
        # Sum up the number of values by region
        dfs[ str2use] = df_tmp['ocean'].value_counts(dropna=False)
    # Combine to single dataframe and sort by date
    dfA = pd.DataFrame( dfs )
    dfA = dfA[list(sorted(dfA.columns)) ]
    # Drop the years without any data
    if drop_bins_without_data:
        dfA = dfA.T.dropna(how='all').T
    # Update index names
    dfA = dfA.T
    rename_cols = {
        np.NaN: 'Other', 'INDIAN OCEAN': 'Indian Ocean',
        'SOUTHERN OCEAN': 'Southern Ocean',
    }
    dfA = dfA.rename(columns=rename_cols)
    dfA = dfA.T
    # Plot up as a stacked bar plot
    import seaborn as sns
    sns.set()
    dfA.T.plot(kind='bar', stacked=True)
    # Add title etc
    plt.ylabel( '# of observations')
    plt.title( '{} obs. data by region'.format(target))
    # Save plotted figure
    savename = 's2s_{}_data_by_year_region'.format(target)
    plt.savefig(savename, dpi=dpi, bbox_inches='tight', pad_inches=0.05)
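
A minimal sketch of the underlying binning idea using plain pandas: the 5-year windows and region counts mirror the function above, but the synthetic 'datetime' values and region labels are made up for illustration.

# Sketch: bin observations into fixed multi-year windows and count per region
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
years = rng.integers(1981, 2020, size=200).astype(str)
df = pd.DataFrame({
    'datetime': pd.to_datetime(years, format='%Y'),
    'ocean': rng.choice(['INDIAN OCEAN', 'SOUTHERN OCEAN', 'Other'], size=200),
})
# Bin the observations into 5-year windows, then count per region in each bin
bins = pd.date_range('1980', '2025', freq='5YS')
df['bin'] = pd.cut(df['datetime'], bins)
counts = df.groupby(['bin', 'ocean'], observed=False).size().unstack('ocean')
# Stacked bar plot of observation counts by region per time window
counts.plot(kind='bar', stacked=True)
plt.ylabel('# of observations')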
Example #3
def mk_NetCDF_from_productivity_data():
    """
    Convert productivity .csv file (Behrenfeld and Falkowski, 1997) into a NetCDF file
    """
    # Location of data (update to use public facing host)
    folder = utils.get_file_locations('data_root') + '/Productivity/'
    # Which file to use?
    filename = 'productivity_behrenfeld_and_falkowski_1997_extrapolated.csv'
    # Setup coordinates
    lon = np.arange(-180, 180, 1/6.)
    lat = np.arange(-90, 90, 1/6.)
    lat = np.append(lat, [90])
    # Setup time
    varname = 'vgpm'
    months = np.arange(1, 13)
    # Extract data
    df = pd.read_csv(folder+filename, header=None)
    print(df.shape)
    # Extract data by month
    da_l = []
    for n in range(12):
        # Assume the data is in blocks by longitude?
        arr = df.values[:, n*1081: (n+1)*1081].T[None, ...]
        print(arr.shape)
        da_l += [xr.Dataset(
            data_vars={varname: (['time', 'lat', 'lon', ], arr)},
            coords={'lat': lat, 'lon': lon, 'time': [n]})]
    # Concatenate into a single xr.Dataset
    ds = xr.concat(da_l, dim='time')
    # Update time ...
    sdate = datetime.datetime(1985, 1, 1)  # Climate model time
    ds['time'] = [AC.add_months(sdate, i-1) for i in months]
    # Update to hours since X
    hours = [(AC.dt64_2_dt([i])[0] - sdate).days *
             24. for i in ds['time'].values]
    ds['time'] = hours
    # Add units
    attrs_dict = {'units': 'hours since 1985-01-01 00:00:00'}
    ds['time'].attrs = attrs_dict
    # Add attributes for variable
    attrs_dict = {
        'long_name': "net primary production",
        'units': "mg C / m**2 / day",
    }
    ds[varname].attrs = attrs_dict
    # For latitude...
    attrs_dict = {
        'long_name': "latitude",
        'units': "degrees_north",
        "standard_name": "latitude",
        "axis": "Y",
    }
    ds['lat'].attrs = attrs_dict
    # And longitude...
    attrs_dict = {
        'long_name': "longitude",
        'units': "degrees_east",
        "standard_name": "longitude",
        "axis": "X",
    }
    ds['lon'].attrs = attrs_dict
    # Add extra global attributes
    global_attribute_dictionary = {
        'Title': 'Sea-surface productivity (Behrenfeld and Falkowski, 1997)',
        'Author': 'Tomas Sherwen ([email protected])',
        'Notes': "Data extracted from OCRA and extrapolated to poles by Martin Wadley. NetCDF contructed using xarray (xarray.pydata.org) by Tomas Sherwen. \n NOTES from oringal site (http://orca.science.oregonstate.edu/) from 'based on the standard vgpm algorithm. npp is based on the standard vgpm, using modis chl, sst4, and par as input; clouds have been filled in the input data using our own gap-filling software. For citation, please reference the original vgpm paper by Behrenfeld and Falkowski, 1997a as well as the Ocean Productivity site for the data.' ",
        'History': 'Last Modified on:' + strftime("%B %d %Y", gmtime()),
        'Conventions': "COARDS",
    }
    ds.attrs = global_attribute_dictionary
    # Save to NetCDF
    filename = 'productivity_behrenfeld_and_falkowski_1997_extrapolated.nc'
    ds.to_netcdf(filename, unlimited_dims={'time': True})
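
A minimal sketch of the same xarray pattern (wrap a (time, lat, lon) array in a Dataset, attach CF-style attributes, and write a NetCDF file), using a coarser illustrative grid and zero-filled data rather than the productivity .csv:

# Sketch: build a CF-style NetCDF from a (time, lat, lon) array with xarray
import numpy as np
import xarray as xr

lat = np.linspace(-90, 90, 181)
lon = np.arange(-180, 180, 1.)
data = np.zeros((12, lat.size, lon.size), dtype=np.float32)
ds = xr.Dataset(
    data_vars={'vgpm': (['time', 'lat', 'lon'], data)},
    coords={'time': np.arange(12), 'lat': lat, 'lon': lon},
)
# Variable and coordinate attributes follow the CF/COARDS conventions used above
ds['vgpm'].attrs = {'long_name': 'net primary production',
                    'units': 'mg C / m**2 / day'}
ds['lat'].attrs = {'units': 'degrees_north', 'standard_name': 'latitude'}
ds['lon'].attrs = {'units': 'degrees_east', 'standard_name': 'longitude'}
ds.to_netcdf('example_productivity.nc', unlimited_dims={'time': True})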
Example #4
def combine_output2_1_file_per_day(runs2use=None,
                                   run_dict=None,
                                   res='4x5',
                                   version="0.1.1"):
    """
    Combine the PREFIA African AQ files into daily files for PREFIA inter-comparison
    """
    # format of outputted file
    FileStr = 'PREFIA_York_GEOSChem_{}_{}_{}_v{}'
    # file prefixes to use
    prefixes = [
        'HEMCO_diagnostics',
    ]
    GCprefixs = [
        'StateMet',
        'LevelEdgeDiags',
        'SpeciesConc',
        'WetLossLS',
        'Aerosols',
        'ConcAfterChem',
        'DryDep',
        'WetLossConv',
    ]
    prefixes += ['GEOSChem.' + i for i in GCprefixs]
    REFprefix = 'GEOSChem.StateMet'
    # Set ordering of prefixes
    # NOTE: use Met for base as it has all of the extra dimensions
    prefixes = [REFprefix] + [i for i in prefixes if i != REFprefix]
    # Loop by runs
    if isinstance(runs2use, type(None)):
        runs2use = run_dict.keys()
    for run in runs2use:
        folder = run_dict[run]
        print(run, folder)
        # Loop by prefix
        files4prefix = {}
        for prefix in prefixes:
            print(prefix)
            files4prefix[prefix] = get_files_in_folder4prefix(folder=folder,
                                                              prefix=prefix)
        # Check the number of files for each prefix.
        N_files = [len(files4prefix[i]) for i in files4prefix.keys()]
        if len(set(N_files)) != 1:
            print('WARNING: Different numbers of files for prefixes')
        # Get a list of dates
        dates = get_files_in_folder4prefix(folder=folder,
                                           prefix=REFprefix,
                                           rtn_dates4files=True)
        for day in dates:
            # Check all the files are present and that there is one of them
            #            day_str = day.strftime(format='%Y%m%d_%H%M')
            day_str = day.strftime(format='%Y%m%d')
            print(day_str)
            # Loop to get file names by prefix
            files2combine = []
            for prefix in prefixes:
                files = [i for i in files4prefix[prefix] if day_str in i]
                if len(files) != 1:
                    print('WARNING: did not find exactly one file for prefix')
                print(day_str, prefix, files)
                #                files2combine += [ xr.open_dataset( file[0] ) ]
                files2combine += [files]
            # Remove the doubled up data variables
            vars2rm = ['AREA', 'P0', 'hybm', 'hyam', 'hyai', 'hybi', '']
            # Open first fileset as a single xarray dataset
            ds = xr.open_mfdataset(files2combine[0])
            # Now add other file sets to this
            for files in files2combine[1:]:
                print(files)
                dsNew = xr.open_mfdataset(files)
                # Make sure indices for levels are coordinate variables
                var2add = 'ilev'
                if var2add not in dsNew.coords:
                    print('Adding ilev to dataset')
                    if var2add not in dsNew.data_vars:
                        dsNew[var2add] = ds[var2add].values
                    dsNew = dsNew.set_coords(var2add)
                    dsNew = dsNew.assign_coords(ilev=ds[var2add].copy())
                var2add = 'lev'
                if var2add not in dsNew.coords:
                    print('Adding lev to dataset')
                    if var2add not in dsNew.data_vars:
                        dsNew[var2add] = ds[var2add].values
                    dsNew = dsNew.set_coords(var2add)
                    dsNew = dsNew.assign_coords(lev=ds[var2add])
                # Update the timestamp for the HEMCO files
                if all(['HEMCO_diagnostics' in i for i in files]):
                    dt = [
                        AC.add_hrs(i, 0.5)
                        for i in AC.dt64_2_dt(dsNew.time.values)
                    ]
                    dsNew.time.values = dt
                # Combine new data into a core file
                vars2add = [i for i in dsNew.data_vars if i not in vars2rm]
                ds = xr.merge([ds, dsNew[vars2add]])
                del dsNew
            # Remove OH and HO2 from SpeciesConc diag.
            ds = remove_OH_HO2_from_SpeciesConc(ds=ds)
            # Remove any unneeded vertical dimensions
            ds = remove_unneeded_vertical_dimensions(ds=ds)
            # Update any units to those requested for PREFIA...
            ds = convert_IC_ds_units(ds=ds)  # losing units here
            # Add combined variables
            ds = add_combined_vars(ds=ds)
            # Only include requested parameters
            ds = only_inc_requested_vars(ds=ds)
            # Change any names
            ds = update_names_in_ds(ds=ds)
            # Update extents to the PREFIA domain over Africa: the south-western
            # cell centre is placed at (19.9W, 39.9S) and the north-eastern cell
            # centre at (54.9E, 39.9N)
            ds = only_consider_domain_over_Africa(ds)
            # Update the global attributes
            ds = add_global_attributes4run(ds, run=run)
            # remove all the unneeded coordinates
            ds = ds.squeeze()
            # Now save the file for the given day
            filename = FileStr.format(res, run, day_str, version)
            ds.to_netcdf('{}/{}.nc'.format(folder, filename))
            # Remove the used files from memory
            del ds
            gc.collect()
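
A minimal, runnable sketch of the core merge pattern used above: take one diagnostic collection as the base dataset, drop duplicated grid variables from the others, and merge them in before writing a single daily file. The two in-memory datasets and their variable names below are hypothetical stand-ins for the per-prefix files opened with open_mfdataset.

# Sketch: merge several per-diagnostic datasets into one combined daily file
import numpy as np
import xarray as xr

coords = {'time': [0], 'lat': np.arange(-30., 40.), 'lon': np.arange(-20., 55.)}
ds_met = xr.Dataset({'Met_T': (['time', 'lat', 'lon'],
                               np.zeros((1, 70, 75)))}, coords=coords)
ds_spc = xr.Dataset({'SpeciesConc_O3': (['time', 'lat', 'lon'],
                                        np.zeros((1, 70, 75))),
                     'AREA': (['lat', 'lon'], np.ones((70, 75)))}, coords=coords)
# Grid variables that are duplicated across collections and only needed once
vars2rm = ['AREA', 'P0', 'hybm', 'hyam', 'hyai', 'hybi']
# Use the met collection as the base, then merge the remaining variables in
ds = ds_met
for ds_new in [ds_spc]:
    vars2add = [v for v in ds_new.data_vars if v not in vars2rm]
    ds = xr.merge([ds, ds_new[vars2add]])
ds.to_netcdf('combined_daily_output.nc')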