Example #1
    def as_data_frame(self, copy=True):
        """
        Convert a GriddedData object to a Pandas DataFrame.

        :param copy: Create a copy of the data for the new DataFrame? Default is True.
        :return: A Pandas DataFrame representing the data and coordinates. Note that this won't include any metadata.
        """
        from iris.pandas import as_data_frame
        return as_data_frame(self, copy=copy)
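For reference, here is a minimal, self-contained sketch of what iris.pandas.as_data_frame does with a small synthetic cube; the coordinate values and data below are made up purely for illustration.

import numpy as np
from iris.cube import Cube
from iris.coords import DimCoord
from iris.pandas import as_data_frame

# Build a tiny 2-D cube (time x latitude) with made-up values.
time = DimCoord([0, 1, 2], standard_name='time',
                units='days since 2000-01-01')
latitude = DimCoord([-30.0, 0.0, 30.0], standard_name='latitude',
                    units='degrees')
cube = Cube(np.arange(9.0).reshape(3, 3),
            long_name='air_temperature', units='K')
cube.add_dim_coord(time, 0)
cube.add_dim_coord(latitude, 1)

# Convert to a pandas DataFrame (the exact layout depends on the iris version).
df = as_data_frame(cube, copy=True)
print(df)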
Example #2
import pandas as pd
from iris.pandas import as_data_frame


def cube_to_df(cube_max):
    # Convert the cube to a DataFrame and move the time index into a column.
    df = as_data_frame(cube_max, copy=True)
    df = df.reset_index()
    # Split the datetime index into month and year columns.
    df['month'] = pd.DatetimeIndex(df['index']).month
    df['year'] = pd.DatetimeIndex(df['index']).year
    df.drop('index', axis=1, inplace=True)
    df = df.rename(columns={0: "Temperature"})
    # Keep only the 1992-2015 period.
    df = df.query("year >= 1992 and year <= 2015")

    return df
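A possible way to call this helper, assuming a hypothetical NetCDF file tas_monthly.nc containing a monthly temperature cube with a time dimension (both the filename and the variable are illustrative):

import iris

# Hypothetical input file; any cube whose leading coordinate converts to
# datetimes will work.
cube_max = iris.load_cube('tas_monthly.nc')
df = cube_to_df(cube_max)
print(df.head())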
Example #3
import iris
from iris.pandas import as_data_frame


def nc2df(fname):
    cube = iris.load_cube(fname)
    # Keep only the 'time' coordinate on the first dimension.
    for coord in cube.coords(dimensions=[0]):
        name = coord.name()
        if name != 'time':
            cube.remove_coord(name)
    # Keep only the 'station name' coordinate on the second dimension.
    for coord in cube.coords(dimensions=[1]):
        name = coord.name()
        if name != 'station name':
            cube.remove_coord(name)
    df = as_data_frame(cube)
    if cube.ndim == 1:  # Horrible workaround for 1-D cubes in iris.
        station = cube.coord('station name').points[0]
        df.columns = [station]
    return df
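A hedged usage sketch: because each returned DataFrame is indexed by time with one column per station, results from several hypothetical per-station files can be concatenated side by side (the filenames below are illustrative only).

import pandas as pd

fnames = ['station_a.nc', 'station_b.nc', 'station_c.nc']  # hypothetical files
df = pd.concat([nc2df(fname) for fname in fnames], axis=1)
print(df.head())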
Example #4
import iris
from iris.pandas import as_data_frame


def nc2df(fname, columns_name='station_code'):
    """
    Load a netCDF timeSeries file as a DataFrame.

    """
    cube = iris.load_cube(fname)
    # Keep only the 'time' coordinate on the first dimension.
    for coord in cube.coords(dimensions=[0]):
        name = coord.name()
        if name != 'time':
            cube.remove_coord(name)
    # Keep only the station coordinate on the second dimension.
    for coord in cube.coords(dimensions=[1]):
        name = coord.name()
        if name != columns_name:
            cube.remove_coord(name)
    df = as_data_frame(cube)
    if cube.ndim == 1:  # Horrible workaround for 1-D cubes in iris.
        station = cube.coord(columns_name).points[0]
        df.columns = [station]
    return df
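This variant parametrises the coordinate used for the column labels, so the same helper covers files that name their station coordinate differently; a short sketch with hypothetical filenames:

# Hypothetical files whose second dimension is labelled by different coordinates.
df_codes = nc2df('obs_codes.nc')                               # default 'station_code'
df_names = nc2df('obs_names.nc', columns_name='station name')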
Example #5
import os

import numpy as np
import pandas as pd
import iris
import iris.pandas as ipandas

import iris_tools  # project-local helper module used below


def save_results(regridded_profiles, tracer_columns, params):
    """
    Save (merged) results.
    """

    # SET OUTPUT FILE NAMES
    out_profiles_basename = params['out_profiles_basename'].replace(
        "*", params['station_name']
    )
    out_columns_basename = params['out_columns_basename'].replace(
        "*", params['station_name'])

    if params['out_dir'] is None:
        params['out_dir'] = os.path.abspath(os.getcwd())
    out_profiles_basepath = os.path.join(params['out_dir'],
                                         out_profiles_basename)
    out_columns_basepath = os.path.join(params['out_dir'],
                                        out_columns_basename)
    
    out_profiles_cubes_fn = out_profiles_basepath + '.nc'
    out_columns_cubes_fn = out_columns_basepath + '.nc'
    
    out_profiles_tables_fn = '.'.join([out_profiles_basepath,
                                       params['out_format']])
    out_columns_tables_fn = '.'.join([out_columns_basepath,
                                      params['out_format']])                                   
    

    # SAVE IN NETCDF FILES (CUBES)
    # make altitude as dimension coord
    for profile_cube in regridded_profiles:
        if isinstance(profile_cube.coord('altitude'), iris.coords.AuxCoord):
            iris_tools.permute_dim_aux_coords(profile_cube,
                                              'model_level_number',
                                              'altitude')

    for cube in regridded_profiles:
        iris_tools.fix_cube_attributes_vals(cube)

    for cube in tracer_columns:
        iris_tools.fix_cube_attributes_vals(cube)

    iris.save(regridded_profiles, out_profiles_cubes_fn)
    iris.save(tracer_columns, out_columns_cubes_fn)
    
    # SAVE AS DATA TABLES (EXCEL, CSV...)
    dataframe_profiles = {}
    for profile in regridded_profiles:
        profile_cube = profile.copy()

        # There must be exactly one dimension coordinate per cube dimension and
        # no auxiliary coordinates before converting from iris to pandas.
        z_dim = profile_cube.coord_dims(profile_cube.coord('altitude'))
        iris_tools.remove_dim_aux_coords(profile_cube, z_dim)

        dataframe_units = str(profile_cube.units).replace('/', '_')
        if dataframe_units == 'unknown':
            dataframe_units = profile_cube.attributes['no_udunits2']
        dataframe_name = "{tracer}_{units}".format(
            tracer=profile_cube.attributes['name'],
            units=dataframe_units
        )

        # scalar time coordinate
        if profile_cube.ndim == 1:
            series = ipandas.as_series(profile)
            time_coord = profile_cube.coord('time')  
            date = time_coord.units.num2date(time_coord.points[0])
            dataframe_profiles[dataframe_name] = pd.DataFrame({date : series}).transpose()
        # dimensional time coordinate
        else:
            dataframe_profiles[dataframe_name] = ipandas.as_data_frame(profile_cube)

    # pandas.Panel was removed in pandas 0.25, so this requires an older pandas.
    panel_profiles = pd.Panel(dataframe_profiles).astype(np.float64)

    series_columns = {}
    for column in tracer_columns:
        series_name = "{tracer}_{units}".format(
            tracer=column.attributes['name'],
            units='molec_cm-2'
        )
        series_columns[series_name] = ipandas.as_series(column)
        time_coord = column.coord('time')  
        date = time_coord.units.num2date(time_coord.points)
        series_columns[series_name].index = date

    dataframe_columns = pd.DataFrame(series_columns).astype(np.float64)

    if params['out_format'] in ('hdf', 'hdf5'):
        panel_profiles.to_hdf(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_hdf(out_columns_tables_fn, 'columns')
    
    elif params['out_format'] in ('xls', 'xlsx'):
        panel_profiles.to_excel(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_excel(out_columns_tables_fn, 'columns')

    elif params['out_format'] == 'csv':
        for pr in panel_profiles:
            panel_profiles[pr].to_csv('{0}_{1}.csv'
                                      .format(out_profiles_basepath, pr))
        dataframe_columns.to_csv(out_columns_tables_fn)
    
    return ([panel_profiles, dataframe_columns],
            [out_profiles_cubes_fn, out_columns_cubes_fn,
             out_profiles_tables_fn, out_columns_tables_fn])
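For illustration, a hedged sketch of the kind of params dictionary the function expects, inferred from the keys it reads; all values are hypothetical, and regridded_profiles / tracer_columns would be iris CubeLists produced earlier in the workflow.

params = {
    'station_name': 'jungfraujoch',            # hypothetical station
    'out_profiles_basename': 'profiles_*',     # '*' is replaced by the station name
    'out_columns_basename': 'columns_*',
    'out_dir': None,                           # None -> current working directory
    'out_format': 'csv',                       # 'hdf'/'hdf5', 'xls'/'xlsx' or 'csv'
}

# (tables, filenames) = save_results(regridded_profiles, tracer_columns, params)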