def as_data_frame(self, copy=True):
    """
    Build a Pandas DataFrame from this GriddedData object.

    The conversion is delegated to ``iris.pandas.as_data_frame``.

    :param copy: Whether the new DataFrame should get its own copy of the
                 data. Default is True.
    :return: A Pandas DataFrame representing the data and coordinates.
             Metadata is not included.
    """
    # Imported lazily, inside the method, exactly as the module function is
    # only needed when a conversion is actually requested.
    import iris.pandas as iris_pandas

    frame = iris_pandas.as_data_frame(self, copy=copy)
    return frame
def cube_to_df(cube_max, start_year=1992, end_year=2015):
    """
    Convert a cube to a DataFrame with ``month``, ``year`` and
    ``Temperature`` columns, restricted to a range of years.

    The cube is converted with ``as_data_frame`` and its index is turned into
    a regular ``index`` column. That column is parsed as dates to derive the
    ``month`` and ``year`` columns and is then dropped. The column labelled
    ``0`` is renamed to ``"Temperature"``.

    :param cube_max: Cube to convert; passed unchanged to ``as_data_frame``.
    :param start_year: First year (inclusive) to keep. Default is 1992.
    :param end_year: Last year (inclusive) to keep. Default is 2015.
    :return: DataFrame containing only the rows whose ``year`` lies in
             ``[start_year, end_year]``.
    """
    df = as_data_frame(cube_max, copy=True).reset_index()

    # Parse the former index once and reuse it for both derived columns.
    dates = pd.DatetimeIndex(df['index'])
    df['month'] = dates.month
    df['year'] = dates.year

    df = df.drop('index', axis=1)
    df = df.rename(columns={0: "Temperature"})

    # '@name' makes query() read the local variables of this function.
    return df.query("year >= @start_year and year <= @end_year")
def nc2df(fname, columns_name='station name'):
    """
    Load a netCDF file as a dataframe.

    The file is loaded as a single cube with ``iris.load_cube``. Every
    coordinate on dimension 0 except ``'time'`` and every coordinate on
    dimension 1 except ``columns_name`` is removed before the cube is
    converted with ``as_data_frame``.

    :param fname: File name handed to ``iris.load_cube``.
    :param columns_name: Name of the coordinate kept on dimension 1 and used
                         to label the column of a 1-D cube.
                         Default is ``'station name'``.
    :return: The DataFrame produced by ``as_data_frame``.
    """
    cube = iris.load_cube(fname)

    # Keep only the 'time' coordinate on the first dimension.
    for coord in cube.coords(dimensions=[0]):
        name = coord.name()
        if name != 'time':
            cube.remove_coord(name)

    # Keep only the column-labelling coordinate on the second dimension.
    for coord in cube.coords(dimensions=[1]):
        name = coord.name()
        if name != columns_name:
            cube.remove_coord(name)

    df = as_data_frame(cube)
    if cube.ndim == 1:
        # Work around iris: for a 1-D cube the single column is relabelled
        # by hand with the first point of the `columns_name` coordinate.
        station = cube.coord(columns_name).points[0]
        df.columns = [station]
    return df
def nc2df(fname, columns_name='station_code'):
    """
    Read a netCDF timeSeries file and return it as a dataframe.

    Only the ``'time'`` coordinate is kept on dimension 0 and only the
    ``columns_name`` coordinate is kept on dimension 1; every other
    coordinate on those dimensions is removed before the conversion.

    """
    cube = iris.load_cube(fname)

    # (dimension, name of the one coordinate to keep on it)
    kept_per_dim = ((0, 'time'), (1, columns_name))
    for dim, kept in kept_per_dim:
        for crd in cube.coords(dimensions=[dim]):
            crd_name = crd.name()
            if crd_name == kept:
                continue
            cube.remove_coord(crd_name)

    frame = as_data_frame(cube)
    if cube.ndim != 1:
        return frame

    # Horrible work around iris: label the single column by hand.
    label = cube.coord(columns_name).points[0]
    frame.columns = [label]
    return frame
def save_results(regridded_profiles, tracer_columns, params):
    """
    Save (merged) results, both as netCDF cubes and as data tables.

    :param regridded_profiles: sequence of profile cubes (iterated several
        times). Each must have an 'altitude' and a 'time' coordinate and a
        'name' attribute.
    :param tracer_columns: sequence of column cubes, each with a 'time'
        coordinate and a 'name' attribute.
    :param params: dict read for the keys 'out_profiles_basename',
        'out_columns_basename', 'station_name', 'out_dir' and 'out_format'.
        Any "*" in the two basenames is replaced by the station name.
        If ``params['out_dir']`` is None it is replaced IN PLACE by the
        absolute path of the current working directory.
    :return: a 2-tuple ``([panel_profiles, dataframe_columns], filenames)``
        where ``filenames`` is the list ``[profiles .nc, columns .nc,
        profiles table, columns table]``. Tables are only written when
        'out_format' is one of 'hdf', 'hdf5', 'xls', 'xlsx' or 'csv'; for
        any other value no table file is written but the names are still
        returned.
    """
    # SET OUTPUT FILE NAMES
    # The str.replace method is used here because the module-level
    # string.replace function only exists in Python 2.
    out_profiles_basename = params['out_profiles_basename'].replace(
        "*", params['station_name']
    )
    out_columns_basename = params['out_columns_basename'].replace(
        "*", params['station_name']
    )

    if params['out_dir'] is None:
        # Kept as an in-place update of the caller's dict (existing behavior).
        params['out_dir'] = os.path.abspath(os.getcwd())

    out_profiles_basepath = os.path.join(params['out_dir'],
                                         out_profiles_basename)
    out_columns_basepath = os.path.join(params['out_dir'],
                                        out_columns_basename)

    out_profiles_cubes_fn = out_profiles_basepath + '.nc'
    out_columns_cubes_fn = out_columns_basepath + '.nc'

    out_profiles_tables_fn = '.'.join([out_profiles_basepath,
                                       params['out_format']])
    out_columns_tables_fn = '.'.join([out_columns_basepath,
                                      params['out_format']])

    # SAVE IN NETCDF FILES (CUBES)
    # make altitude a dimension coordinate where it is still auxiliary
    for profile_cube in regridded_profiles:
        if isinstance(profile_cube.coord('altitude'),
                      iris.coords.AuxCoord):
            iris_tools.permute_dim_aux_coords(profile_cube,
                                              'model_level_number',
                                              'altitude')

    for cube in regridded_profiles:
        iris_tools.fix_cube_attributes_vals(cube)
    for cube in tracer_columns:
        iris_tools.fix_cube_attributes_vals(cube)

    iris.save(regridded_profiles, out_profiles_cubes_fn)
    iris.save(tracer_columns, out_columns_cubes_fn)

    # SAVE AS DATA TABLES (EXCEL, CSV...)
    dataframe_profiles = {}
    for profile in regridded_profiles:
        # Work on a copy so the cubes given by the caller keep their
        # auxiliary coordinates.
        profile_cube = profile.copy()

        # there must be only one defined dimension coordinate for each
        # cube dimension, no auxiliary coordinate (convert iris to pandas)
        z_dim = profile_cube.coord_dims(profile_cube.coord(name='altitude'))
        iris_tools.remove_dim_aux_coords(profile_cube, z_dim)

        # '/' is replaced so the units can be used inside a table name.
        dataframe_units = str(profile_cube.units).replace('/', '_')
        if dataframe_units == 'unknown':
            dataframe_units = profile_cube.attributes['no_udunits2']

        dataframe_name = "{tracer}_{units}".format(
            tracer=profile_cube.attributes['name'],
            units=dataframe_units
        )

        if profile_cube.ndim == 1:
            # scalar time coordinate: build a one-row frame indexed by date
            # NOTE(review): the series is built from `profile`, not from the
            # stripped copy `profile_cube` used in the other branch -
            # confirm whether this is intended.
            series = ipandas.as_series(profile)
            time_coord = profile_cube.coord('time')
            date = time_coord.units.num2date(time_coord.points[0])
            dataframe_profiles[dataframe_name] = \
                pd.DataFrame({date : series}).transpose()
        else:
            # dimensional time coordinate
            dataframe_profiles[dataframe_name] = \
                ipandas.as_data_frame(profile_cube)

    # NOTE: pd.Panel is not available in recent pandas releases; it is kept
    # because the Panel object is part of this function's return value.
    panel_profiles = pd.Panel(dataframe_profiles).astype(np.float64)

    series_columns = {}
    for column in tracer_columns:
        series_name = "{tracer}_{units}".format(
            tracer=column.attributes['name'],
            units='molec_cm-2'
        )
        series_columns[series_name] = ipandas.as_series(column)
        # Re-index the series with dates decoded from the time coordinate.
        time_coord = column.coord('time')
        date = time_coord.units.num2date(time_coord.points)
        series_columns[series_name].index = date
    dataframe_columns = pd.DataFrame(series_columns).astype(np.float64)

    if params['out_format'] in ('hdf', 'hdf5'):
        panel_profiles.to_hdf(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_hdf(out_columns_tables_fn, 'columns')

    elif params['out_format'] in ('xls', 'xlsx'):
        panel_profiles.to_excel(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_excel(out_columns_tables_fn, 'columns')

    elif params['out_format'] == 'csv':
        # One CSV file per panel item, named '<basepath>_<item>.csv'.
        # NOTE: out_profiles_tables_fn ('<basepath>.csv') is still returned
        # below although no file with that exact name is written here.
        for pr in panel_profiles:
            panel_profiles[pr].to_csv('{0}_{1}.csv'
                                      .format(out_profiles_basepath, pr))
        dataframe_columns.to_csv(out_columns_tables_fn)

    return ([panel_profiles, dataframe_columns],
            [out_profiles_cubes_fn, out_columns_cubes_fn,
             out_profiles_tables_fn, out_columns_tables_fn])