Esempi in Python per get_dtype

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: pocean.utils

Metodo/funzione: get_dtype

Esempi su hotexamples.com: 15

get_dtype in Python: 15 esempi trovati. Questi sono i migliori esempi reali in Python per pocean.utils.get_dtype, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: cr.py Progetto: pyoceans/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as a ContiguousRaggedTrajectoryProfile.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            reduce_dims, unlimited: popped and otherwise ignored.
            unique_dims: when ``True`` every dimension is named
                ``<axis>_dim`` instead of sharing the axis variable's name.
            attributes: merged over ``nc.nc_attributes(axes, daxes)``.
            profile_vars: column names stored once per profile (first value
                of each profile group) instead of once per sample.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.

        Returns:
            ``ContiguousRaggedTrajectoryProfile(output, **kwargs)``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes

        # Popped so they are not forwarded to the constructor below.
        kwargs.pop('reduce_dims', False)
        kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # Rename the dimension to avoid a dimension and coordinate having the same name
            # which is not supported in xarray
            changed_axes = {
                k: '{}_dim'.format(v)
                for k, v in axes._asdict().items()
            }
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        with ContiguousRaggedTrajectoryProfile(output, 'w') as nc:

            trajectory_groups = df.groupby(axes.trajectory)
            unique_trajectories = list(trajectory_groups.groups.keys())
            num_trajectories = len(unique_trajectories)

            nc.createDimension(daxes.trajectory, num_trajectories)
            trajectory = nc.createVariable(axes.trajectory,
                                           get_dtype(df[axes.trajectory]),
                                           (daxes.trajectory, ))
            trajectory[:] = np.array(unique_trajectories)

            # One slot per distinct profile identifier
            unique_profiles = df[axes.profile].unique()
            num_profiles = len(unique_profiles)

            nc.createDimension(daxes.profile, num_profiles)
            profile = nc.createVariable(axes.profile,
                                        get_dtype(df[axes.profile]),
                                        (daxes.profile, ))
            profile[:] = np.array(unique_profiles)

            # The sample dimension holds one entry per dataframe row
            num_obs = len(df)
            nc.createDimension(daxes.sample, num_obs)

            # The trajectory this profile belongs to
            t_ind = nc.createVariable('trajectoryIndex', 'i4',
                                      (daxes.profile, ))
            # Number of observations in each profile
            row_size = nc.createVariable('rowSize', 'i4', (daxes.profile, ))

            # Create all of the axis variables
            time = nc.createVariable(axes.t,
                                     'f8', (daxes.profile, ),
                                     fill_value=np.dtype('f8').type(
                                         cls.default_fill_value))
            latitude = nc.createVariable(
                axes.y,
                get_dtype(df[axes.y]), (daxes.profile, ),
                fill_value=df[axes.y].dtype.type(cls.default_fill_value))
            longitude = nc.createVariable(
                axes.x,
                get_dtype(df[axes.x]), (daxes.profile, ),
                fill_value=df[axes.x].dtype.type(cls.default_fill_value))

            # Axes variables are already processed so skip them
            data_columns = [d for d in df.columns if d not in axes]
            attributes = dict_update(nc.nc_attributes(axes, daxes),
                                     kwargs.pop('attributes', {}))

            # Variables defined on only the profile axis
            profile_vars = kwargs.pop('profile_vars', [])
            profile_columns = [p for p in profile_vars if p in data_columns]
            for c in profile_columns:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    create_ncvar_from_series(nc,
                                             var_name, (daxes.profile, ),
                                             df[c],
                                             zlib=True,
                                             complevel=1)

            # NOTE(review): ``j`` restarts at 0 for every trajectory group, so
            # with more than one trajectory later groups write into the same
            # profile slots as earlier ones -- confirm this is intended.
            for i, (_, trg) in enumerate(trajectory_groups):
                for j, (_, pfg) in enumerate(trg.groupby(axes.profile)):
                    time[j] = get_ncdata_from_series(pfg[axes.t],
                                                     time).astype('f8')[0]
                    latitude[j] = get_ncdata_from_series(
                        pfg[axes.y], latitude)[0]
                    longitude[j] = get_ncdata_from_series(
                        pfg[axes.x], longitude)[0]
                    row_size[j] = len(pfg)
                    t_ind[j] = i

                    # Save any profile variables on the "profile" index using the first value found
                    # in the column.
                    for c in profile_columns:
                        var_name = cf_safe_name(c)
                        if var_name not in nc.variables:
                            continue
                        v = nc.variables[var_name]
                        vvalues = get_ncdata_from_series(pfg[c], v)[0]
                        try:
                            v[j] = vvalues
                        except Exception:
                            # Best effort: log and move on to the next column.
                            # ``Exception`` (not ``BaseException``) so that
                            # KeyboardInterrupt / SystemExit still propagate.
                            L.exception('Failed to add {}'.format(c))
                            continue

            # Add back in the z axes that was removed when calculating data_columns
            # and ignore variables that were stored in the profile index
            sample_columns = [
                f for f in data_columns + [axes.z] if f not in profile_columns
            ]
            skips = ['trajectoryIndex', 'rowSize']
            for c in [d for d in sample_columns if d not in skips]:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(nc,
                                                 var_name, (daxes.sample, ),
                                                 df[c],
                                                 zlib=True,
                                                 complevel=1)
                else:
                    v = nc.variables[var_name]
                vvalues = get_ncdata_from_series(df[c], v)
                try:
                    v[:] = vvalues.reshape(v.shape)
                except Exception:
                    # Same best-effort policy as above; interpreter-level
                    # exits are no longer swallowed.
                    L.exception('Failed to add {}'.format(c))
                    continue

            # Metadata variables
            if 'crs' not in nc.variables:
                nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return ContiguousRaggedTrajectoryProfile(output, **kwargs)

Esempio n. 2

Mostra file

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an OrthogonalMultidimensionalTimeseriesProfile.

        The frame is re-indexed onto the full Cartesian product of its time,
        z and station values, so every data column can be reshaped straight
        into a (time, z, station) block without iterating over groups.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            reduce_dims: when ``True`` and exactly one station is present,
                no station dimension is created.
            unlimited: when ``True`` the time dimension is created with
                size ``None``.
            attributes: merged over ``nc.nc_attributes(axes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        value_columns = [col for col in df.columns if col not in axes]

        squeeze_station = kwargs.pop('reduce_dims', False)
        grow_time = kwargs.pop('unlimited', False)

        # int64 -> int32 where possible
        df = downcast_dataframe(df)

        # Expand the index to every (t, z, station) combination so that a
        # whole column maps onto the final dimension sizes in one reshape.
        levels = [axes.t, axes.z, axes.station]
        df = df.set_index(levels)
        full_index = pd.MultiIndex.from_product(df.index.levels, names=levels)
        df = df.reindex(full_index)

        z_values = df.index.get_level_values(axes.z).unique().values
        # tolist() yields Timestamp objects
        t_values = df.index.get_level_values(axes.t).unique().tolist()
        station_level = df.index.get_level_values(axes.station)
        station_ids = station_level.unique()

        with OrthogonalMultidimensionalTimeseriesProfile(output, 'w') as nc:

            if squeeze_station is True and station_ids.size == 1:
                # Single station: leave the station dimension out entirely
                write_slice = np.s_[:, :]
                default_dimensions = (axes.t, axes.z)
                station_dimensions = ()
            else:
                write_slice = np.s_[:, :, :]
                default_dimensions = (axes.t, axes.z, axes.station)
                station_dimensions = (axes.station, )
                nc.createDimension(axes.station, station_ids.size)

            station = nc.createVariable(axes.station, get_dtype(station_ids),
                                        station_dimensions)
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         station_dimensions)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          station_dimensions)

            # VLEN (string) variables must be assigned one integer index at
            # a time, and lat/lon are looked up per station anyway.
            for idx, sid in enumerate(station_ids):
                rows = station_level == sid
                station[idx] = sid
                latitude[idx] = df[axes.y][rows].dropna().iloc[0]
                longitude[idx] = df[axes.x][rows].dropna().iloc[0]

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Time axis
            t_size = None if grow_time is True else len(t_values)
            nc.createDimension(axes.t, t_size)
            time = nc.createVariable(axes.t, 'f8', (axes.t, ))
            time[:] = nc4.date2num(t_values, units=cls.default_time_unit)

            # Vertical axis
            nc.createDimension(axes.z, z_values.size)
            z = nc.createVariable(axes.z, get_dtype(z_values), (axes.z, ))
            z[:] = z_values

            attributes = dict_update(nc.nc_attributes(axes),
                                     kwargs.pop('attributes', {}))

            cube_shape = (len(t_values), z_values.size, station_ids.size)
            coordinates = '{} {} {} {}'.format(axes.t, axes.z, axes.x, axes.y)

            for col in value_columns:
                name = cf_safe_name(col)
                if name in nc.variables:
                    target = nc.variables[name]
                else:
                    # First time this variable is seen: create it and record
                    # its coordinates attribute.
                    target = create_ncvar_from_series(nc,
                                                      name,
                                                      default_dimensions,
                                                      df[col],
                                                      zlib=True,
                                                      complevel=1)
                    attributes[name] = dict_update(
                        attributes.get(name, {}),
                        {'coordinates': coordinates})

                payload = get_ncdata_from_series(df[col], target)
                target[write_slice] = payload.reshape(*cube_shape)

            nc.update_attributes(attributes)

        return OrthogonalMultidimensionalTimeseriesProfile(output, **kwargs)

Esempio n. 3

Mostra file

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an IncompleteMultidimensionalProfile.

        Each profile group becomes one row of a (profile, z) layout; rows
        shorter than the longest profile are only partially written.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            unlimited: when ``True`` the profile dimension is created with
                size ``None``.
            unique_dims: when ``True`` every dimension is named
                ``<axis>_dim`` instead of sharing the axis variable's name.
            attributes: merged over ``nc.nc_attributes(axes, daxes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        value_columns = [col for col in df.columns if col not in axes]

        grow_profiles = kwargs.pop('unlimited', False)

        # Dimension names follow the axis names unless unique_dims is set;
        # a dimension and a coordinate sharing one name is not supported
        # in xarray.
        dim_names = axes
        if kwargs.pop('unique_dims', False) is True:
            dim_names = get_default_axes({
                key: '{}_dim'.format(name)
                for key, name in axes._asdict().items()
            })

        # tz-aware datetimes -> naive UTC, then int64 -> int32
        df = downcast_dataframe(nativize_times(df))

        with IncompleteMultidimensionalProfile(output, 'w') as nc:

            by_profile = df.groupby(axes.profile)

            n_profiles = (None if grow_profiles is True
                          else df[axes.profile].unique().size)
            nc.createDimension(dim_names.profile, n_profiles)
            nc.createDimension(dim_names.z, by_profile.size().max())

            # Metadata variables
            nc.createVariable('crs', 'i4')

            per_profile = (dim_names.profile, )
            per_sample = (dim_names.profile, dim_names.z)

            profile = nc.createVariable(axes.profile,
                                        get_dtype(df[axes.profile]),
                                        per_profile)
            time = nc.createVariable(axes.t, 'f8', per_profile)
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         per_profile)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          per_profile)
            z_fill = df[axes.z].dtype.type(cls.default_fill_value)
            z = nc.createVariable(axes.z, get_dtype(df[axes.z]), per_sample,
                                  fill_value=z_fill)

            attributes = dict_update(nc.nc_attributes(axes, dim_names),
                                     kwargs.pop('attributes', {}))

            # Declare every data variable from the full frame before any
            # per-profile values are written.
            coordinates = '{} {} {} {}'.format(axes.t, axes.z, axes.x, axes.y)
            for col in value_columns:
                name = cf_safe_name(col)
                if name in nc.variables:
                    continue
                create_ncvar_from_series(nc,
                                         name,
                                         per_sample,
                                         df[col],
                                         zlib=True,
                                         complevel=1)
                attributes[name] = dict_update(attributes.get(name, {}),
                                               {'coordinates': coordinates})

            # One row per profile group
            for row, (profile_id, group) in enumerate(by_profile):
                profile[row] = profile_id
                time[row] = date2num(group[axes.t].iloc[0],
                                     units=cls.default_time_unit)
                latitude[row] = group[axes.y].iloc[0]
                longitude[row] = group[axes.x].iloc[0]

                depths = group[axes.z].fillna(z._FillValue).values
                z[row, 0:depths.size] = depths

                for col in value_columns:
                    target = nc.variables[cf_safe_name(col)]
                    payload = get_ncdata_from_series(group[col], target)
                    target[row, 0:payload.size] = payload

            # Set global attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalProfile(output, **kwargs)

Esempio n. 4

Mostra file

File: r.py Progetto: pyoceans/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as a RaggedTimeseriesProfile.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            reduce_dims: when ``True`` and exactly one station is present,
                no station dimension or ``stationIndex`` variable is created.
            unlimited: when ``True`` the sample dimension is created with
                size ``None``.
            unique_dims: when ``True`` every dimension is named
                ``<axis>_dim`` instead of sharing the axis variable's name.
            attributes: merged over ``nc.nc_attributes(axes, daxes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.

        Returns:
            ``RaggedTimeseriesProfile(output, **kwargs)``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes

        reduce_dims = kwargs.pop('reduce_dims', False)
        unlimited = kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # Rename the dimension to avoid a dimension and coordinate having the same name
            # which is not supported in xarray
            changed_axes = { k: '{}_dim'.format(v) for k, v in axes._asdict().items() }
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        with RaggedTimeseriesProfile(output, 'w') as nc:

            station_groups = df.groupby(axes.station)
            unique_stations = list(station_groups.groups.keys())
            num_stations = len(unique_stations)

            # One slot per distinct profile identifier
            profile_groups = df.groupby(axes.profile)
            unique_profiles = list(profile_groups.groups.keys())
            num_profiles = len(unique_profiles)
            nc.createDimension(daxes.profile, num_profiles)

            if reduce_dims is True and num_stations == 1:
                # If a singular station, remove the dimension
                station_dimensions = ()
                s_ind = None
            else:
                station_dimensions = (daxes.station,)
                nc.createDimension(daxes.station, num_stations)
                # The station this profile belongs to
                s_ind = nc.createVariable('stationIndex', 'i4', (daxes.profile,))

            station = nc.createVariable(axes.station, get_dtype(unique_stations), station_dimensions)
            profile = nc.createVariable(axes.profile, get_dtype(df[axes.profile]), (daxes.profile,))
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions)

            # The sample dimension holds one entry per dataframe row
            if unlimited is True:
                nc.createDimension(daxes.sample, None)
            else:
                nc.createDimension(daxes.sample, len(df))

            # Number of observations in each profile
            row_size = nc.createVariable('rowSize', 'i4', (daxes.profile,))

            # Axes variables are already processed so skip them
            data_columns = [ d for d in df.columns if d not in axes ]
            data_columns += [axes.t, axes.z]  # time isn't really special, its dimensioned by obs
            attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop('attributes', {}))

            for i, (sname, srg) in enumerate(station_groups):
                station[i] = sname
                # ``srg`` already holds exactly the rows of this station, so
                # there is no need to re-filter the whole frame per station.
                latitude[i] = srg[axes.y].dropna().iloc[0]
                longitude[i] = srg[axes.x].dropna().iloc[0]

            # Read the station values back once; they do not change while the
            # per-profile variables are being filled.
            station_values = station[:] if s_ind is not None else None
            for j, (pname, pfg) in enumerate(profile_groups):
                profile[j] = pname
                row_size[j] = len(pfg)
                if s_ind is not None:
                    first_station = pfg[axes.station].dropna().iloc[0]
                    # ``ndarray.item()`` replaces ``np.asscalar``, which was
                    # deprecated in NumPy 1.16 and removed in 1.23. Like
                    # ``asscalar`` it raises unless exactly one match exists.
                    s_ind[j] = np.argwhere(station_values == first_station).item()

            # Everything left (including the t and z axes appended above) is
            # written on the sample dimension, except the index variables.
            skips = ['stationIndex', 'rowSize']
            for c in [ d for d in data_columns if d not in skips ]:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(
                        nc,
                        var_name,
                        (daxes.sample,),
                        df[c],
                        zlib=True,
                        complevel=1
                    )
                else:
                    v = nc.variables[var_name]
                vvalues = get_ncdata_from_series(df[c], v)
                try:
                    if unlimited is True:
                        # An unlimited dimension has no fixed shape to match
                        v[:] = vvalues
                    else:
                        v[:] = vvalues.reshape(v.shape)
                except Exception:
                    # Best effort: log and move on to the next column.
                    # ``Exception`` (not ``BaseException``) so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    L.exception('Failed to add {}'.format(c))
                    continue

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return RaggedTimeseriesProfile(output, **kwargs)

Esempio n. 5

Mostra file

File: im.py Progetto: lucmehl/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an IncompleteMultidimensionalProfile.

        Each profile group becomes one row of a (profile, z) layout. Data
        variables are created lazily, the first time a column is written.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            unlimited: when ``True`` the profile dimension is created with
                size ``None``.
            attributes: merged over ``nc.nc_attributes(axes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        value_columns = [col for col in df.columns if col not in axes]

        grow_profiles = kwargs.pop('unlimited', False)

        with IncompleteMultidimensionalProfile(output, 'w') as nc:

            by_profile = df.groupby(axes.profile)

            n_profiles = (None if grow_profiles is True
                          else df[axes.profile].unique().size)
            nc.createDimension(axes.profile, n_profiles)
            nc.createDimension(axes.z, by_profile.size().max())

            # Metadata variables
            nc.createVariable('crs', 'i4')

            per_profile = (axes.profile,)
            per_sample = (axes.profile, axes.z)

            profile = nc.createVariable(axes.profile,
                                        get_dtype(df[axes.profile]),
                                        per_profile)

            # Axis variables
            time = nc.createVariable(axes.t, 'f8', per_profile)
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         per_profile)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          per_profile)
            z_fill = df[axes.z].dtype.type(cls.default_fill_value)
            z = nc.createVariable(axes.z, get_dtype(df[axes.z]), per_sample,
                                  fill_value=z_fill)

            attributes = dict_update(nc.nc_attributes(axes),
                                     kwargs.pop('attributes', {}))

            coordinates = '{} {} {} {}'.format(axes.t, axes.z, axes.x, axes.y)

            for row, (profile_id, group) in enumerate(by_profile):
                profile[row] = profile_id

                time[row] = nc4.date2num(group[axes.t].iloc[0],
                                         units=cls.default_time_unit)
                latitude[row] = group[axes.y].iloc[0]
                longitude[row] = group[axes.x].iloc[0]

                depths = group[axes.z].fillna(z._FillValue).values
                z[row, 0:depths.size] = depths

                for col in value_columns:
                    name = cf_safe_name(col)
                    if name in nc.variables:
                        target = nc.variables[name]
                    else:
                        # First occurrence: create the variable from this
                        # profile's slice and record its coordinates.
                        target = create_ncvar_from_series(
                            nc,
                            name,
                            per_sample,
                            group[col],
                            zlib=True,
                            complevel=1
                        )
                        attributes[name] = dict_update(
                            attributes.get(name, {}),
                            {'coordinates' : coordinates}
                        )

                    payload = get_ncdata_from_series(group[col], target)
                    target[row, 0:payload.size] = payload

            # Set global attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalProfile(output, **kwargs)

Esempio n. 6

Mostra file

File: cr.py Progetto: lizferguson5/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as a ContiguousRaggedTrajectoryProfile.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            reduce_dims, unlimited: popped and otherwise ignored.
            attributes: merged over ``nc.nc_attributes(axes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.

        Returns:
            ``ContiguousRaggedTrajectoryProfile(output, **kwargs)``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))

        # Popped so they are not forwarded to the constructor below.
        kwargs.pop('reduce_dims', False)
        kwargs.pop('unlimited', False)

        with ContiguousRaggedTrajectoryProfile(output, 'w') as nc:

            trajectory_groups = df.groupby(axes.trajectory)
            unique_trajectories = list(trajectory_groups.groups.keys())
            num_trajectories = len(unique_trajectories)

            nc.createDimension(axes.trajectory, num_trajectories)
            trajectory = nc.createVariable(axes.trajectory, get_dtype(df[axes.trajectory]), (axes.trajectory,))
            trajectory[:] = np.array(unique_trajectories)

            # One slot per distinct profile identifier
            unique_profiles = df[axes.profile].unique()
            num_profiles = len(unique_profiles)

            nc.createDimension(axes.profile, num_profiles)
            profile = nc.createVariable(axes.profile, get_dtype(df[axes.profile]), (axes.profile,))
            profile[:] = np.array(unique_profiles)

            # The sample dimension holds one entry per dataframe row
            num_obs = len(df)
            nc.createDimension(axes.sample, num_obs)

            # The trajectory this profile belongs to
            t_ind = nc.createVariable('trajectoryIndex', 'i4', (axes.profile,))
            # Number of observations in each profile
            row_size = nc.createVariable('rowSize', 'i4', (axes.profile,))

            # Create all of the axis variables
            time = nc.createVariable(axes.t, 'f8', (axes.profile,), fill_value=np.dtype('f8').type(cls.default_fill_value))
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), (axes.profile,), fill_value=df[axes.y].dtype.type(cls.default_fill_value))
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), (axes.profile,), fill_value=df[axes.x].dtype.type(cls.default_fill_value))

            # Axes variables are already processed so skip them
            data_columns = [ d for d in df.columns if d not in axes ]
            attributes = dict_update(nc.nc_attributes(axes), kwargs.pop('attributes', {}))

            # NOTE(review): ``j`` restarts at 0 for every trajectory group, so
            # with more than one trajectory later groups write into the same
            # profile slots as earlier ones -- confirm this is intended.
            for i, (_, trg) in enumerate(trajectory_groups):
                for j, (_, pfg) in enumerate(trg.groupby(axes.profile)):
                    time[j] = get_ncdata_from_series(pfg[axes.t], time)[0]
                    latitude[j] = get_ncdata_from_series(pfg[axes.y], latitude)[0]
                    longitude[j] = get_ncdata_from_series(pfg[axes.x], longitude)[0]
                    row_size[j] = len(pfg)
                    t_ind[j] = i

            # Add back in the z axes that was removed when calculating data_columns
            data_columns = data_columns + [axes.z]
            skips = ['trajectoryIndex', 'rowSize']
            for c in [ d for d in data_columns if d not in skips ]:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(
                        nc,
                        var_name,
                        (axes.sample,),
                        df[c],
                        zlib=True,
                        complevel=1
                    )
                else:
                    v = nc.variables[var_name]
                vvalues = get_ncdata_from_series(df[c], v)
                try:
                    v[:] = vvalues
                except Exception:
                    # Best effort: log and move on to the next column.
                    # ``Exception`` (not ``BaseException``) so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    L.exception('Failed to add {}'.format(c))
                    continue

            # Metadata variables
            if 'crs' not in nc.variables:
                nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return ContiguousRaggedTrajectoryProfile(output, **kwargs)

Esempio n. 7

Mostra file

File: om.py Progetto: TomasTorsvik-tools/pocean-core-TTfork

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an OrthogonalMultidimensionalTimeseries.

        The time axis is taken from the first station group only; the code
        assumes every station group has the same size and identical times.

        Keyword arguments popped from ``kwargs``:
            axes: mapping handed to ``get_default_axes``.
            reduce_dims: when ``True`` and exactly one station is present,
                no station dimension is created.
            unlimited: popped and otherwise ignored.
            unique_dims: when ``True`` every dimension is named
                ``<axis>_dim`` instead of sharing the axis variable's name.
            attributes: merged over ``nc.nc_attributes(axes, daxes)``.

        Whatever remains in ``kwargs`` is passed to the constructor call
        that produces the return value.

        Returns:
            ``OrthogonalMultidimensionalTimeseries(output, **kwargs)``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes
        data_columns = [d for d in df.columns if d not in axes]

        reduce_dims = kwargs.pop('reduce_dims', False)
        # Popped so it is not forwarded to the constructor below.
        kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # Rename the dimension to avoid a dimension and coordinate having the same name
            # which is not supported in xarray
            changed_axes = {
                k: '{}_dim'.format(v)
                for k, v in axes._asdict().items()
            }
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        with OrthogonalMultidimensionalTimeseries(output, 'w') as nc:

            station_group = df.groupby(axes.station)
            num_stations = len(station_group)
            has_z = axes.z is not None

            if reduce_dims is True and num_stations == 1:
                # If a station, we can reduce that dimension if it is of size 1
                def ts(i):
                    return np.s_[:]

                default_dimensions = (daxes.t, )
                station_dimensions = ()
            else:

                def ts(i):
                    return np.s_[i, :]

                default_dimensions = (daxes.station, daxes.t)
                station_dimensions = (daxes.station, )
                nc.createDimension(daxes.station, num_stations)

            # Set the coordinates attribute correctly
            coordinates = [axes.t, axes.x, axes.y]
            if has_z is True:
                coordinates.insert(1, axes.z)
            coordinates = ' '.join(coordinates)

            # assume all groups are the same size and have identical times
            _, sdf = list(station_group)[0]
            t = sdf[axes.t]

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Create all of the variables
            nc.createDimension(daxes.t, t.size)
            time = nc.createVariable(axes.t, 'f8', (daxes.t, ))
            station = nc.createVariable(axes.station,
                                        get_dtype(df[axes.station]),
                                        station_dimensions)
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         station_dimensions)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          station_dimensions)
            if has_z is True:
                z = nc.createVariable(axes.z,
                                      get_dtype(df[axes.z]),
                                      station_dimensions,
                                      fill_value=df[axes.z].dtype.type(
                                          cls.default_fill_value))

            attributes = dict_update(nc.nc_attributes(axes, daxes),
                                     kwargs.pop('attributes', {}))

            time[:] = get_ncdata_from_series(t, time)

            # Create vars based on full dataframe (to get all variables)
            for c in data_columns:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(nc,
                                                 var_name,
                                                 default_dimensions,
                                                 df[c],
                                                 zlib=True,
                                                 complevel=1)
                    attributes[var_name] = dict_update(
                        attributes.get(var_name, {}),
                        {'coordinates': coordinates})

            for i, (uid, sdf) in enumerate(station_group):
                station[i] = uid
                latitude[i] = sdf[axes.y].iloc[0]
                longitude[i] = sdf[axes.x].iloc[0]

                if has_z is True:
                    # TODO: write a test for a Z with a _FillValue
                    z[i] = sdf[axes.z].iloc[0]

                for c in data_columns:
                    # Variables were all created above; look this one up
                    var_name = cf_safe_name(c)
                    v = nc.variables[var_name]

                    vvalues = get_ncdata_from_series(sdf[c], v)
                    try:
                        v[ts(i)] = vvalues
                    except Exception:
                        # Best effort: a failed write is only logged.
                        # ``Exception`` (not ``BaseException``) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        L.debug(
                            '{} was not written. Likely a metadata variable'.
                            format(v.name))

            # Set global attributes
            nc.update_attributes(attributes)

        return OrthogonalMultidimensionalTimeseries(output, **kwargs)

Esempio n. 8

Mostra file

File: om.py Progetto: lucmehl/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an orthogonal multidimensional timeseries.

        The frame is grouped by the station axis. One station/latitude/longitude/z
        entry is written per group, and every non-axis column becomes a
        ``(station, t)`` variable.

        Parameters
        ----------
        df
            DataFrame holding the columns named by ``axes`` plus any data columns.
        output
            Target handed to ``OrthogonalMultidimensionalTimeseries`` for writing.
        **kwargs
            ``axes`` and ``attributes`` are consumed here (``attributes`` is
            combined with ``nc.nc_attributes(axes)`` through ``dict_update``).
            Whatever remains is forwarded to the returned instance.

        Returns
        -------
        OrthogonalMultidimensionalTimeseries
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        data_columns = [d for d in df.columns if d not in axes]

        with OrthogonalMultidimensionalTimeseries(output, 'w') as nc:

            station_group = df.groupby(axes.station)
            num_stations = len(station_group)

            # assume all groups are the same size and have identical times
            _, sdf = list(station_group)[0]
            t = sdf[axes.t]

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Create all of the variables
            nc.createDimension(axes.t, t.size)
            nc.createDimension(axes.station, num_stations)
            # Look the station column up through ``axes`` (not ``df.station``) so
            # that a caller-supplied station axis name is honoured.
            station = nc.createVariable(axes.station,
                                        get_dtype(df[axes.station]),
                                        (axes.station, ))

            time = nc.createVariable(axes.t, 'f8', (axes.t, ))
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         (axes.station, ))
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          (axes.station, ))
            z = nc.createVariable(axes.z,
                                  get_dtype(df[axes.z]), (axes.station, ),
                                  fill_value=df[axes.z].dtype.type(
                                      cls.default_fill_value))

            attributes = dict_update(nc.nc_attributes(axes),
                                     kwargs.pop('attributes', {}))

            # tolist() converts to a python datetime object without timezone and has NaTs.
            g = t.tolist()
            # date2num converts NaTs to np.nan
            gg = nc4.date2num(g, units=cls.default_time_unit)
            # masked_invalid moves np.nan to a masked value
            time[:] = np.ma.masked_invalid(gg)

            for i, (uid, sdf) in enumerate(station_group):
                station[i] = uid
                # Station-level coordinates come from the first row of the group
                latitude[i] = sdf[axes.y].iloc[0]
                longitude[i] = sdf[axes.x].iloc[0]

                # TODO: write a test for a Z with a _FillValue
                z[i] = sdf[axes.z].iloc[0]

                for c in data_columns:
                    # Create variable if it doesn't exist
                    var_name = cf_safe_name(c)
                    if var_name not in nc.variables:
                        v = create_ncvar_from_series(nc,
                                                     var_name,
                                                     (axes.station, axes.t),
                                                     sdf[c],
                                                     zlib=True,
                                                     complevel=1)
                        attributes[var_name] = dict_update(
                            attributes.get(var_name, {}), {
                                'coordinates':
                                '{} {} {} {}'.format(axes.t, axes.z, axes.x,
                                                     axes.y)
                            })
                    else:
                        v = nc.variables[var_name]

                    vvalues = get_ncdata_from_series(sdf[c], v)
                    try:
                        v[i, :] = vvalues
                    except Exception:
                        # Best effort: skip columns that cannot be written, but do
                        # not swallow KeyboardInterrupt/SystemExit.
                        L.debug(
                            '{} was not written. Likely a metadata variable'.
                            format(v.name))

            # Set global attributes
            nc.update_attributes(attributes)

        return OrthogonalMultidimensionalTimeseries(output, **kwargs)

Esempio n. 9

Mostra file

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as a contiguous ragged trajectory file.

        Trajectory-level variables are indexed by the trajectory dimension and
        all remaining columns by the sample dimension. ``rowSize`` stores the
        number of rows found for each trajectory group.

        Parameters
        ----------
        df
            DataFrame holding the trajectory axis column and the data columns.
        output
            Target handed to ``ContiguousRaggedTrajectory`` for writing.
        **kwargs
            Consumed here: ``axes``, ``reduce_dims`` (ignored), ``unlimited``
            (make the sample dimension unlimited), ``unique_dims`` (suffix
            dimension names with ``_dim``), ``attributes`` and ``traj_vars``
            (columns written on the trajectory dimension only). Whatever remains
            is forwarded to the returned instance.

        Returns
        -------
        ContiguousRaggedTrajectory
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes

        # Should never be a CR file with one trajectory so we ignore the "reduce_dims" attribute
        _ = kwargs.pop('reduce_dims', False)  # noqa
        unlimited = kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # Rename the dimension to avoid a dimension and coordinate having the same name
            # which is not supported in xarray
            changed_axes = {
                k: '{}_dim'.format(v)
                for k, v in axes._asdict().items()
            }
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        with ContiguousRaggedTrajectory(output, 'w') as nc:

            trajectory_groups = df.groupby(axes.trajectory)
            unique_trajectories = list(trajectory_groups.groups.keys())
            num_trajectories = len(unique_trajectories)
            nc.createDimension(daxes.trajectory, num_trajectories)
            trajectory = nc.createVariable(axes.trajectory,
                                           get_dtype(df[axes.trajectory]),
                                           (daxes.trajectory, ))

            # Get unique obs by grouping on traj getting the max size
            if unlimited is True:
                nc.createDimension(daxes.sample, None)
            else:
                nc.createDimension(daxes.sample, len(df))

            # Number of observations in each trajectory
            row_size = nc.createVariable('rowSize', 'i4', (daxes.trajectory, ))

            attributes = dict_update(nc.nc_attributes(axes, daxes),
                                     kwargs.pop('attributes', {}))

            # Variables defined on only the trajectory axis
            traj_vars = kwargs.pop('traj_vars', [])
            traj_columns = [p for p in traj_vars if p in df.columns]
            for c in traj_columns:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    create_ncvar_from_series(nc,
                                             var_name, (daxes.trajectory, ),
                                             df[c],
                                             zlib=True,
                                             complevel=1)

            for i, (trajid, trg) in enumerate(trajectory_groups):
                trajectory[i] = trajid
                row_size[i] = len(trg)

                # Save any trajectory variables using the first value found
                # in the column.
                for c in traj_columns:
                    var_name = cf_safe_name(c)
                    if var_name not in nc.variables:
                        continue
                    v = nc.variables[var_name]
                    vvalues = get_ncdata_from_series(trg[c], v)[0]
                    try:
                        v[i] = vvalues
                    except Exception:
                        # Log and move on, but let KeyboardInterrupt/SystemExit
                        # propagate instead of being swallowed.
                        L.exception('Failed to add {}'.format(c))

            # Add all of the columns based on the sample dimension. Take all columns and remove the
            # trajectory, rowSize and other trajectory based columns.
            # The exclusion list is built once rather than once per column.
            excluded_columns = traj_columns + ['rowSize', axes.trajectory]
            sample_columns = [
                f for f in df.columns if f not in excluded_columns
            ]
            for c in sample_columns:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(nc,
                                                 var_name, (daxes.sample, ),
                                                 df[c],
                                                 zlib=True,
                                                 complevel=1)
                else:
                    v = nc.variables[var_name]
                vvalues = get_ncdata_from_series(df[c], v)
                try:
                    if unlimited is True:
                        # The variable's shape is not fixed up front when the
                        # sample dimension is unlimited, so assign as-is.
                        v[:] = vvalues
                    else:
                        v[:] = vvalues.reshape(v.shape)
                except Exception:
                    L.exception('Failed to add {}'.format(c))

            # Metadata variables
            if 'crs' not in nc.variables:
                nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return ContiguousRaggedTrajectory(output, **kwargs)

Esempio n. 10

Mostra file

File: im.py Progetto: lucmehl/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an incomplete multidimensional trajectory.

        The frame is grouped by the trajectory axis and each group is written
        into the leading slots of the sample dimension.

        Parameters
        ----------
        df
            DataFrame holding the columns named by ``axes`` plus any data columns.
        output
            Target handed to ``IncompleteMultidimensionalTrajectory`` for writing.
        **kwargs
            Consumed here: ``axes``, ``reduce_dims`` (drop the trajectory
            dimension when there is exactly one trajectory), ``unlimited`` (make
            the sample dimension unlimited) and ``attributes``. Whatever remains
            is forwarded to the returned instance.

        Returns
        -------
        IncompleteMultidimensionalTrajectory
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        data_columns = [col for col in df.columns if col not in axes]

        reduce_dims = kwargs.pop('reduce_dims', False)
        unlimited = kwargs.pop('unlimited', False)

        with IncompleteMultidimensionalTrajectory(output, 'w') as nc:

            groups = df.groupby(axes.trajectory)

            # An unlimited sample dimension is requested with a size of None
            max_obs = None if unlimited is True else groups.size().max()
            nc.createDimension(axes.sample, max_obs)

            num_trajectories = len(groups)
            # A single trajectory can drop the trajectory dimension altogether
            collapse = reduce_dims is True and num_trajectories == 1

            def ts(t_index, size):
                # Index expression addressing the first ``size`` samples of a trajectory
                if collapse:
                    return np.s_[0:size]
                return np.s_[t_index, 0:size]

            if collapse:
                default_dimensions = (axes.sample, )
                trajectory = nc.createVariable(axes.trajectory,
                                               get_dtype(df[axes.trajectory]))
            else:
                default_dimensions = (axes.trajectory, axes.sample)
                nc.createDimension(axes.trajectory, num_trajectories)
                trajectory = nc.createVariable(axes.trajectory,
                                               get_dtype(df[axes.trajectory]),
                                               (axes.trajectory, ))

            def coordinate_variable(name):
                # Coordinate variable typed after its column, with the class fill value
                column = df[name]
                return nc.createVariable(
                    name,
                    get_dtype(column),
                    default_dimensions,
                    fill_value=column.dtype.type(cls.default_fill_value))

            # Coordinate variables, created in the order time, z, latitude, longitude
            time = nc.createVariable(
                axes.t,
                'f8',
                default_dimensions,
                fill_value=np.dtype('f8').type(cls.default_fill_value))
            z = coordinate_variable(axes.z)
            latitude = coordinate_variable(axes.y)
            longitude = coordinate_variable(axes.x)

            attributes = dict_update(nc.nc_attributes(axes),
                                     kwargs.pop('attributes', {}))

            # Value of the "coordinates" attribute shared by every data variable
            coordinates = '{} {} {} {}'.format(axes.t, axes.z, axes.x, axes.y)

            for i, (uid, gdf) in enumerate(groups):
                trajectory[i] = uid

                # tolist() yields python datetimes (NaT kept); date2num turns NaT
                # into nan and masked_invalid then masks those entries.
                time_numbers = nc4.date2num(gdf[axes.t].tolist(),
                                            units=cls.default_time_unit)
                time[ts(i, time_numbers.size)] = np.ma.masked_invalid(
                    time_numbers)

                # Missing positions are replaced by each variable's fill value
                for axis_name, coord in ((axes.y, latitude),
                                         (axes.x, longitude),
                                         (axes.z, z)):
                    filled = gdf[axis_name].fillna(
                        get_fill_value(coord)).values
                    coord[ts(i, filled.size)] = filled

                for column in data_columns:
                    var_name = cf_safe_name(column)
                    if var_name in nc.variables:
                        v = nc.variables[var_name]
                    else:
                        # First time this column is seen: create its variable
                        v = create_ncvar_from_series(nc,
                                                     var_name,
                                                     default_dimensions,
                                                     gdf[column],
                                                     zlib=True,
                                                     complevel=1)
                        attributes[var_name] = dict_update(
                            attributes.get(var_name, {}),
                            {'coordinates': coordinates})

                    vvalues = get_ncdata_from_series(gdf[column], v)
                    v[ts(i, vvalues.size)] = vvalues

            # Metadata variables
            if 'crs' not in nc.variables:
                nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalTrajectory(output, **kwargs)

Esempio n. 11

Mostra file

File: om.py Progetto: pyoceans/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an orthogonal multidimensional timeseries profile.

        The frame is reindexed onto the full product of its (t, z, station)
        values so that each column can be reshaped straight into the
        dimensions of its variable.

        Parameters
        ----------
        df
            DataFrame holding the columns named by ``axes`` plus any data columns.
        output
            Target handed to ``OrthogonalMultidimensionalTimeseriesProfile``.
        **kwargs
            Consumed here: ``axes``, ``reduce_dims`` (drop the station dimension
            when there is exactly one station), ``unlimited`` (make the time
            dimension unlimited), ``unique_dims`` (suffix dimension names with
            ``_dim``), ``attributes`` and ``detach_z`` (columns stored without
            the z dimension). Whatever remains is forwarded to the returned
            instance.

        Returns
        -------
        OrthogonalMultidimensionalTimeseriesProfile
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes
        data_columns = [d for d in df.columns if d not in axes]

        reduce_dims = kwargs.pop('reduce_dims', False)
        unlimited = kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # Rename the dimension to avoid a dimension and coordinate having the same name
            # which is not supported in xarray
            changed_axes = {
                k: '{}_dim'.format(v)
                for k, v in axes._asdict().items()
            }
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        # Make a new index that is the Cartesian product of all of the values from all of the
        # values of the old index. This is so we don't have to iterate over anything. The full
        # column of data can be shaped to the size of the final unique sized dimensions.
        index_order = [axes.t, axes.z, axes.station]
        df = df.set_index(index_order)
        df = df.reindex(
            pd.MultiIndex.from_product(df.index.levels, names=index_order))

        unique_z = df.index.get_level_values(axes.z).unique().values
        unique_t = df.index.get_level_values(
            axes.t).unique().tolist()  # tolist converts to Timestamp
        all_stations = df.index.get_level_values(axes.station)
        unique_s = all_stations.unique()

        with OrthogonalMultidimensionalTimeseriesProfile(output, 'w') as nc:

            if reduce_dims is True and unique_s.size == 1:
                # If a singular station, we can reduce that dimension if it is of size 1
                default_dimensions = (daxes.t, daxes.z)
                station_dimensions = ()
            else:
                default_dimensions = (daxes.t, daxes.z, daxes.station)
                station_dimensions = (daxes.station, )
                nc.createDimension(daxes.station, unique_s.size)

            station = nc.createVariable(axes.station, get_dtype(unique_s),
                                        station_dimensions)
            latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]),
                                         station_dimensions)
            longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]),
                                          station_dimensions)
            # Assign over loop because VLEN variables (strings) have to be assigned by integer index
            # and we need to find the lat/lon based on station index
            for si, st in enumerate(unique_s):
                station[si] = st
                # Row mask for this station, computed once and shared by lat and lon
                station_rows = all_stations == st
                latitude[si] = df[axes.y][station_rows].dropna().iloc[0]
                longitude[si] = df[axes.x][station_rows].dropna().iloc[0]

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Create all of the variables
            if unlimited is True:
                nc.createDimension(daxes.t, None)
            else:
                nc.createDimension(daxes.t, len(unique_t))
            time = nc.createVariable(axes.t, 'f8', (daxes.t, ))
            time[:] = date2num(unique_t,
                               units=cls.default_time_unit).astype('f8')

            nc.createDimension(daxes.z, unique_z.size)
            z = nc.createVariable(axes.z, get_dtype(unique_z), (daxes.z, ))
            z[:] = unique_z

            attributes = dict_update(nc.nc_attributes(axes, daxes),
                                     kwargs.pop('attributes', {}))

            # Variables defined on only the time axis and not the depth axis
            detach_z_vars = kwargs.pop('detach_z', [])
            detach_z_columns = [p for p in detach_z_vars if p in data_columns]
            for c in detach_z_columns:
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(
                        nc,
                        var_name,
                        default_dimensions[
                            0::2],  # this removes the second dimension (z)
                        df[c],
                        zlib=True,
                        complevel=1)
                    attributes[var_name] = dict_update(
                        attributes.get(var_name, {}), {
                            'coordinates':
                            '{} {} {}'.format(axes.t, axes.x, axes.y)
                        })
                else:
                    v = nc.variables[var_name]

                # Because we need access to the fillvalues here, we ask not to return
                # the values with them already filled.
                vvalues = get_ncdata_from_series(df[c], v, fillna=False)
                # Reshape to the full array, with Z
                vvalues = vvalues.reshape(len(unique_t), unique_z.size,
                                          unique_s.size)
                # The Z axis is always the second axis, take the mean over that axis
                vvalues = np.apply_along_axis(np.nanmean, 1, vvalues).flatten()
                # Now reshape to the array without Z
                vvalues = vvalues.reshape(len(unique_t), unique_s.size)
                try:
                    v[:] = vvalues.reshape(v.shape)
                except Exception:
                    # Log and move on, but let KeyboardInterrupt/SystemExit
                    # propagate instead of being swallowed.
                    L.exception('Failed to add {}'.format(c))

            full_columns = [
                f for f in data_columns if f not in detach_z_columns
            ]
            for c in full_columns:
                # Create variable if it doesn't exist
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    v = create_ncvar_from_series(nc,
                                                 var_name,
                                                 default_dimensions,
                                                 df[c],
                                                 zlib=True,
                                                 complevel=1)
                    attributes[var_name] = dict_update(
                        attributes.get(var_name, {}), {
                            'coordinates':
                            '{} {} {} {}'.format(axes.t, axes.z, axes.x,
                                                 axes.y)
                        })
                else:
                    v = nc.variables[var_name]

                vvalues = get_ncdata_from_series(df[c], v)
                v[:] = vvalues.reshape(v.shape)

            nc.update_attributes(attributes)

        return OrthogonalMultidimensionalTimeseriesProfile(output, **kwargs)

Esempio n. 12

Mostra file

File: im.py Progetto: TomasTorsvik-tools/pocean-core-TTfork

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an incomplete multidimensional trajectory.

        Variables for every data column are created up front from the whole
        frame. The frame is then grouped by the trajectory axis and each group
        is written into the leading slots of the sample dimension.

        Parameters
        ----------
        df
            DataFrame holding the columns named by ``axes`` plus any data columns.
        output
            Target handed to ``IncompleteMultidimensionalTrajectory`` for writing.
        **kwargs
            Consumed here: ``axes``, ``reduce_dims`` (drop the trajectory
            dimension when there is exactly one trajectory), ``unlimited`` (make
            the sample dimension unlimited), ``unique_dims`` (suffix dimension
            names with ``_dim``) and ``attributes``. Whatever remains is
            forwarded to the returned instance.

        Returns
        -------
        IncompleteMultidimensionalTrajectory
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        axes = get_default_axes(kwargs.pop('axes', {}))
        daxes = axes
        data_columns = [col for col in df.columns if col not in axes]

        reduce_dims = kwargs.pop('reduce_dims', False)
        unlimited = kwargs.pop('unlimited', False)

        unique_dims = kwargs.pop('unique_dims', False)
        if unique_dims is True:
            # A dimension and a coordinate sharing one name is not supported in
            # xarray, so give every dimension a "_dim" suffix.
            changed_axes = {}
            for key, value in axes._asdict().items():
                changed_axes[key] = '{}_dim'.format(value)
            daxes = get_default_axes(changed_axes)

        # Downcast anything from int64 to int32
        # Convert any timezone aware datetimes to native UTC times
        df = downcast_dataframe(nativize_times(df))

        with IncompleteMultidimensionalTrajectory(output, 'w') as nc:

            groups = df.groupby(axes.trajectory)

            # An unlimited sample dimension is requested with a size of None
            max_obs = None if unlimited is True else groups.size().max()
            nc.createDimension(daxes.sample, max_obs)

            num_trajectories = len(groups)
            # A single trajectory can drop the trajectory dimension altogether
            collapse = reduce_dims is True and num_trajectories == 1

            def ts(t_index, size):
                # Index expression addressing the first ``size`` samples of a trajectory
                if collapse:
                    return np.s_[0:size]
                return np.s_[t_index, 0:size]

            if collapse:
                default_dimensions = (daxes.sample,)
                trajectory = nc.createVariable(
                    axes.trajectory,
                    get_dtype(df[axes.trajectory])
                )
            else:
                default_dimensions = (daxes.trajectory, daxes.sample)
                nc.createDimension(daxes.trajectory, num_trajectories)
                trajectory = nc.createVariable(
                    axes.trajectory,
                    get_dtype(df[axes.trajectory]),
                    (daxes.trajectory,)
                )

            def coordinate_variable(name):
                # Coordinate variable typed after its column, with the class fill value
                column = df[name]
                return nc.createVariable(
                    name,
                    get_dtype(column),
                    default_dimensions,
                    fill_value=column.dtype.type(cls.default_fill_value)
                )

            # Coordinate variables, created in the order time, z, latitude, longitude
            time = nc.createVariable(
                axes.t,
                'f8',
                default_dimensions,
                fill_value=np.dtype('f8').type(cls.default_fill_value)
            )
            z = coordinate_variable(axes.z)
            latitude = coordinate_variable(axes.y)
            longitude = coordinate_variable(axes.x)

            attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop('attributes', {}))

            # Value of the "coordinates" attribute shared by every data variable
            coordinates = '{} {} {} {}'.format(axes.t, axes.z, axes.x, axes.y)

            # Create vars based on full dataframe (to get all variables)
            for column in data_columns:
                var_name = cf_safe_name(column)
                if var_name in nc.variables:
                    continue
                create_ncvar_from_series(
                    nc,
                    var_name,
                    default_dimensions,
                    df[column],
                    zlib=True,
                    complevel=1
                )
                attributes[var_name] = dict_update(
                    attributes.get(var_name, {}),
                    {'coordinates': coordinates}
                )

            for i, (uid, gdf) in enumerate(groups):
                trajectory[i] = uid

                # Time, latitude and longitude all go through get_ncdata_from_series
                for axis_name, target in ((axes.t, time),
                                          (axes.y, latitude),
                                          (axes.x, longitude)):
                    values = get_ncdata_from_series(gdf[axis_name], target)
                    target[ts(i, values.size)] = values

                # Missing z values are replaced by the variable's fill value
                zs = gdf[axes.z].fillna(get_fill_value(z)).values
                z[ts(i, zs.size)] = zs

                for column in data_columns:
                    # The variable was created before this loop; just fetch it
                    v = nc.variables[cf_safe_name(column)]
                    vvalues = get_ncdata_from_series(gdf[column], v)
                    v[ts(i, vvalues.size)] = vvalues

            # Metadata variables
            if 'crs' not in nc.variables:
                nc.createVariable('crs', 'i4')

            # Set attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalTrajectory(output, **kwargs)

Esempio n. 13

Mostra file

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an incomplete multidimensional trajectory.

        The frame is grouped by its ``trajectory`` column. Each group fills the
        leading slots of one row of the ``(trajectory, obs)`` variables, where
        ``obs`` is sized to the largest group. Writes use a slice bounded by
        the group's own length, so groups shorter than ``obs`` are accepted.

        Parameters
        ----------
        df
            DataFrame with ``trajectory``, ``t``, ``x``, ``y``, ``z`` columns,
            an optional ``distance`` column, and any data columns.
        output
            Target handed to ``IncompleteMultidimensionalTrajectory`` for writing.
        **kwargs
            ``attributes`` is consumed here (combined with ``nc.nc_attributes()``
            through ``dict_update``). Whatever remains is forwarded to the
            returned instance.

        Returns
        -------
        IncompleteMultidimensionalTrajectory
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        reserved_columns = ['trajectory', 't', 'x', 'y', 'z', 'distance']
        data_columns = [ d for d in df.columns if d not in reserved_columns ]

        with IncompleteMultidimensionalTrajectory(output, 'w') as nc:

            trajectory_group = df.groupby('trajectory')
            max_obs = trajectory_group.size().max()

            unique_trajectories = df.trajectory.unique()
            nc.createDimension('trajectory', unique_trajectories.size)
            nc.createDimension('obs', max_obs)

            # Metadata variables
            nc.createVariable('crs', 'i4')

            trajectory = nc.createVariable('trajectory', get_dtype(df.trajectory), ('trajectory',))

            # Create all of the variables
            time = nc.createVariable('time', 'i4', ('trajectory', 'obs'), fill_value=int(cls.default_fill_value))
            z = nc.createVariable('z', get_dtype(df.z), ('trajectory', 'obs'), fill_value=df.z.dtype.type(cls.default_fill_value))
            latitude = nc.createVariable('latitude', get_dtype(df.y), ('trajectory', 'obs'), fill_value=df.y.dtype.type(cls.default_fill_value))
            longitude = nc.createVariable('longitude', get_dtype(df.x), ('trajectory', 'obs'), fill_value=df.x.dtype.type(cls.default_fill_value))
            has_distance = 'distance' in df
            if has_distance:
                distance = nc.createVariable('distance', get_dtype(df.distance), ('trajectory', 'obs'), fill_value=df.distance.dtype.type(cls.default_fill_value))

            attributes = dict_update(nc.nc_attributes(), kwargs.pop('attributes', {}))

            for i, (uid, gdf) in enumerate(trajectory_group):
                trajectory[i] = uid

                # A group may hold fewer rows than the 'obs' dimension. Write only
                # into the leading slots, as is done for the data columns below,
                # rather than assigning to the whole row.
                obs = slice(0, len(gdf))

                # tolist() converts to a python datetime object without timezone
                g = gdf.t.fillna(999999).tolist()   # 999999 is a dummy value
                NaTs = gdf.t.isnull()
                timenums = np.ma.MaskedArray(nc4.date2num(g, units=cls.default_time_unit))
                # Mask the entries that were NaT before the dummy fill
                timenums.mask = NaTs
                time[i, obs] = timenums

                latitude[i, obs] = gdf.y.fillna(latitude._FillValue).values
                longitude[i, obs] = gdf.x.fillna(longitude._FillValue).values
                z[i, obs] = gdf.z.fillna(z._FillValue).values
                if has_distance:
                    distance[i, obs] = gdf.distance.fillna(distance._FillValue).values

                for c in data_columns:
                    # Create variable if it doesn't exist
                    var_name = cf_safe_name(c)
                    if var_name not in nc.variables:
                        if np.issubdtype(gdf[c].dtype, 'S') or gdf[c].dtype == object:
                            # AttributeError: cannot set _FillValue attribute for VLEN or compound variable
                            v = nc.createVariable(var_name, get_dtype(gdf[c]), ('trajectory', 'obs'))
                        else:
                            v = nc.createVariable(var_name, get_dtype(gdf[c]), ('trajectory', 'obs'), fill_value=gdf[c].dtype.type(cls.default_fill_value))

                        if var_name not in attributes:
                            attributes[var_name] = {}
                        attributes[var_name] = dict_update(attributes[var_name], {
                            'coordinates' : 'time latitude longitude z',
                        })
                    else:
                        v = nc.variables[var_name]

                    if hasattr(v, '_FillValue'):
                        vvalues = gdf[c].fillna(v._FillValue).values
                    else:
                        # Use an empty string... better than nothing!
                        vvalues = gdf[c].fillna('').values

                    sl = slice(0, vvalues.size)
                    v[i, sl] = vvalues

            # Set global attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalTrajectory(output, **kwargs)

Esempio n. 14

Mostra file

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an orthogonal multidimensional timeseries.

        The frame is grouped by its ``station`` column. One
        station/latitude/longitude/z entry is written per group and every
        non-reserved column becomes a ``(station, time)`` variable. Datetime
        columns are stored as ``f8`` numbers in ``cls.default_time_unit`` and
        are converted separately for each station.

        Parameters
        ----------
        df
            DataFrame with ``station``, ``t``, ``x``, ``y``, ``z`` columns and
            any data columns.
        output
            Target handed to ``OrthogonalMultidimensionalTimeseries`` for writing.
        **kwargs
            ``attributes`` is consumed here (combined with ``nc.nc_attributes()``
            through ``dict_update``). Whatever remains is forwarded to the
            returned instance.

        Returns
        -------
        OrthogonalMultidimensionalTimeseries
            A new instance created from ``output`` and the remaining ``kwargs``.
        """
        reserved_columns = ['station', 't', 'x', 'y', 'z']
        data_columns = [ d for d in df.columns if d not in reserved_columns ]

        with OrthogonalMultidimensionalTimeseries(output, 'w') as nc:

            station_group = df.groupby('station')
            num_stations = len(station_group)

            # assume all groups are the same size and have identical times
            _, sdf = list(station_group)[0]
            t = sdf.t

            # Metadata variables
            nc.createVariable('crs', 'i4')

            # Create all of the variables
            nc.createDimension('time', t.size)
            nc.createDimension('station', num_stations)
            station = nc.createVariable('station', get_dtype(df.station), ('station',))

            time = nc.createVariable('time', 'f8', ('time',))
            latitude = nc.createVariable('latitude', get_dtype(df.y), ('station',))
            longitude = nc.createVariable('longitude', get_dtype(df.x), ('station',))
            z = nc.createVariable('z', get_dtype(df.z), ('station',), fill_value=df.z.dtype.type(cls.default_fill_value))

            attributes = dict_update(nc.nc_attributes(), kwargs.pop('attributes', {}))

            logger.info(df.t.values.dtype)
            time[:] = nc4.date2num(t.tolist(), units=cls.default_time_unit)

            datetime_dtype = np.dtype('datetime64[ns]')

            for i, (uid, sdf) in enumerate(station_group):
                station[i] = uid
                # Station-level coordinates come from the first row of the group
                latitude[i] = sdf.y.iloc[0]
                longitude[i] = sdf.x.iloc[0]

                # TODO: write a test for a Z with a _FillValue
                z[i] = sdf.z.iloc[0]

                for c in data_columns:

                    is_datetime = sdf[c].dtype == datetime_dtype

                    # Create variable if it doesn't exist
                    var_name = cf_safe_name(c)
                    if var_name not in nc.variables:
                        if var_name not in attributes:
                            attributes[var_name] = {}
                        if is_datetime:
                            fv = np.dtype('f8').type(cls.default_fill_value)
                            v = nc.createVariable(var_name, 'f8', ('station', 'time',), fill_value=fv)
                            attributes[var_name] = dict_update(attributes[var_name], {
                                'units': cls.default_time_unit
                            })
                        elif np.issubdtype(sdf[c].dtype, 'S') or sdf[c].dtype == object:
                            # AttributeError: cannot set _FillValue attribute for VLEN or compound variable
                            v = nc.createVariable(var_name, get_dtype(sdf[c]), ('station', 'time',))
                        else:
                            v = nc.createVariable(var_name, get_dtype(sdf[c]), ('station', 'time',), fill_value=sdf[c].dtype.type(cls.default_fill_value))

                        attributes[var_name] = dict_update(attributes[var_name], {
                            'coordinates' : 'time latitude longitude z',
                        })
                    else:
                        v = nc.variables[var_name]

                    if is_datetime:
                        # Convert this station's own values on every pass. Doing
                        # the conversion only when the variable is first created
                        # would reuse the first station's numbers for all others.
                        tvalues = pd.Series(nc4.date2num(sdf[c].tolist(), units=cls.default_time_unit))
                        vvalues = tvalues.fillna(v._FillValue).values
                    elif hasattr(v, '_FillValue'):
                        vvalues = sdf[c].fillna(v._FillValue).values
                    else:
                        # Use an empty string... better than nothing!
                        vvalues = sdf[c].fillna('').values

                    try:
                        v[i, :] = vvalues
                    except Exception:
                        # Best effort: report and continue, but do not swallow
                        # KeyboardInterrupt/SystemExit.
                        logger.error('{} NOPE'.format(v.name))

            # Set global attributes
            nc.update_attributes(attributes)

        return OrthogonalMultidimensionalTimeseries(output, **kwargs)

Esempio n. 15

Mostra file

File: im.py Progetto: joefutrelle/pocean-core

    def from_dataframe(cls, df, output, **kwargs):
        """Write ``df`` to ``output`` as an IncompleteMultidimensionalProfile.

        Rows are grouped by the ``profile`` column; each group fills one index
        along the ``profile`` dimension. The ``z`` dimension is sized to the
        largest group, and shorter groups only fill the leading part of their
        row.

        :param df: DataFrame providing ``profile``, ``t``, ``x``, ``y`` and
            ``z`` columns and, optionally, ``distance``. Every column that is
            not one of the reserved names is written as a data variable
            dimensioned ``(profile, z)``. A ``trajectory`` column is treated
            as reserved but is not written here.
        :param output: First argument handed to
            ``IncompleteMultidimensionalProfile`` (presumably a file path --
            confirm against the class).
        :param kwargs: ``attributes`` is popped and merged over
            ``nc.nc_attributes()``; whatever remains is forwarded when
            ``output`` is reopened for the return value.
        :return: ``IncompleteMultidimensionalProfile(output, **kwargs)``.
        """
        non_data = ('trajectory', 'profile', 't', 'x', 'y', 'z', 'distance')
        data_columns = [name for name in df.columns if name not in non_data]

        # Dimension tuples shared by the variable definitions below
        per_profile = ('profile', )
        per_sample = ('profile', 'z')

        with IncompleteMultidimensionalProfile(output, 'w') as nc:

            by_profile = df.groupby('profile')
            longest_profile = by_profile.size().max()

            profile_ids = df.profile.unique()
            nc.createDimension('profile', profile_ids.size)
            nc.createDimension('z', longest_profile)

            # Metadata variables
            nc.createVariable('crs', 'i4')

            profile = nc.createVariable(
                'profile', get_dtype(df.profile), per_profile)

            # Coordinate variables
            # NOTE(review): time is stored as 'i4'; confirm that integer
            # precision is intended for the values date2num produces.
            time = nc.createVariable('time', 'i4', per_profile)
            latitude = nc.createVariable(
                'latitude', get_dtype(df.y), per_profile)
            longitude = nc.createVariable(
                'longitude', get_dtype(df.x), per_profile)
            if 'distance' in df:
                distance = nc.createVariable(
                    'distance', get_dtype(df.distance), per_profile)
            z_fill = df.z.dtype.type(cls.default_fill_value)
            z = nc.createVariable(
                'z', get_dtype(df.z), per_sample, fill_value=z_fill)

            user_attributes = kwargs.pop('attributes', {})
            attributes = dict_update(nc.nc_attributes(), user_attributes)

            for row, (profile_id, group) in enumerate(by_profile):
                profile[row] = profile_id

                # Per-profile coordinates come from the first row of the group
                time[row] = nc4.date2num(
                    group.t.iloc[0], units=cls.default_time_unit)
                latitude[row] = group.y.iloc[0]
                longitude[row] = group.x.iloc[0]
                if 'distance' in group:
                    distance[row] = group.distance.iloc[0]

                depths = group.z.fillna(z._FillValue).values
                z[row, 0:depths.size] = depths

                for column in data_columns:
                    var_name = cf_safe_name(column)
                    series = group[column]

                    if var_name in nc.variables:
                        v = nc.variables[var_name]
                    else:
                        # First time this column is seen: define its variable
                        dtype = series.dtype
                        if np.issubdtype(dtype, 'S') or dtype == object:
                            # AttributeError: cannot set _FillValue attribute
                            # for VLEN or compound variable
                            v = nc.createVariable(
                                var_name, get_dtype(series), per_sample)
                        else:
                            v = nc.createVariable(
                                var_name,
                                get_dtype(series),
                                per_sample,
                                fill_value=dtype.type(cls.default_fill_value))

                        attributes[var_name] = dict_update(
                            attributes.setdefault(var_name, {}),
                            {'coordinates': 'time latitude longitude z'})

                    # Without a _FillValue, use an empty string...
                    # better than nothing!
                    filler = v._FillValue if hasattr(v, '_FillValue') else ''
                    vvalues = series.fillna(filler).values

                    v[row, 0:vvalues.size] = vvalues

            # Set global attributes
            nc.update_attributes(attributes)

        return IncompleteMultidimensionalProfile(output, **kwargs)