Esempio n. 1
0
    def test_generic_masked_bad_min_max_value(self):

        _, tpath = tempfile.mkstemp(suffix='.nc', prefix='pocean-test')
        shutil.copy2(self.input_file, tpath)

        with EnhancedDataset(tpath, 'a') as ncd:
            v = ncd.variables['v_component_wind_true_direction_all_geometries']
            v.valid_min = 0.1
            v.valid_max = 0.1
            r = generic_masked(v[:], attrs=ncd.vatts(v.name))
            rflat = r.flatten()
            assert rflat[~rflat.mask].size == 0

            # Create a byte variable with a float valid_min and valid_max
            # to make sure it doesn't error
            b = ncd.createVariable('imabyte', 'b')
            b.valid_min = 0
            b.valid_max = 600  # this ss over a byte and thus invalid
            b[:] = 3

            r = generic_masked(b[:], attrs=ncd.vatts(b.name))
            assert np.all(r.mask == False)  # noqa

            b.valid_min = 0
            b.valid_max = 2
            r = generic_masked(b[:], attrs=ncd.vatts(b.name))
            assert np.all(r.mask == True)  # noqa

            c = ncd.createVariable('imanotherbyte', 'f4')
            c.setncattr('valid_min', '0b')
            c.setncattr('valid_max', '9b')
            c[:] = 3
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == False)  # noqa

            c = ncd.createVariable('imarange', 'f4')
            c.valid_range = [0.0, 2.0]
            c[:] = 3.0
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == True)  # noqa

            c.valid_range = [0.0, 2.0]
            c[:] = 1.0
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == False)  # noqa

        if os.path.exists(tpath):
            os.remove(tpath)
Esempio n. 2
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        svar = axv.station
        s = normalize_countable_array(svar)

        # T
        t = get_masked_datetime_array(axv.t[:], axv.t)
        n_times = t.size

        # X
        x = generic_masked(axv.x[:], attrs=self.vatts(axv.x.name))

        # Y
        y = generic_masked(axv.y[:], attrs=self.vatts(axv.y.name))

        # Z
        z = generic_masked(axv.z[:], attrs=self.vatts(axv.z.name))
        n_z = z.size

        # denormalize table structure
        t = np.repeat(t, s.size * n_z)
        z = np.tile(np.repeat(z, s.size), n_times)
        s = np.tile(s, n_z * n_times)
        y = np.tile(y, n_times * n_z)
        x = np.tile(x, n_times * n_z)

        df_data = OrderedDict([
            (axes.t, t),
            (axes.x, x),
            (axes.y, y),
            (axes.z, z),
            (axes.station, s),
        ])

        building_index_to_drop = np.ones(t.size, dtype=bool)

        # Axes variables are already processed so skip them
        extract_vars = copy(self.variables)
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):
            vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                   attrs=self.vatts(dnam))

            # Carry through size 1 variables
            if vdata.size == 1:
                if vdata[0] is np.ma.masked:
                    L.warning("Skipping variable {} that is completely masked".
                              format(dnam))
                    continue
                vdata = vdata[0]
            else:
                if dvar[:].flatten().size != t.size:
                    L.warning("Variable {} is not the correct size, skipping.".
                              format(dnam))
                    continue

                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 3
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        # The index variable (trajectory_index) is identified by having an
        # attribute with name of instance_dimension whose value is the instance
        # dimension name (trajectory in this example). The index variable must
        # have the profile dimension as its sole dimension, and must be type
        # integer. Each value in the index variable is the zero-based trajectory
        # index that the profile belongs to i.e. profile p belongs to trajectory
        # i=trajectory_index(p), as in section H.2.5.
        r_index_var = self.filter_by_attrs(
            instance_dimension=lambda x: x is not None)
        if not r_index_var:
            raise ValueError(
                'Could not find the "instance_dimension" attribute on any variables, '
                'is this a valid {}?'.format(self.__class__.__name__))
        else:
            r_index_var = r_index_var[0]
        p_dim = self.dimensions[r_index_var.dimensions[0]]  # Profile dimension

        # We should probably use this below to test for dimensionality of variables?
        # r_dim = self.dimensions[r_index_var.instance_dimension]  # Trajectory dimension

        # The count variable (row_size) contains the number of elements for
        # each profile, which must be written contiguously. The count variable
        # is identified by having an attribute with name sample_dimension whose
        # value is the sample dimension (obs in this example) being counted. It
        # must have the profile dimension as its sole dimension, and must be
        # type integer
        o_index_var = self.filter_by_attrs(
            sample_dimension=lambda x: x is not None)
        if not o_index_var:
            raise ValueError(
                'Could not find the "sample_dimension" attribute on any variables, '
                'is this a valid {}?'.format(self.__class__.__name__))
        else:
            o_index_var = o_index_var[0]
        o_dim = self.dimensions[
            o_index_var.sample_dimension]  # Sample dimension

        profile_indexes = normalize_countable_array(axv.profile,
                                                    count_if_none=p_dim.size)
        p = np.ma.masked_all(o_dim.size, dtype=profile_indexes.dtype)

        traj_indexes = normalize_countable_array(axv.trajectory)
        r = np.ma.masked_all(o_dim.size, dtype=traj_indexes.dtype)

        tvar = axv.t
        t = np.ma.masked_all(o_dim.size, dtype=tvar.dtype)

        xvar = axv.x
        x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)

        yvar = axv.y
        y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)
        si = 0

        # Sample (obs) dimension
        zvar = axv.z
        z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name))

        for i in np.arange(profile_indexes.size):
            ei = si + o_index_var[i]
            p[si:ei] = profile_indexes[i]
            r[si:ei] = np.array(traj_indexes[r_index_var[i]])
            t[si:ei] = tvar[i]
            x[si:ei] = xvar[i]
            y[si:ei] = yvar[i]
            si = ei

        #  T
        nt = get_masked_datetime_array(t, tvar).flatten()

        # X and Y
        x = generic_masked(x, minv=-180, maxv=180)
        y = generic_masked(y, minv=-90, maxv=90)

        df_data = OrderedDict([(axes.t, nt), (axes.x, x), (axes.y, y),
                               (axes.z, z), (axes.trajectory, r),
                               (axes.profile, p)])

        building_index_to_drop = np.ones(o_dim.size, dtype=bool)

        extract_vars = copy(self.variables)
        # Skip the traj and row index variables
        del extract_vars[o_index_var.name]
        del extract_vars[r_index_var.name]
        # Axes variables are already processed so skip them
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):

            # Profile dimensions
            if dvar.dimensions == (p_dim.name, ):
                vdata = np.ma.masked_all(o_dim.size, dtype=dvar.dtype)
                si = 0
                for j in np.arange(profile_indexes.size):
                    ei = si + o_index_var[j]
                    vdata[si:ei] = dvar[j]
                    si = ei

            # Sample dimensions
            elif dvar.dimensions == (o_dim.name, ):
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))

            else:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                # Carry through size 1 variables
                if vdata.size == 1:
                    if vdata[0] is np.ma.masked:
                        L.warning(
                            "Skipping variable {} that is completely masked".
                            format(dnam))
                        continue
                else:
                    L.warning(
                        "Skipping variable {} since it didn't match any dimension sizes"
                        .format(dnam))
                    continue

            # Mark rows with data so we don't remove them with clear_rows
            if vdata.size == building_index_to_drop.size:
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Handle scalars here at the end
            if vdata.size == 1:
                vdata = vdata[0]

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 4
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))
        axv = get_mapped_axes_variables(self, axes)

        # Profile dimension
        p_var = self.filter_by_attrs(cf_role='profile_id')[0]
        p_dim = self.dimensions[p_var.dimensions[0]]

        # Station dimension
        s_var = self.filter_by_attrs(cf_role='timeseries_id')[0]
        if s_var.ndim == 1:
            s_dim = self.dimensions[s_var.dimensions[0]]
        elif s_var.ndim == 0:
            s_dim = None
        else:
            raise ValueError('Number of dimension on the station (timeseries_id) must be 0 or 1')

        # Station index
        r_index_var = self.filter_by_attrs(instance_dimension=lambda x: x is not None)
        if not r_index_var:
            # A reduced netCDF file, set station to 0 so it pulls the first value
            # of the variable that identifies the stations
            r_index_var = [0]
        else:
            r_index_var = r_index_var[0]

        # Sample (obs) dimension
        o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None)
        if not o_index_var:
            raise ValueError(
                'Could not find the "sample_dimension" attribute on any variables, '
                'is this a valid {}?'.format(self.__class__.__name__)
            )
        else:
            o_index_var = o_index_var[0]

        # Sample dimension
        # Since this is a flat dataframe, everything is the length of the obs dimension
        row_sizes = o_index_var[:]
        o_dim = self.dimensions[o_index_var.sample_dimension]

        profile_indexes = normalize_countable_array(p_var, count_if_none=p_dim.size)
        p = np.repeat(profile_indexes, row_sizes)

        stat_indexes = normalize_countable_array(s_var, count_if_none=s_dim.size)
        r = np.ma.masked_all(o_dim.size, dtype=stat_indexes.dtype)

        # Lat and Lon are on the station dimension
        xvar = axv.x
        x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)
        yvar = axv.y
        y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)
        si = 0
        for i in np.arange(stat_indexes.size):
            ei = si + o_index_var[i]
            r[si:ei] = np.array(stat_indexes[r_index_var[i]])
            x[si:ei] = xvar[i]
            y[si:ei] = yvar[i]
            si = ei
        x = generic_masked(x, minv=-180, maxv=180)
        y = generic_masked(y, minv=-90, maxv=90)

        # Time and Z are on the sample (obs) dimension
        tvar = axv.t
        t = get_masked_datetime_array(
            generic_masked(tvar[:].flatten(), attrs=self.vatts(tvar.name)),
            tvar
        )
        z = generic_masked(axv.z[:].flatten(), attrs=self.vatts(axv.z.name))

        df_data = OrderedDict([
            (axes.t, t),
            (axes.x, x),
            (axes.y, y),
            (axes.z, z),
            (axes.station, r),
            (axes.profile, p)
        ])

        building_index_to_drop = np.ones(o_dim.size, dtype=bool)

        extract_vars = copy(self.variables)
        # Skip the station and row index variables
        del extract_vars[o_index_var.name]
        del extract_vars[r_index_var.name]
        # Axes variables are already processed so skip them
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):

            # Profile dimensions
            if dvar.dimensions == (p_dim.name,):
                vdata = generic_masked(
                    np.repeat(
                        dvar[:].flatten().astype(dvar.dtype),
                        row_sizes
                    ),
                    attrs=self.vatts(dnam)
                )

            # Sample dimensions
            elif dvar.dimensions == (o_dim.name,):
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))

            else:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
                # Carry through size 1 variables
                if vdata.size == 1:
                    if vdata[0] is np.ma.masked:
                        L.warning("Skipping variable {} that is completely masked".format(dnam))
                        continue
                else:
                    L.warning("Skipping variable {} since it didn't match any dimension sizes".format(dnam))
                    continue

            # Mark rows with data so we don't remove them with clear_rows
            if vdata.size == building_index_to_drop.size:
                building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa

            # Handle scalars here at the end
            if vdata.size == 1:
                vdata = vdata[0]

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 5
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        # Multiple profiles in the file
        pvar = axv.profile
        p_dim = self.dimensions[pvar.dimensions[0]]

        zvar = axv.z
        zs = len(self.dimensions[[
            d for d in zvar.dimensions if d != p_dim.name
        ][0]])

        # Profiles
        p = normalize_countable_array(pvar)
        p = p.repeat(zs)

        # Z
        z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name))

        # T
        tvar = axv.t
        t = tvar[:].repeat(zs)
        nt = get_masked_datetime_array(t, tvar).flatten()

        # X
        xvar = axv.x
        x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name))

        # Y
        yvar = axv.y
        y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name))

        df_data = OrderedDict([(axes.t, nt), (axes.x, x), (axes.y, y),
                               (axes.z, z), (axes.profile, p)])

        building_index_to_drop = np.ones(t.size, dtype=bool)

        extract_vars = copy(self.variables)
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):

            # Profile dimension
            if dvar.dimensions == pvar.dimensions:
                vdata = generic_masked(dvar[:].repeat(zs).astype(dvar.dtype),
                                       attrs=self.vatts(dnam))

            # Profile, z dimension
            elif dvar.dimensions == zvar.dimensions:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))

            else:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                # Carry through size 1 variables
                if vdata.size == 1:
                    if vdata[0] is np.ma.masked:
                        L.warning(
                            "Skipping variable {} that is completely masked".
                            format(dnam))
                        continue
                else:
                    L.warning(
                        "Skipping variable {} since it didn't match any dimension sizes"
                        .format(dnam))
                    continue

            # Mark rows with data so we don't remove them with clear_rows
            if vdata.size == building_index_to_drop.size:
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Handle scalars here at the end
            if vdata.size == 1:
                vdata = vdata[0]

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
    def to_dataframe(self, clean_cols=False, clean_rows=False, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        # T
        t = get_masked_datetime_array(axv.t[:], axv.t)

        # X
        x = generic_masked(axv.x[:].repeat(t.size),
                           attrs=self.vatts(axv.x.name))

        # Y
        y = generic_masked(axv.y[:].repeat(t.size),
                           attrs=self.vatts(axv.y.name))

        # Z
        if axv.z is not None:
            z = generic_masked(axv.z[:].repeat(t.size),
                               attrs=self.vatts(axv.z.name))
        else:
            z = None

        svar = axv.station
        s = normalize_countable_array(svar)
        s = np.repeat(s, t.size)

        # now repeat t per station
        # figure out if this is a single-station file by checking
        # the dimension size of the x dimension
        if axv.x.ndim == 1:
            t = np.repeat(t, len(svar))

        df_data = OrderedDict([
            (axes.t, t),
            (axes.x, x),
            (axes.y, y),
            (axes.z, z),
            (axes.station, s),
        ])

        building_index_to_drop = np.ma.zeros(t.size, dtype=bool)

        # Axes variables are already processed so skip them
        extract_vars = copy(self.variables)
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):
            vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                   attrs=self.vatts(dnam))

            # Carry through size 1 variables
            if vdata.size == 1:
                if vdata[0] is np.ma.masked:
                    L.warning("Skipping variable {} that is completely masked".
                              format(dnam))
                    continue
            else:
                if dvar[:].flatten().size != t.size:
                    L.warning("Variable {} is not the correct size, skipping.".
                              format(dnam))
                    continue

            # Mark rows with data so we don't remove them with clear_rows
            if vdata.size == building_index_to_drop.size:
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Handle scalars here at the end
            if vdata.size == 1:
                vdata = vdata[0]

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 7
0
    def to_dataframe(self, clean_cols=True, clean_rows=True):
        # The index variable (trajectory_index) is identified by having an
        # attribute with name of instance_dimension whose value is the instance
        # dimension name (trajectory in this example). The index variable must
        # have the profile dimension as its sole dimension, and must be type
        # integer. Each value in the index variable is the zero-based trajectory
        # index that the profile belongs to i.e. profile p belongs to trajectory
        # i=trajectory_index(p), as in section H.2.5.
        r_index_var = self.filter_by_attrs(instance_dimension=lambda x: x is not None)[0]
        p_dim = self.dimensions[r_index_var.dimensions[0]]       # Profile dimension
        r_dim = self.dimensions[r_index_var.instance_dimension]  # Trajectory dimension

        # The count variable (row_size) contains the number of elements for
        # each profile, which must be written contiguously. The count variable
        # is identified by having an attribute with name sample_dimension whose
        # value is the sample dimension (obs in this example) being counted. It
        # must have the profile dimension as its sole dimension, and must be
        # type integer
        o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None)[0]
        o_dim = self.dimensions[o_index_var.sample_dimension]  # Sample dimension

        try:
            rvar = self.filter_by_attrs(cf_role='trajectory_id')[0]
            traj_indexes = normalize_array(rvar)
            assert traj_indexes.size == r_dim.size
        except BaseException:
            logger.warning('Could not pull trajectory values a variable with "cf_role=trajectory_id", using a computed range.')
            traj_indexes = np.arange(r_dim.size)
        try:
            pvar = self.filter_by_attrs(cf_role='profile_id')[0]
            profile_indexes = normalize_array(pvar)
            assert profile_indexes.size == p_dim.size
        except BaseException:
            logger.warning('Could not pull profile values from a variable with "cf_role=profile_id", using a computed range.')
            profile_indexes = np.arange(p_dim.size)

        # Profile dimension
        tvars = self.t_axes()
        if len(tvars) > 1:
            tvar = [ v for v in self.t_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 't' ][0]
        else:
            tvar = tvars[0]

        xvars = self.x_axes()
        if len(xvars) > 1:
            xvar = [ v for v in self.x_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'x' ][0]
        else:
            xvar = xvars[0]

        yvars = self.y_axes()
        if len(yvars) > 1:
            yvar = [ v for v in self.y_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'y' ][0]
        else:
            yvar = yvars[0]

        zvars = self.z_axes()
        if len(zvars) > 1:
            zvar = [ v for v in self.z_axes() if v.dimensions == (o_dim.name,) and getattr(v, 'axis', '').lower() == 'z' ][0]
        else:
            zvar = zvars[0]

        p = np.ma.masked_all(o_dim.size, dtype=profile_indexes.dtype)
        r = np.ma.masked_all(o_dim.size, dtype=traj_indexes.dtype)
        t = np.ma.masked_all(o_dim.size, dtype=tvar.dtype)
        x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)
        y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)
        si = 0

        for i in np.arange(profile_indexes.size):
            ei = si + o_index_var[i]
            p[si:ei] = profile_indexes[i]
            r[si:ei] = traj_indexes[r_index_var[i]]
            t[si:ei] = tvar[i]
            x[si:ei] = xvar[i]
            y[si:ei] = yvar[i]
            si = ei

        t_mask = False
        tfill = get_fill_value(tvar)
        if tfill is not None:
            t_mask = np.copy(np.ma.getmaskarray(t))
            t[t_mask] = 1

        t = np.ma.MaskedArray(
            nc4.num2date(t, tvar.units, getattr(tvar, 'calendar', 'standard'))
        )
        # Patch the time variable back to its original mask, since num2date
        # breaks any missing/fill values
        t[t_mask] = np.ma.masked

        # X and Y
        x = generic_masked(x, minv=-180, maxv=180).round(5)
        y = generic_masked(y, minv=-90, maxv=90).round(5)

        # Distance
        d = np.ma.zeros(o_dim.size, dtype=np.float64)
        d[1:] = great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance']
        d = generic_masked(np.cumsum(d), minv=0).round(2)

        # Sample dimension
        z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name)).round(5)

        df_data = {
            't': t,
            'x': x,
            'y': y,
            'z': z,
            'trajectory': r,
            'profile': p,
            'distance': d
        }

        building_index_to_drop = np.ones(o_dim.size, dtype=bool)
        extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
        for i, dvar in enumerate(extract_vars):

            # Profile dimensions
            if dvar.dimensions == (p_dim.name,):
                vdata = np.ma.masked_all(o_dim.size, dtype=dvar.dtype)
                si = 0
                for j in np.arange(profile_indexes.size):
                    ei = si + o_index_var[j]
                    vdata[si:ei] = dvar[j]
                    si = ei

            # Sample dimensions
            elif dvar.dimensions == (o_dim.name,):
                vdata = generic_masked(dvar[:].flatten(), attrs=self.vatts(dvar.name)).round(3)

            else:
                logger.warning("Skipping variable {}... it didn't seem like a data variable".format(dvar))

            building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
            df_data[dvar.name] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 8
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self,
                                        axes,
                                        skip=[axes.profile, axes.station])

        # T
        t = get_masked_datetime_array(axv.t[:], axv.t).flatten()

        # X
        x = generic_masked(axv.x[:], attrs=self.vatts(axv.x.name)).flatten()

        # Y
        y = generic_masked(axv.y[:], attrs=self.vatts(axv.y.name)).flatten()

        # Z
        z = generic_masked(axv.z[:], attrs=self.vatts(axv.z.name)).flatten()

        # Trajectories
        rvar = axv.trajectory
        p = normalize_countable_array(rvar)

        # The Dimension that the trajectory id variable doesn't have is what
        # the trajectory data needs to be repeated by
        dim_diff = self.dimensions[list(
            set(axv.t.dimensions).difference(set(rvar.dimensions)))[0]]
        if dim_diff:
            p = p.repeat(dim_diff.size)

        df_data = OrderedDict([(axes.t, t), (axes.x, x), (axes.y, y),
                               (axes.z, z), (axes.trajectory, p)])

        building_index_to_drop = np.ones(t.size, dtype=bool)

        # Axes variables are already processed so skip them
        extract_vars = copy(self.variables)
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):
            vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                   attrs=self.vatts(dnam))

            # Carry through size 1 variables
            if vdata.size == 1:
                if vdata[0] is np.ma.masked:
                    L.warning("Skipping variable {} that is completely masked".
                              format(dnam))
                    continue
                vdata = vdata[0]
            else:
                if dvar[:].flatten().size != t.size:
                    L.warning("Variable {} is not the correct size, skipping.".
                              format(dnam))
                    continue

                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 9
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        o_index_var = self.filter_by_attrs(
            sample_dimension=lambda x: x is not None)
        if not o_index_var:
            raise ValueError(
                'Could not find the "sample_dimension" attribute on any variables, '
                'is this a valid {}?'.format(self.__class__.__name__))
        else:
            o_index_var = o_index_var[0]
            o_dim = self.dimensions[
                o_index_var.sample_dimension]  # Sample dimension
            t_dim = o_index_var.dimensions

        # Trajectory
        row_sizes = o_index_var[:]
        traj_data = normalize_countable_array(axv.trajectory)
        traj_data = np.repeat(traj_data, row_sizes)

        # time
        time_data = get_masked_datetime_array(axv.t[:], axv.t).flatten()

        df_data = OrderedDict([(axes.t, time_data),
                               (axes.trajectory, traj_data)])

        building_index_to_drop = np.ones(o_dim.size, dtype=bool)

        extract_vars = copy(self.variables)
        # Skip the time and row index variables
        del extract_vars[o_index_var.name]
        del extract_vars[axes.t]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):

            # Trajectory dimensions
            if dvar.dimensions == t_dim:
                vdata = np.repeat(
                    generic_masked(dvar[:], attrs=self.vatts(dnam)), row_sizes)

            # Sample dimensions
            elif dvar.dimensions == (o_dim.name, ):
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))

            else:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                # Carry through size 1 variables
                if vdata.size == 1:
                    if vdata[0] is np.ma.masked:
                        L.warning(
                            "Skipping variable {} that is completely masked".
                            format(dnam))
                        continue
                else:
                    L.warning(
                        "Skipping variable {} since it didn't match any dimension sizes"
                        .format(dnam))
                    continue

            # Mark rows with data so we don't remove them with clear_rows
            if vdata.size == building_index_to_drop.size:
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Handle scalars here at the end
            if vdata.size == 1:
                vdata = vdata[0]

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 10
0
    def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
        axes = get_default_axes(kwargs.pop('axes', {}))

        axv = get_mapped_axes_variables(self, axes)

        zvar = axv.z
        zs = len(self.dimensions[zvar.dimensions[0]])

        # Profiles
        pvar = axv.profile
        p = normalize_countable_array(pvar)
        ps = p.size
        p = p.repeat(zs)

        # Z
        z = generic_masked(zvar[:], attrs=self.vatts(zvar.name))
        try:
            z = np.tile(z, ps)
        except ValueError:
            z = z.flatten()

        # T
        tvar = axv.t
        t = tvar[:].repeat(zs)
        nt = get_masked_datetime_array(t, tvar).flatten()

        # X
        xvar = axv.x
        x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name))

        # Y
        yvar = axv.y
        y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name))

        df_data = OrderedDict([(axes.t, nt), (axes.x, x), (axes.y, y),
                               (axes.z, z), (axes.profile, p)])

        building_index_to_drop = np.ones(t.size, dtype=bool)

        # Axes variables are already processed so skip them
        extract_vars = copy(self.variables)
        for ncvar in axv._asdict().values():
            if ncvar is not None and ncvar.name in extract_vars:
                del extract_vars[ncvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):

            # Profile dimension
            if dvar.dimensions == pvar.dimensions:
                vdata = generic_masked(dvar[:].repeat(zs).astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Z dimension
            elif dvar.dimensions == zvar.dimensions:
                vdata = generic_masked(np.tile(dvar[:], ps).flatten().astype(
                    dvar.dtype),
                                       attrs=self.vatts(dnam))
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            # Profile, z dimension
            elif dvar.dimensions == pvar.dimensions + zvar.dimensions:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                building_index_to_drop = (building_index_to_drop == True) & (
                    vdata.mask == True)  # noqa

            else:
                vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype),
                                       attrs=self.vatts(dnam))
                # Carry through size 1 variables
                if vdata.size == 1:
                    if vdata[0] is np.ma.masked:
                        L.warning(
                            "Skipping variable {} that is completely masked".
                            format(dnam))
                        continue
                    vdata = vdata[0]
                else:
                    L.warning(
                        "Skipping variable {} since it didn't match any dimension sizes"
                        .format(dnam))
                    continue

            df_data[dnam] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 11
0
    def to_dataframe(self, clean_cols=True, clean_rows=True):

        zvar = self.z_axes()[0]
        zs = len(self.dimensions[zvar.dimensions[0]])

        # Profiles
        pvar = self.filter_by_attrs(cf_role='profile_id')[0]
        try:
            p = normalize_array(pvar)
        except ValueError:
            p = np.asarray(list(range(len(pvar))), dtype=np.integer)
        ps = p.size
        p = p.repeat(zs)
        logger.debug(['profile data size: ', p.size])

        # Z
        z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)).round(5)
        try:
            z = np.tile(z, ps)
        except ValueError:
            z = z.flatten()
        logger.debug(['z data size: ', z.size])

        # T
        tvar = self.t_axes()[0]
        t = nc4.num2date(tvar[:], tvar.units,
                         getattr(tvar, 'calendar', 'standard'))
        if isinstance(t, datetime):
            # Size one
            t = np.array([t.isoformat()], dtype='datetime64')
        t = t.repeat(zs)
        logger.debug(['time data size: ', t.size])

        # X
        xvar = self.x_axes()[0]
        x = generic_masked(xvar[:].repeat(zs),
                           attrs=self.vatts(xvar.name)).round(5)
        logger.debug(['x data size: ', x.size])

        # Y
        yvar = self.y_axes()[0]
        y = generic_masked(yvar[:].repeat(zs),
                           attrs=self.vatts(yvar.name)).round(5)
        logger.debug(['y data size: ', y.size])

        # Distance
        d = np.ma.zeros(y.size, dtype=np.float64)
        d[1:] = great_distance(start_latitude=y[0:-1],
                               end_latitude=y[1:],
                               start_longitude=x[0:-1],
                               end_longitude=x[1:])['distance']
        d = generic_masked(np.cumsum(d), minv=0).round(2)
        logger.debug(['distance data size: ', d.size])

        df_data = {'t': t, 'x': x, 'y': y, 'z': z, 'profile': p, 'distance': d}

        building_index_to_drop = np.ones(t.size, dtype=bool)
        extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
        for i, dvar in enumerate(extract_vars):
            vdata = np.ma.fix_invalid(
                np.ma.MaskedArray(dvar[:].round(3).flatten()))
            building_index_to_drop = (building_index_to_drop == True) & (
                vdata.mask == True)  # noqa
            df_data[dvar.name] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
Esempio n. 12
0
    def to_dataframe(self, clean_cols=False, clean_rows=False):

        # Don't pass around the attributes store them in the class

        # T
        tvar = self.t_axes()[0]
        t = nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))
        if isinstance(t, datetime):
            # Size one
            t = np.array([t.isoformat()], dtype='datetime64')
        logger.debug(['time data size: ', t.size])

        svar = self.filter_by_attrs(cf_role='timeseries_id')[0]
        # Stations
        # TODO: Make sure there is a test for a file with multiple time variables
        try:
            s = normalize_array(svar)
        except ValueError:
            s = np.asarray(list(range(len(svar))), dtype=np.integer)
        s = np.repeat(s, t.size)
        logger.debug(['station data size: ', s.size])

        # X
        xvar = self.x_axes()[0]
        x = generic_masked(xvar[:].repeat(t.size), attrs=self.vatts(xvar.name)).round(5)
        logger.debug(['x data size: ', x.size])

        # Y
        yvar = self.y_axes()[0]
        y = generic_masked(yvar[:].repeat(t.size), attrs=self.vatts(yvar.name)).round(5)
        logger.debug(['y data size: ', y.size])

        # Z
        zvar = self.z_axes()[0]
        z = generic_masked(zvar[:].repeat(t.size), attrs=self.vatts(zvar.name))
        logger.debug(['z data size: ', z.size])

        # now repeat t per station

        # figure out if this is a single-station file
        # do this by checking the dimensions of the Z var
        if zvar.ndim == 1:
            t = np.repeat(t, len(svar))

        df_data = {
            't': t,
            'x': x,
            'y': y,
            'z': z,
            'station': s,
        }

        #building_index_to_drop = np.ones(t.size, dtype=bool)
        extract_vars = copy(self.variables)
        del extract_vars[svar.name]
        del extract_vars[xvar.name]
        del extract_vars[yvar.name]
        del extract_vars[zvar.name]
        del extract_vars[tvar.name]

        for i, (dnam, dvar) in enumerate(extract_vars.items()):
            if dvar[:].flatten().size > t.size:
                logger.warning("Variable {} is not the correct size, skipping.".format(dnam))
                continue

            vdata = generic_masked(dvar[:].flatten(), attrs=self.vatts(dnam))
            if vdata.size == 1:
                vdata = vdata[0]
            #building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
            try:
                if re.match(r'.* since .*',dvar.units):
                    vdata = nc4.num2date(vdata[:], dvar.units, getattr(dvar, 'calendar', 'standard'))
            except AttributeError:
                pass
            df_data[dnam] = vdata
            #logger.info('{} - {}'.format(dnam, vdata.shape))

        df = pd.DataFrame()
        for k, v in df_data.items():
            df[k] = v

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        #if clean_rows:
        #    df = df.iloc[~building_index_to_drop]

        return df