Ejemplo n.º 1
0
    def read(self, path):
        """Load the stored dataframe and re-attach its metadata.

        Args:
            path: location of the HDF5 store to open.

        Returns:
            The object stored under the ``'dataframe'`` key, with a
            ``metadata`` attribute taken from that key's storer attributes.
        """
        with pd.HDFStore(path) as store:
            frame = store.get('dataframe')
            # The metadata lives on the storer's attrs, not in the table.
            storer = store.get_storer('dataframe')
            setattr_on_dataframe(frame, 'metadata', storer.attrs.metadata)
        return frame
Ejemplo n.º 2
0
    def _run_tool(self):
        """Rank the dataset's parameter values and save a percent-exceeded table.

        Reads the dataset's timeseries file, sorts it by the parameter column
        in descending order, ranks the values, and indexes the result by
        ``rank / (count + 1) * 100``. The result is written as a new dataset
        through the ``xy-hdf5`` io plugin.

        Returns:
            dict with the new dataset under ``'datasets'`` and its catalog
            entry under ``'catalog_entries'``.

        Raises:
            IOError: if the dataset metadata has no ``file_path``.
        """
        dataset = self.dataset

        reader = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        orig_metadata = get_metadata(dataset)[dataset]
        parameter = orig_metadata['parameter']
        source_path = orig_metadata['file_path']
        if source_path is None:
            raise IOError('No data file available for this dataset')

        df = reader.read(source_path)

        # Hold on to the metadata dict (minus the stale file path) so it can
        # be re-attached once the frame has been reshaped.
        metadata = df.metadata
        metadata.pop('file_path', None)

        # Largest values first; missing values are pushed to the end.
        df.sort_values(
            by=[parameter],
            ascending=False,
            na_position='last',
            inplace=True,
        )
        df['Rank'] = df[parameter].rank(method='min', ascending=False)
        df.dropna(inplace=True)

        sample_size = df[parameter].count()
        df['Percent Exceeded'] = (df['Rank'] / (sample_size + 1)) * 100
        df.index = df['Percent Exceeded']

        setattr_on_dataframe(df, 'metadata', metadata)

        # Describe the derived dataset.
        new_metadata = {
            'parameter': df.metadata.get('parameter'),
            'datatype': orig_metadata['datatype'],
            'options': self.set_options,
            'file_format': orig_metadata['file_format'],
            'unit': df.metadata.get('unit'),
        }

        new_dset, file_path, catalog_entry = self._create_new_dataset(
            old_dataset=dataset,
            ext='.h5',
            dataset_metadata=new_metadata,
        )

        # Persist the table next to the freshly created dataset record.
        writer = load_plugins('io', 'xy-hdf5')['xy-hdf5']
        writer.write(file_path, df, new_metadata)

        return {'datasets': new_dset, 'catalog_entries': catalog_entry}
Ejemplo n.º 3
0
    def _run(self, df):
        """Resample ``df`` to ``self.period`` using aggregation ``self.method``.

        The parameter column is renamed to ``'<base>:<period>:<method>'``,
        where ``<base>`` is the part of the current parameter name before the
        first ``':'``. The metadata's ``parameter`` entry is updated to match.

        Args:
            df: dataframe carrying a ``metadata`` attribute with a
                ``'parameter'`` entry.

        Returns:
            The resampled dataframe with the updated metadata attached.
        """
        metadata = df.metadata
        metadata.pop('file_path', None)
        param = metadata['parameter']
        period = self.period
        method = self.method

        # Only the leading component of an 'a:b:c' style name is reused.
        base_param = param.split(':')[0]

        resampler = df.resample(periods[period], kind='period')
        aggregate = getattr(resampler, method)
        new_df = aggregate()

        new_param = '%s:%s:%s' % (base_param, period, method)
        # rename() returns a copy by default, so ask for in-place renaming.
        new_df.rename(columns={param: new_param}, inplace=True)

        metadata['parameter'] = new_param
        setattr_on_dataframe(new_df, 'metadata', metadata)

        return new_df
Ejemplo n.º 4
0
    def _run(self, df):
        """Keep only rows within ``sigma`` standard deviations of the median.

        ``self.sigma`` sets the band half-width in standard deviations; when
        it is ``None`` a value of 3 is used. Both bounds are exclusive.

        Args:
            df: dataframe carrying a ``metadata`` attribute with a
                ``'parameter'`` entry naming the column to filter on.

        Returns:
            The filtered dataframe with the metadata re-attached.
        """
        metadata = df.metadata
        metadata.pop('file_path', None)
        parameter = metadata['parameter']

        sigma = self.sigma
        if sigma is None:
            sigma = 3

        values = df[parameter]
        center = values.median()
        spread = values.std()
        vmin = center - float(sigma) * spread
        vmax = center + float(sigma) * spread

        # Strict inequalities: values exactly on a bound are discarded.
        inside_band = (values > vmin) & (values < vmax)
        df = df[inside_band]
        setattr_on_dataframe(df, 'metadata', metadata)

        return df
Ejemplo n.º 5
0
    def _run(self, df):
        """Convert a column of ``df`` from its current unit to ``self.to_units``.

        If the source unit contains a ``'/'`` and the requested unit does not,
        everything from the source unit's first ``'/'`` onward is appended to
        the requested unit before converting.

        Args:
            df: dataframe carrying a ``metadata`` attribute with a ``'unit'``
                entry.

        Returns:
            The same dataframe with the scaled column and updated unit
            metadata.

        Raises:
            ValueError: if ``self.to_units`` is ``None``.
        """
        requested = self.to_units
        if requested is None:
            raise ValueError('To_units cannot be None')

        metadata = df.metadata
        metadata.pop('file_path', None)

        reg = unit_registry()
        from_units = metadata['unit']

        to_units = requested
        if '/' in from_units and '/' not in requested:
            # Reuse the source's '/...' suffix for the target unit.
            to_units = requested + from_units[from_units.index('/'):]

        # Factor for one source unit; applied to the whole column below.
        conversion = reg.convert(1, src=from_units, dst=to_units)

        # NOTE(review): scales the second column by position, not the column
        # named by metadata['parameter'] - confirm this is intended.
        target_column = df.columns[1]
        df[target_column] = df[target_column] * conversion

        metadata['unit'] = to_units
        setattr_on_dataframe(df, 'metadata', metadata)

        return df