Code Example #1
File: ts_flow_duration.py  Project: dloney/quest
    def _run_tool(self):

        dataset = self.dataset

        input_ts = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        orig_metadata = get_metadata(dataset)[dataset]
        parameter = orig_metadata['parameter']
        if orig_metadata['file_path'] is None:
            raise IOError('No data file available for this dataset')

        df = input_ts.read(orig_metadata['file_path'])

        # compute the flow-duration curve directly on the dataframe
        metadata = df.metadata
        if 'file_path' in metadata:
            del metadata['file_path']
        df.sort_values([parameter],
                       ascending=False,
                       na_position='last',
                       inplace=True)
        df['Rank'] = df[parameter].rank(method='min', ascending=False)
        df.dropna(inplace=True)
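        # exceedance probability via the Weibull plotting position: rank / (n + 1)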
        df['Percent Exceeded'] = (df['Rank'] /
                                  (df[parameter].count() + 1)) * 100
        df.index = df['Percent Exceeded']

        setattr_on_dataframe(df, 'metadata', metadata)
        new_df = df
        # setup new dataset
        new_metadata = {
            'parameter': new_df.metadata.get('parameter'),
            'datatype': orig_metadata['datatype'],
            'options': self.set_options,
            'file_format': orig_metadata['file_format'],
            'unit': new_df.metadata.get('unit'),
        }

        new_dset, file_path, catalog_entry = self._create_new_dataset(
            old_dataset=dataset,
            ext='.h5',
            dataset_metadata=new_metadata,
        )

        # save dataframe
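        # the result is indexed by percent exceeded rather than time, so the
        # xy writer is used here instead of the timeseries writer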
        output = load_plugins('io', 'xy-hdf5')['xy-hdf5']
        output.write(file_path, new_df, new_metadata)

        return {'datasets': new_dset, 'catalog_entries': catalog_entry}
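
The ranking logic above can be exercised on its own; here is a minimal, runnable sketch on a toy series (the column name and values are invented for illustration):

    import pandas as pd

    # toy discharge series with one missing value
    df = pd.DataFrame({'streamflow': [5.0, 1.0, 3.0, None, 2.0, 4.0]})

    df.sort_values(['streamflow'], ascending=False, na_position='last', inplace=True)
    df['Rank'] = df['streamflow'].rank(method='min', ascending=False)
    df.dropna(inplace=True)
    # Weibull plotting position: rank / (n + 1), expressed as a percentage
    df['Percent Exceeded'] = (df['Rank'] / (df['streamflow'].count() + 1)) * 100
    df.index = df['Percent Exceeded']
    print(df)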
Code Example #2
File: ts_base.py  Project: douggallup/quest
    def _run_tool(self):
        dataset = self.dataset

        io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        orig_metadata = get_metadata(dataset)[dataset]
        if orig_metadata['file_path'] is None:
            raise IOError('No data file available for this dataset')

        df = io.read(orig_metadata['file_path'])

        # run filter
        new_df = self._run(df)
        # setup new dataset
        new_metadata = {
            'parameter': new_df.metadata.get('parameter'),
            'unit': new_df.metadata.get('unit'),
            'datatype': orig_metadata['datatype'],
            'file_format': orig_metadata['file_format'],
        }

        new_dset, file_path, catalog_entry = self._create_new_dataset(
            old_dataset=dataset,
            ext='.h5',
            dataset_metadata=new_metadata,
        )

        # save dataframe
        io.write(file_path, new_df, new_metadata)

        return {'datasets': new_dset}
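
Example #2 is the generic base pattern: read the file, delegate to self._run, write the result. A sketch of what a concrete _run might look like under this pattern (the class names, the stand-in base, and the daily-mean resample rule are assumptions for illustration, not taken from quest):

    import pandas as pd

    class ToolBase:
        # stand-in for the quest tool base class, for illustration only
        def _run(self, df):
            raise NotImplementedError

    class DailyMean(ToolBase):
        def _run(self, df):
            # downsample the timeseries to daily means; NaNs are skipped
            return df.resample('D').mean()

    df = pd.DataFrame({'streamflow': [1.0, 2.0, 3.0, 4.0]},
                      index=pd.date_range('2020-01-01', periods=4, freq='12h'))
    print(DailyMean()._run(df))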
Code Example #3
File: noaa_coastwatch.py  Project: douggallup/quest
    def download(self, catalog_id, file_path, dataset, **kwargs):
        p = param.ParamOverrides(self, kwargs)
        self.parameter = p.parameter
        self.end = pd.to_datetime(p.end)
        self.start = pd.to_datetime(p.start)
        self._catalog_id = catalog_id

        if dataset is None:
            dataset = 'station-' + catalog_id

        try:
            url = self.url
            logger.info('downloading data from %s', url)
            data = pd.read_csv(url)

            if data.empty:
                raise ValueError('No Data Available')

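            # columns arrive as 'name (unit)'; build a rename map of bare
            # names and a unit lookup keyed by bare name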
            rename = {x: x.split()[0] for x in data.columns.tolist()}
            units = {x.split()[0]: x.split()[-1].strip('()').lower() for x in data.columns.tolist()}
            data.rename(columns=rename, inplace=True)
            data = data.set_index('time')
            data.index = pd.to_datetime(data.index)
            data.rename(columns={self.parameter_code: self.parameter}, inplace=True)

            file_path = os.path.join(file_path, self.BASE_PATH, self.service_name, dataset, '{0}.h5'.format(dataset))

            metadata = {
                'file_path': file_path,
                'file_format': 'timeseries-hdf5',
                'datatype': 'timeseries',
                'parameter': p.parameter,
                'unit': units[self.parameter_code],
                'service_id': 'svc://noaa:{}/{}'.format(self.service_name, catalog_id)
            }

            # save data to disk
            io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
            io.write(file_path, data, metadata)
            del metadata['service_id']

            return metadata

        except HTTPError as error:
            if error.code == 500:
                raise ValueError('No Data Available')
            elif error.code == 400:
                raise ValueError('Bad Request')
            else:
                raise error
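
The column parsing in Example #3 assumes headers of the form 'name (unit)'; a quick standalone check with made-up column names:

    columns = ['time (UTC)', 'sea_water_temperature (Celsius)']
    rename = {x: x.split()[0] for x in columns}
    units = {x.split()[0]: x.split()[-1].strip('()').lower() for x in columns}
    print(rename)  # {'time (UTC)': 'time', 'sea_water_temperature (Celsius)': 'sea_water_temperature'}
    print(units)   # {'time': 'utc', 'sea_water_temperature': 'celsius'}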
Code Example #4
    def download(self, catalog_id, file_path, dataset, **kwargs):
        p = param.ParamOverrides(self, kwargs)
        self.parameter = p.parameter
        self.end = pd.to_datetime(p.end)
        self.start = pd.to_datetime(p.start)
        self._catalog_entry = catalog_id

        if dataset is None:
            dataset = 'station-' + catalog_id

        file_path = os.path.join(file_path, BASE_PATH, self.service_name,
                                 dataset, '{0}.h5'.format(dataset))

        metadata = {
            'file_path': file_path,
            'file_format': 'timeseries-hdf5',
            'datatype': DataType.TIMESERIES,
            'parameter': self.parameter,
            'unit': self._unit_map[self.parameter],
            'service_id': 'svc://ncdc:{}/{}'.format(self.service_name,
                                                    catalog_id)
        }

        # save data to disk
        io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        io.write(file_path, self.data, metadata)
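        # service_id is included in the metadata written to disk but
        # stripped from the metadata returned to the caller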
        del metadata['service_id']

        return metadata
Code Example #5
File: usgs_nwis.py  Project: douggallup/quest
    def download(self, catalog_id, file_path, dataset, **kwargs):
        p = param.ParamOverrides(self, kwargs)

        parameter = p.parameter
        start = p.start
        end = p.end
        period = p.period

        if dataset is None:
            dataset = 'station-' + catalog_id

        if start and end:
            period = None

        pmap = self.parameter_map(invert=True)
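        # inverted map values are either 'parameter_code' or
        # 'parameter_code:statistic_code'; padding with None yields
        # (code, None) when no statistic code is present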
        parameter_code, statistic_code = (pmap[parameter].split(':') +
                                          [None])[:2]

        data = nwis.get_site_data(catalog_id,
                                  parameter_code=parameter_code,
                                  statistic_code=statistic_code,
                                  start=start,
                                  end=end,
                                  period=period,
                                  service=self.service_name)

        # the dict contains only one key since only one parameter/statistic
        # was downloaded; this would need to change if multiple
        # parameters/statistics were downloaded together
        if not data:
            raise ValueError('No Data Available')

        data = list(data.values())[0]

        # convert to dataframe and cleanup bad data
        df = pd.DataFrame(data['values'])
        if df.empty:
            raise ValueError('No Data Available')
        df = df.set_index('datetime')
        df.value = df.value.astype(float)
        if statistic_code in ['00001', '00002', '00003']:
            df.index = pd.to_datetime(df.index).to_period('D')
        else:
            df.index = pd.to_datetime(df.index)  # this is in UTC

        # -999999 is the NWIS missing-data sentinel; pd.np was removed in
        # pandas 2.0, so use numpy directly (assumes `import numpy as np`)
        df[df.values == -999999] = np.nan
        df.rename(columns={'value': parameter}, inplace=True)

        file_path = os.path.join(file_path, BASE_PATH, self.service_name,
                                 dataset, '{0}.h5'.format(dataset))

        del data['values']

        metadata = {
            'name': dataset,
            'metadata': data,
            'file_path': file_path,
            'file_format': 'timeseries-hdf5',
            'datatype': 'timeseries',
            'parameter': parameter,
            'unit': data['variable']['units']['code'],
            'service_id': 'svc://usgs-nwis:{}/{}'.format(self.service_name,
                                                         catalog_id)
        }

        # save data to disk
        io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        io.write(file_path, df, metadata)
        del metadata['service_id']

        return metadata
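
The split-and-pad idiom from Example #5 in isolation (the mapping values below are made up for illustration):

    pmap_value = '00060:00003'  # hypothetical 'parameter_code:statistic_code'
    parameter_code, statistic_code = (pmap_value.split(':') + [None])[:2]
    print(parameter_code, statistic_code)  # 00060 00003

    # a bare code with no statistic yields (code, None)
    parameter_code, statistic_code = ('00060'.split(':') + [None])[:2]
    print(parameter_code, statistic_code)  # 00060 None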