コード例 #1
0
ファイル: sink_phildb.py プロジェクト: dmkent/sensor-feed
class PhilDBSink(BufferedSink):
    """
        Buffered sink that stores its data in a PhilDB timeseries database.

        All data is registered against a single PhilDB source named
        'SENSOR'; each parameter name is used as both the measurand and the
        timeseries identifier.
    """
    def __init__(self, dbfile, *args, **kwargs):
        """Open (creating if necessary) the PhilDB database at *dbfile*.

        Remaining positional and keyword arguments are forwarded unchanged
        to the parent sink's constructor.
        """
        super(PhilDBSink, self).__init__(*args, **kwargs)

        # Creating an already existing database is not an error here: the
        # existing database is simply opened below.
        try:
            create(dbfile)
        except AlreadyExistsError:
            pass

        self.db = PhilDB(dbfile)
        # Most recent frequency inferred from a multi-value buffer; used as
        # the fallback when a buffer holds a single value.
        self.last_known_freq = None
        self._ignore_duplicate(self.db.add_source,
                               'SENSOR',
                               'Data from hardware sensor')

    @staticmethod
    def _ignore_duplicate(register, *args, **kwargs):
        """Call *register*, treating DuplicateError as "already present"."""
        try:
            register(*args, **kwargs)
        except DuplicateError:
            pass

    def write_buffer(self, param_name, series):
        """Write a buffer of data for *param_name* to the database.

        An empty *series* is ignored.  The measurand, timeseries and
        timeseries instance are registered on demand (duplicates are
        ignored) before the values are written.

        Raises ValueError when no frequency can be determined: either the
        index frequency cannot be inferred, or a single-value buffer
        arrives before any frequency has been remembered.
        """
        if len(series) == 0:
            return

        self._ignore_duplicate(self.db.add_measurand,
                               param_name, param_name, param_name)
        self._ignore_duplicate(self.db.add_timeseries, param_name)

        inferred = series.index.inferred_freq
        if len(series) == 1:
            # A frequency cannot be calculated from a single value, so fall
            # back on the last one seen, which in general should always be
            # the same.
            freq = self.last_known_freq
        else:
            freq = inferred
            if freq is not None:
                self.last_known_freq = freq

        if freq is None:
            raise ValueError('Unable to determine sensor frequency')

        instance_ids = dict(measurand=param_name, source='SENSOR')
        self._ignore_duplicate(self.db.add_timeseries_instance,
                               param_name, freq, 'None', **instance_ids)

        self.db.write(param_name, freq, series, **instance_ids)
コード例 #2
0
    def test_update_and_append(self):
        """Write five daily values starting 2014-01-02 and read back six.

        The assertions expect a value of 1.0 for 2014-01-01 that this test
        does not write; presumably it comes from the test fixture (not
        visible here).
        """
        db = PhilDB(self.test_tsdb)

        written_days = range(2, 7)
        written = pd.Series(
            index=[datetime(2014, 1, day) for day in written_days],
            data=[2.5, 3.0, 4.0, 5.0, 6.0])
        db.write('410730', 'D', written, measurand='Q', source='DATA_SOURCE')

        data = db.read('410730', 'D', measurand='Q', source='DATA_SOURCE')

        # Values first, then timestamps, position by position.
        expected_values = [1.0, 2.5, 3.0, 4.0, 5.0, 6.0]
        for position, expected in enumerate(expected_values):
            self.assertEqual(expected, data.values[position])

        for position, day in enumerate(range(1, 7)):
            self.assertEqual(datetime(2014, 1, day),
                             data.index[position].to_pydatetime())
コード例 #3
0
    def test_new_write(self):
        """Register a new timeseries, write three daily values, read them back."""
        db = PhilDB(self.test_tsdb)
        identifiers = dict(measurand='Q', source='DATA_SOURCE')

        db.add_timeseries('410731')
        db.add_timeseries_instance('410731', 'D', 'Foo', **identifiers)

        values = [1.0, 2.0, 3.0]
        stamps = [datetime(2014, 1, day) for day in (1, 2, 3)]
        db.write('410731', 'D', pd.Series(index=stamps, data=values),
                 **identifiers)

        results = db.read('410731', 'D', **identifiers)

        # The first timestamp is checked in full, the others by day only.
        self.assertEqual(results.index[0].year, 2014)
        self.assertEqual(results.index[0].month, 1)
        for position, day in enumerate((1, 2, 3)):
            self.assertEqual(results.index[position].day, day)

        for position, value in enumerate(values):
            self.assertEqual(results.values[position], value)
コード例 #4
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_update_and_append(self):
        """Write values for 2014-01-02..06 and verify the six values read back.

        The leading 1.0 on 2014-01-01 is not written here; it is assumed to
        be pre-loaded by the test fixture (not visible in this snippet).
        """
        db = PhilDB(self.test_tsdb)
        ids = {'measurand': 'Q', 'source': 'DATA_SOURCE'}

        new_values = [2.5, 3.0, 4.0, 5.0, 6.0]
        new_dates = [datetime(2014, 1, 2 + offset)
                     for offset in range(len(new_values))]
        db.write('410730', 'D', pd.Series(index=new_dates, data=new_values),
                 **ids)

        data = db.read('410730', 'D', **ids)

        # All values are checked before any timestamps.
        for slot, value in enumerate([1.0] + new_values):
            self.assertEqual(value, data.values[slot])

        all_dates = [datetime(2014, 1, 1)] + new_dates
        for slot, stamp in enumerate(all_dates):
            self.assertEqual(stamp, data.index[slot].to_pydatetime())
コード例 #5
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_new_write(self):
        """Create timeseries '410731', write three values and verify the read."""
        db = PhilDB(self.test_tsdb)

        db.add_timeseries('410731')
        db.add_timeseries_instance('410731', 'D', 'Foo',
                                   measurand='Q', source='DATA_SOURCE')

        series = pd.Series(
            index=[datetime(2014, 1, 1),
                   datetime(2014, 1, 2),
                   datetime(2014, 1, 3)],
            data=[1.0, 2.0, 3.0])
        db.write('410731', 'D', series, measurand='Q', source='DATA_SOURCE')

        results = db.read('410731', 'D', measurand='Q', source='DATA_SOURCE')

        # Year and month are only asserted for the first entry.
        self.assertEqual(results.index[0].year, 2014)
        self.assertEqual(results.index[0].month, 1)
        for slot in range(3):
            self.assertEqual(results.index[slot].day, slot + 1)

        for slot, expected in enumerate([1.0, 2.0, 3.0]):
            self.assertEqual(results.values[slot], expected)
コード例 #6
0
    def test_log_write(self):
        """Check the '/data' log table of the HDF5 file after three writes.

        The writes are: three initial values, the same dates with the
        middle value changed to 2.5, and one new value for 2014-01-04.
        The log is expected to hold five rows: the three initial entries,
        then the changed entry, then the appended one.
        """
        db = PhilDB(self.test_tsdb)

        db.add_timeseries('410731')
        db.add_timeseries_instance('410731', 'D', 'Foo',
                                   measurand='Q', source='DATA_SOURCE')

        dates = [datetime(2014, 1, day) for day in (1, 2, 3)]
        batches = [
            pd.Series(index=dates, data=[1.0, 2.0, 3.0]),
            pd.Series(index=dates, data=[1.0, 2.5, 3.0]),
            pd.Series(index=[datetime(2014, 1, 4)], data=[4.0]),
        ]
        for batch in batches:
            db.write('410731', 'D', batch,
                     measurand='Q', source='DATA_SOURCE')

        results = db.read('410731', 'D')
        for position, expected in enumerate([1.0, 2.5, 3.0, 4.0]):
            self.assertEqual(results.values[position], expected)

        # Expected leading fields of each log row.  The first field looks
        # like an epoch-second timestamp and the second the value; the
        # third field is only checked on row 0 (meaning not visible here).
        expected_log = [
            (1388534400, 1.0, 0),
            (1388620800, 2.0),
            (1388707200, 3.0),
            (1388620800, 2.5),
            (1388793600, 4.0),
        ]
        hdf5_path = db.get_file_path('410731', 'D', ftype='hdf5')
        with tables.open_file(hdf5_path, 'r') as hdf5_file:
            log_grp = hdf5_file.get_node('/data')

            for row, expected_fields in enumerate(expected_log):
                for column, expected in enumerate(expected_fields):
                    self.assertEqual(log_grp.log[row][column], expected)
コード例 #7
0
def write_phildb(file_list, results_file, first_run = False):
    """Write each CSV in *file_list* to the 'hrs_phildb' database, timing it.

    The station ID is the part of each file's base name before the first
    underscore.  The elapsed wall-clock time of every ``db.write`` call is
    collected and saved to *results_file* with ``np.savetxt``.

    When *first_run* is true the database, the 'Q' measurand, the 'BOM_HRS'
    source and each station's timeseries and instance are created first.

    NOTE: ``freq`` is not defined in this function; it is read from the
    enclosing module scope (not visible in this snippet).
    """
    db_name = 'hrs_phildb'
    source_id = 'BOM_HRS'

    if first_run:
        create(db_name)

    db = PhilDB(db_name)

    if first_run:
        db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
        db.add_source(source_id, 'Bureau of Meteorology; Hydrological Reference Stations dataset.')

    elapsed = []
    for filename in file_list:
        print("Processing file: ", filename, '...')
        base_name = os.path.basename(filename)
        station_id = base_name.split('_')[0]
        print("Using station ID: ", station_id, '...')
        streamflow = pd.read_csv(filename, parse_dates=True, index_col=0, header=None)

        if first_run:
            db.add_timeseries(station_id)
            db.add_timeseries_instance(station_id, freq, '', measurand='Q', source=source_id)

        # Only the write itself is timed.
        began = time.time()
        db.write(station_id, freq, streamflow, measurand='Q', source=source_id)
        elapsed.append(time.time() - began)

    np.savetxt(results_file, np.array(elapsed))
コード例 #8
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_log_write(self):
        """Verify the HDF5 log contents after an initial write, an update and an append.

        The second write changes only the value for 2014-01-02 (2.0 -> 2.5)
        and the third adds 2014-01-04.  The assertions expect the log to
        keep the three initial rows and add one row for the change and one
        for the new date.
        """
        db = PhilDB(self.test_tsdb)
        ids = {'measurand': 'Q', 'source': 'DATA_SOURCE'}

        db.add_timeseries('410731')
        db.add_timeseries_instance('410731', 'D', 'Foo', **ids)

        dates = [datetime(2014, 1, 1), datetime(2014, 1, 2), datetime(2014, 1, 3)]
        initial = pd.Series(index=dates, data=[1.0, 2.0, 3.0])
        db.write('410731', 'D', initial, **ids)

        updated = pd.Series(index=dates, data=[1.0, 2.5, 3.0])
        db.write('410731', 'D', updated, **ids)

        appended = pd.Series(index=[datetime(2014, 1, 4)], data=[4.0])
        db.write('410731', 'D', appended, **ids)

        results = db.read('410731', 'D')
        for slot, value in enumerate([1.0, 2.5, 3.0, 4.0]):
            self.assertEqual(results.values[slot], value)

        # One tuple per log row, listing the leading fields to check.  Only
        # row 0 has its third field asserted (its meaning is not visible
        # in this snippet).
        log_rows = (
            (1388534400, 1.0, 0),
            (1388620800, 2.0),
            (1388707200, 3.0),
            (1388620800, 2.5),
            (1388793600, 4.0),
        )
        log_path = db.get_file_path('410731', 'D', ftype='hdf5')
        with tables.open_file(log_path, 'r') as hdf5_file:
            log_grp = hdf5_file.get_node('/data')

            for row_number, fields in enumerate(log_rows):
                for field_number, wanted in enumerate(fields):
                    self.assertEqual(log_grp.log[row_number][field_number], wanted)
コード例 #9
0
ファイル: gen_db.py プロジェクト: cc272309126/phildb
# Open the test database located at test_tsdb_path.
db = PhilDB(test_tsdb_path)

# Register the single measurand ('Q') and source ('DATA_SOURCE') that every
# timeseries instance below refers to.
db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

# Station 410730: register the timeseries and its 'D' frequency instance,
# then write three values for 2014-01-01 to 2014-01-03.
db.add_timeseries('410730')
db.add_timeseries_instance('410730',
                           'D',
                           '',
                           measurand='Q',
                           source='DATA_SOURCE')
db.write('410730',
         'D',
         pd.Series(index=[
             datetime.date(2014, 1, 1),
             datetime.date(2014, 1, 2),
             datetime.date(2014, 1, 3)
         ],
                   data=[1, 2, 3]),
         source='DATA_SOURCE',
         measurand='Q')

# Station 123456: same registration steps as for 410730.
db.add_timeseries('123456')
db.add_timeseries_instance('123456',
                           'D',
                           '',
                           measurand='Q',
                           source='DATA_SOURCE')
db.write('123456',
         'D',
         pd.Series(index=[
             datetime.date(2014, 1, 1),
コード例 #10
0
ファイル: load_bom_data.py プロジェクト: cc272309126/phildb
            data.append(ob[measurand])
    dates.reverse()
    data.reverse()

    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)

# Identifiers shared by every write performed by this script.
measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# The first data file (sys.argv[2]) determines the station and seeds the
# timeseries.  parse() returns a (station_id, series) pair.
with open(sys.argv[2]) as first_file:
    station_id, data = parse(json.load(first_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id, freq, 'None', measurand = measurand, source = source)
db.write(station_id, freq, data, measurand = measurand, source = source)

# Any remaining files are written to the same station's timeseries.
for filename in sys.argv[3:]:
    print("Processing file: ", filename, '...')

    try:
        with open(filename) as obs_file:
            # Only the series half of parse()'s result is written; passing
            # the whole (station_id, series) tuple to db.write was a bug.
            _, more_data = parse(json.load(obs_file), measurand)
        db.write(station_id, freq, more_data, measurand = measurand, source = source)
    except ValueError:
        # Best effort: report the file and carry on with the next one.
        print('Could not parse: {0}'.format(filename))

コード例 #11
0
ファイル: load_bom_data.py プロジェクト: cc272309126/phildb
    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)


# Identifiers shared by every write performed by this script.
measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# The first data file (sys.argv[2]) determines the station and seeds the
# timeseries.  parse() returns a (station_id, series) pair.
with open(sys.argv[2]) as first_file:
    station_id, data = parse(json.load(first_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id,
                           freq,
                           'None',
                           measurand=measurand,
                           source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

# Any remaining files are written to the same station's timeseries.
for filename in sys.argv[3:]:
    print("Processing file: ", filename, '...')

    try:
        with open(filename) as obs_file:
            # Only the series half of parse()'s result is written; passing
            # the whole (station_id, series) tuple to db.write was a bug.
            _, more_data = parse(json.load(obs_file), measurand)
        db.write(station_id, freq, more_data, measurand=measurand, source=source)
    except ValueError:
        # Best effort: report the file and carry on with the next one.
        print('Could not parse: {0}'.format(filename))
コード例 #12
0
ファイル: gen_db.py プロジェクト: amacd31/phildb
    shutil.rmtree(test_tsdb_path)
except OSError as e:
    if e.errno != 2: # Code 2: No such file or directory.
        raise

# Build a fresh test database holding two stations with identical data.
create(test_tsdb_path)
db = PhilDB(test_tsdb_path)

db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

# Each station gets a timeseries, a 'D' frequency instance and the same
# three values for 2014-01-01 to 2014-01-03.
for station_id in ('410730', '123456'):
    db.add_timeseries(station_id)
    db.add_timeseries_instance(station_id, 'D', '',
                               measurand='Q', source='DATA_SOURCE')

    first_three_days = [datetime.date(2014, 1, day) for day in (1, 2, 3)]
    db.write(station_id, 'D',
             pd.Series(index=first_three_days, data=[1, 2, 3]),
             source='DATA_SOURCE',
             measurand='Q')
コード例 #13
0
import os
import sys
import datetime
import pandas as pd
from phildb.database import PhilDB

# Usage: <script> <phildb path> <station file> [<station file> ...]
# The base name of every station argument must be an integer station number.
print("Writing to PhilDB({0})".format(sys.argv[1]))
db = PhilDB(sys.argv[1])
db.add_measurand('maxT', 'MAXIMUM_TEMPERATURE', 'Maximum Temperature')
db.add_measurand('minT', 'MINIMUM_TEMPERATURE', 'Minimum Temperature')
# The description previously named the Hydrological Reference Stations
# dataset, which is a different dataset from ACORN-SAT.
db.add_source('BOM_ACORN_SAT', 'Bureau of Meteorology; Australian Climate Observations Reference Network - Surface Air Temperature dataset.')

freq = 'D'

for station_arg in sys.argv[2:]:
    print("Processing file: ", station_arg, '...')
    # Station IDs are the numeric base name, zero padded to six digits.
    station_id = "{0:06d}".format(int(os.path.basename(station_arg)))
    print("Using station ID: ", station_id, '...')

    db.add_timeseries(station_id)
    for variable in ['minT', 'maxT']:
        # The data itself is always read from the local 'data' directory,
        # one whitespace-separated file per variable and station.
        input_file = 'data/acorn.sat.{0}.{1}.daily.txt'.format(variable, station_id)
        df = pd.read_csv(input_file, parse_dates=[0], index_col=0, header=None, skiprows=1, sep=r"\s+", na_values='99999.9', names=['Date',variable])
        db.add_timeseries_instance(station_id, freq, 'ACORN-SAT', measurand = variable, source = 'BOM_ACORN_SAT')
        db.write(station_id, freq, df[variable], measurand = variable, source = 'BOM_ACORN_SAT')