class PhilDBSink(BufferedSink):
    """A buffered sink that persists sensor readings to a PhilDB
    timeseries database.

    Database, source, measurand and timeseries entries are created
    lazily and idempotently: duplicate-creation errors are swallowed so
    repeated runs against an existing database are harmless.
    """

    def __init__(self, dbfile, *args, **kwargs):
        """Open the PhilDB at ``dbfile``, creating it if necessary."""
        super(PhilDBSink, self).__init__(*args, **kwargs)
        try:
            create(dbfile)
        except AlreadyExistsError:
            # Database was created on a previous run; nothing to do.
            pass
        self.db = PhilDB(dbfile)
        # Most recently inferred series frequency; used as a fallback
        # when a single-value write makes inference impossible.
        self.last_known_freq = None
        try:
            self.db.add_source('SENSOR', 'Data from hardware sensor')
        except DuplicateError:
            # Source already registered.
            pass

    def write_buffer(self, param_name, series):
        """Write buffer of data to database."""
        if not len(series):
            return

        try:
            self.db.add_measurand(param_name, param_name, param_name)
        except DuplicateError:
            # Measurand already registered.
            pass
        try:
            self.db.add_timeseries(param_name)
        except DuplicateError:
            # Timeseries already registered.
            pass

        freq = series.index.inferred_freq
        if len(series) == 1:
            # A single sample cannot yield an inferred frequency, so fall
            # back to the last one observed — in general it is the same.
            freq = self.last_known_freq
        elif freq is not None:
            self.last_known_freq = freq
        if freq is None:
            raise ValueError('Unable to determine sensor frequency')

        try:
            self.db.add_timeseries_instance(
                param_name, freq, 'None',
                measurand=param_name, source='SENSOR')
        except DuplicateError:
            # Timeseries instance already registered.
            pass

        self.db.write(param_name, freq, series,
                      measurand=param_name, source='SENSOR')
def test_update_and_append(self):
    """Writing dates that overlap and extend an existing series keeps
    the original leading value and appends the rest in order."""
    db = PhilDB(self.test_tsdb)
    new_values = [2.5, 3.0, 4.0, 5.0, 6.0]
    db.write('410730', 'D',
             pd.Series(index=[datetime(2014, 1, day) for day in range(2, 7)],
                       data=new_values),
             measurand='Q', source='DATA_SOURCE')

    data = db.read('410730', 'D', measurand='Q', source='DATA_SOURCE')

    # Pre-existing value for 2014-01-01 followed by the five new values.
    for pos, expected in enumerate([1.0] + new_values):
        self.assertEqual(expected, data.values[pos])
    for pos in range(6):
        self.assertEqual(datetime(2014, 1, pos + 1),
                         data.index[pos].to_pydatetime())
def test_new_write(self):
    """A freshly created timeseries instance round-trips its data."""
    db = PhilDB(self.test_tsdb)
    db.add_timeseries('410731')
    db.add_timeseries_instance('410731', 'D', 'Foo',
                               measurand='Q', source='DATA_SOURCE')
    db.write('410731', 'D',
             pd.Series(index=[datetime(2014, 1, day) for day in (1, 2, 3)],
                       data=[1.0, 2.0, 3.0]),
             measurand='Q', source='DATA_SOURCE')

    results = db.read('410731', 'D', measurand='Q', source='DATA_SOURCE')

    self.assertEqual(results.index[0].year, 2014)
    self.assertEqual(results.index[0].month, 1)
    for pos, day in enumerate((1, 2, 3)):
        self.assertEqual(results.index[pos].day, day)
        self.assertEqual(results.values[pos], float(day))
def test_update_and_append(self):
    """Overlapping update plus append leaves earlier data intact."""
    db = PhilDB(self.test_tsdb)
    db.write(
        '410730', 'D',
        pd.Series(
            index=[datetime(2014, 1, 2), datetime(2014, 1, 3),
                   datetime(2014, 1, 4), datetime(2014, 1, 5),
                   datetime(2014, 1, 6)],
            data=[2.5, 3.0, 4.0, 5.0, 6.0],
        ),
        measurand='Q', source='DATA_SOURCE',
    )

    data = db.read('410730', 'D', measurand='Q', source='DATA_SOURCE')

    expected = [1.0, 2.5, 3.0, 4.0, 5.0, 6.0]
    for offset, value in enumerate(expected):
        self.assertEqual(value, data.values[offset])
        self.assertEqual(datetime(2014, 1, offset + 1),
                         data.index[offset].to_pydatetime())
def test_new_write(self):
    """Data written to a new instance is read back unchanged."""
    db = PhilDB(self.test_tsdb)
    db.add_timeseries('410731')
    db.add_timeseries_instance('410731', 'D', 'Foo',
                               measurand='Q', source='DATA_SOURCE')
    index = [datetime(2014, 1, 1), datetime(2014, 1, 2),
             datetime(2014, 1, 3)]
    db.write('410731', 'D', pd.Series(index=index, data=[1.0, 2.0, 3.0]),
             measurand='Q', source='DATA_SOURCE')

    results = db.read('410731', 'D', measurand='Q', source='DATA_SOURCE')

    self.assertEqual(results.index[0].year, 2014)
    self.assertEqual(results.index[0].month, 1)
    self.assertEqual(results.index[0].day, 1)
    self.assertEqual(results.index[1].day, 2)
    self.assertEqual(results.index[2].day, 3)
    for pos, value in enumerate([1.0, 2.0, 3.0]):
        self.assertEqual(results.values[pos], value)
def test_log_write(self):
    """Every write is appended to the HDF5 change log as
    (timestamp, value, ...) records, in write order."""
    db = PhilDB(self.test_tsdb)
    db.add_timeseries('410731')
    db.add_timeseries_instance('410731', 'D', 'Foo',
                               measurand='Q', source='DATA_SOURCE')

    dates = [datetime(2014, 1, 1), datetime(2014, 1, 2),
             datetime(2014, 1, 3)]
    db.write('410731', 'D', pd.Series(index=dates, data=[1.0, 2.0, 3.0]),
             measurand='Q', source='DATA_SOURCE')
    db.write('410731', 'D', pd.Series(index=dates, data=[1.0, 2.5, 3.0]),
             measurand='Q', source='DATA_SOURCE')
    db.write('410731', 'D',
             pd.Series(index=[datetime(2014, 1, 4)], data=[4.0]),
             measurand='Q', source='DATA_SOURCE')

    results = db.read('410731', 'D')
    for pos, value in enumerate([1.0, 2.5, 3.0, 4.0]):
        self.assertEqual(results.values[pos], value)

    # Expected log records: (unix timestamp, value). The unchanged 1.0 on
    # 2014-01-01 is not re-logged by the second write.
    expected_log = [
        (1388534400, 1.0),  # 2014-01-01 initial write
        (1388620800, 2.0),  # 2014-01-02 initial write
        (1388707200, 3.0),  # 2014-01-03 initial write
        (1388620800, 2.5),  # 2014-01-02 updated value
        (1388793600, 4.0),  # 2014-01-04 appended value
    ]
    with tables.open_file(db.get_file_path('410731', 'D', ftype='hdf5'),
                          'r') as hdf5_file:
        log = hdf5_file.get_node('/data').log
        self.assertEqual(log[0][2], 0)
        for pos, (stamp, value) in enumerate(expected_log):
            self.assertEqual(log[pos][0], stamp)
            self.assertEqual(log[pos][1], value)
def write_phildb(file_list, results_file, first_run=False):
    """Load HRS streamflow CSV files into the ``hrs_phildb`` database and
    save per-file write timings.

    :param file_list: paths to HRS CSV files; the station ID is the part
        of each basename before the first underscore.
    :param results_file: destination for the timing results, written via
        ``np.savetxt`` (one float per file).
    :param first_run: when True, create the database and register the
        measurand, source and per-station timeseries entries first.

    NOTE(review): ``freq`` is referenced but not defined in this
    function — presumably a module-level constant in the full file;
    confirm before reuse.
    """
    if first_run:
        create('hrs_phildb')
    db = PhilDB('hrs_phildb')
    if first_run:
        db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
        db.add_source('BOM_HRS',
                      'Bureau of Meteorology; Hydrological Reference '
                      'Stations dataset.')

    timings = []
    for path in file_list:
        print("Processing file: ", path, '...')
        station_id = os.path.basename(path).split('_')[0]
        print("Using station ID: ", station_id, '...')
        streamflow = pd.read_csv(path, parse_dates=True,
                                 index_col=0, header=None)
        if first_run:
            db.add_timeseries(station_id)
            db.add_timeseries_instance(station_id, freq, '',
                                       measurand='Q', source='BOM_HRS')
        # Time only the db.write call itself.
        started = time.time()
        db.write(station_id, freq, streamflow,
                 measurand='Q', source='BOM_HRS')
        timings.append(time.time() - started)

    np.savetxt(results_file, np.array(timings))
def test_log_write(self):
    """Successive writes accumulate (timestamp, value) records in the
    HDF5 log node."""
    db = PhilDB(self.test_tsdb)
    db.add_timeseries('410731')
    db.add_timeseries_instance('410731', 'D', 'Foo',
                               measurand='Q', source='DATA_SOURCE')

    def store(index, values):
        # Helper: write one series to the instance under test.
        db.write('410731', 'D', pd.Series(index=index, data=values),
                 measurand='Q', source='DATA_SOURCE')

    dates = [datetime(2014, 1, 1), datetime(2014, 1, 2),
             datetime(2014, 1, 3)]
    store(dates, [1.0, 2.0, 3.0])
    store(dates, [1.0, 2.5, 3.0])
    store([datetime(2014, 1, 4)], [4.0])

    results = db.read('410731', 'D')
    self.assertEqual(results.values[0], 1.0)
    self.assertEqual(results.values[1], 2.5)
    self.assertEqual(results.values[2], 3.0)
    self.assertEqual(results.values[3], 4.0)

    with tables.open_file(db.get_file_path('410731', 'D', ftype='hdf5'),
                          'r') as hdf5_file:
        log = hdf5_file.get_node('/data').log
        # Initial write: three records, one per date.
        self.assertEqual(log[0][0], 1388534400)
        self.assertEqual(log[0][1], 1.0)
        self.assertEqual(log[0][2], 0)
        self.assertEqual(log[1][0], 1388620800)
        self.assertEqual(log[1][1], 2.0)
        self.assertEqual(log[2][0], 1388707200)
        self.assertEqual(log[2][1], 3.0)
        # Second write logs only the changed 2014-01-02 value.
        self.assertEqual(log[3][0], 1388620800)
        self.assertEqual(log[3][1], 2.5)
        # Third write logs the appended 2014-01-04 value.
        self.assertEqual(log[4][0], 1388793600)
        self.assertEqual(log[4][1], 4.0)
# Build a test-fixture database with two daily ('D') streamflow series.
db = PhilDB(test_tsdb_path)
db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')  # Source registered with an empty description.
db.add_timeseries('410730')
db.add_timeseries_instance('410730', 'D', '',
                           measurand='Q', source='DATA_SOURCE')
db.write('410730', 'D', pd.Series(index=[
    datetime.date(2014, 1, 1),
    datetime.date(2014, 1, 2),
    datetime.date(2014, 1, 3)
], data=[1, 2, 3]), source='DATA_SOURCE', measurand='Q')
db.add_timeseries('123456')
db.add_timeseries_instance('123456', 'D', '',
                           measurand='Q', source='DATA_SOURCE')
# NOTE(review): this excerpt is truncated mid-expression below — the rest
# of this db.write call lies outside the excerpt.
db.write('123456', 'D', pd.Series(index=[
    datetime.date(2014, 1, 1),
# NOTE(review): this excerpt begins inside the body of parse() — its `def`
# line and the loop over observations are above the excerpt, so the
# indentation of the first lines is reconstructed; confirm against the
# full file. `db` is also presumably opened above the excerpt.
        data.append(ob[measurand])
    # Observations arrive newest-first; reverse into chronological order.
    dates.reverse()
    data.reverse()
    # Station identifier comes from the BOM observations JSON header.
    station_id = station_json['observations']['header'][0]['ID']
    return station_id, pd.Series(data, dates)


measurand = 'air_temp'  # Observation field extracted by parse().
source = 'BOM_OBS'
freq = '30min'  # Frequency string used for the timeseries instance.

# The first input file establishes the station and the PhilDB metadata.
station_id, data = parse(json.load(open(sys.argv[2])), measurand)
db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source('BOM_OBS', 'Australian Bureau of Meteorology Observations')
db.add_timeseries(station_id)
db.add_timeseries_instance(station_id, freq, 'None',
                           measurand = measurand, source = source)
db.write(station_id, freq, data, measurand = measurand, source = source)

# Remaining files are parsed and written; unparseable ones are skipped.
for i in range(3, len(sys.argv)):
    print("Processing file: ", sys.argv[i], '...')
    try:
        x = parse(json.load(open(sys.argv[i])), measurand)
        db.write(station_id, freq, x, measurand = measurand, source = source)
    except ValueError as e:
        print('Could not parse: {0}'.format(sys.argv[i]))
# NOTE(review): this excerpt starts inside the body of parse() — its `def`
# line is above the excerpt, so the indentation of the first lines is
# reconstructed; confirm against the full file. `db` is presumably opened
# above the excerpt as well.
    # Station identifier comes from the BOM observations JSON header.
    station_id = station_json['observations']['header'][0]['ID']
    return station_id, pd.Series(data, dates)


measurand = 'air_temp'  # Observation field extracted by parse().
source = 'BOM_OBS'
freq = '30min'  # Frequency string used for the timeseries instance.

# The first input file establishes the station and the PhilDB metadata.
station_id, data = parse(json.load(open(sys.argv[2])), measurand)
db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source('BOM_OBS', 'Australian Bureau of Meteorology Observations')
db.add_timeseries(station_id)
db.add_timeseries_instance(station_id, freq, 'None',
                           measurand=measurand, source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

# Remaining files are parsed and written; unparseable ones are skipped.
for i in range(3, len(sys.argv)):
    print("Processing file: ", sys.argv[i], '...')
    try:
        x = parse(json.load(open(sys.argv[i])), measurand)
        db.write(station_id, freq, x, measurand=measurand, source=source)
    except ValueError as e:
        print('Could not parse: {0}'.format(sys.argv[i]))
    # NOTE(review): the opening `try:` for this except clause is above
    # this excerpt. Remove any leftover fixture database, ignoring
    # "does not exist".
    shutil.rmtree(test_tsdb_path)
except OSError as e:
    if e.errno != 2: # Code 2: No such file or directory.
        raise

# Build a test-fixture database with two daily ('D') streamflow series.
create(test_tsdb_path)
db = PhilDB(test_tsdb_path)
db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')  # Source registered with an empty description.
db.add_timeseries('410730')
db.add_timeseries_instance('410730', 'D', '',
                           measurand = 'Q', source = 'DATA_SOURCE')
db.write('410730', 'D', pd.Series(
        index = [datetime.date(2014, 1, 1),
                 datetime.date(2014, 1, 2),
                 datetime.date(2014, 1, 3)],
        data = [1,2,3]),
    source = 'DATA_SOURCE', measurand = 'Q'
)
db.add_timeseries('123456')
db.add_timeseries_instance('123456', 'D', '',
                           measurand = 'Q', source = 'DATA_SOURCE')
db.write('123456', 'D', pd.Series(
        index = [datetime.date(2014, 1, 1),
                 datetime.date(2014, 1, 2),
                 datetime.date(2014, 1, 3)],
        data = [1,2,3]),
    source = 'DATA_SOURCE', measurand = 'Q'
)
"""Load ACORN-SAT daily temperature extremes into a PhilDB.

Usage: script.py <phildb_path> <station_number> [<station_number> ...]

For each station, reads the min/max daily temperature files from
``data/acorn.sat.<variable>.<station_id>.daily.txt`` and writes them as
the ``minT``/``maxT`` measurands of source ``BOM_ACORN_SAT``.
"""
import os
import sys
import datetime

import pandas as pd

from phildb.database import PhilDB

print("Writing to PhilDB({0})".format(sys.argv[1]))
db = PhilDB(sys.argv[1])

db.add_measurand('maxT', 'MAXIMUM_TEMPERATURE', 'Maximum Temperature')
db.add_measurand('minT', 'MINIMUM_TEMPERATURE', 'Minimum Temperature')
# BUG FIX: the source description previously read "Hydrological Reference
# Stations dataset" — a copy/paste from the HRS loader. This script loads
# ACORN-SAT data, so describe the source accordingly.
db.add_source('BOM_ACORN_SAT',
              'Bureau of Meteorology; Australian Climate Observations '
              'Reference Network - Surface Air Temperature (ACORN-SAT) '
              'dataset.')

freq = 'D'  # ACORN-SAT files hold daily records.

for i in range(2, len(sys.argv)):
    print("Processing file: ", sys.argv[i], '...')
    # Station numbers are zero-padded to six digits, e.g. 23090 -> "023090".
    station_id = "{0:06d}".format(int(os.path.basename(sys.argv[i])))
    print("Using station ID: ", station_id, '...')
    db.add_timeseries(station_id)
    for variable in ['minT', 'maxT']:
        input_file = 'data/acorn.sat.{0}.{1}.daily.txt'.format(
            variable, station_id)
        # Whitespace-separated file with one header row;
        # 99999.9 marks missing values.
        df = pd.read_csv(input_file, parse_dates=[0], index_col=0,
                         header=None, skiprows=1, sep=r"\s+",
                         na_values='99999.9', names=['Date', variable])
        db.add_timeseries_instance(station_id, freq, 'ACORN-SAT',
                                   measurand=variable,
                                   source='BOM_ACORN_SAT')
        db.write(station_id, freq, df[variable],
                 measurand=variable, source='BOM_ACORN_SAT')