Example #1
0
class PhilDBSink(BufferedSink):
    """
        A buffered sink using the PhilDB timeseries database.

        Every parameter is stored as its own timeseries under the 'SENSOR'
        source, using the parameter name as the timeseries ID and as the
        measurand ID.
    """
    def __init__(self, dbfile, *args, **kwargs):
        """
            Open the PhilDB database at ``dbfile``, creating it if required.

            :param dbfile: Location handed to ``create`` and ``PhilDB``.
            :param args: Positional arguments forwarded to ``BufferedSink``.
            :param kwargs: Keyword arguments forwarded to ``BufferedSink``.
        """
        super(PhilDBSink, self).__init__(*args, **kwargs)

        try:
            create(dbfile)
        except AlreadyExistsError:
            pass  # Database already exists, so no creation required.

        self.db = PhilDB(dbfile)
        # Most recent frequency inferred from a buffer; reused for buffers
        # that are too short to infer a frequency from.
        self.last_known_freq = None
        try:
            self.db.add_source('SENSOR', 'Data from hardware sensor')
        except DuplicateError:
            pass  # DuplicateError means the source already existed

    def write_buffer(self, param_name, series):
        """
            Write buffer of data to database.

            :param param_name: Name used as timeseries ID and measurand ID.
            :param series: Buffered values; ``series.index.inferred_freq``
                is used to determine the frequency to write with.
            :raises ValueError: If no frequency can be determined for the
                buffer.
        """
        if len(series) == 0:
            return

        try:
            self.db.add_measurand(param_name, param_name, param_name)
        except DuplicateError:
            pass  # DuplicateError means the measurand already existed

        try:
            self.db.add_timeseries(param_name)
        except DuplicateError:
            pass  # DuplicateError means the timeseries already existed

        freq = series.index.inferred_freq
        if freq is not None:
            self.last_known_freq = freq
        elif len(series) < 3:
            # pandas needs at least three timestamps to infer a frequency,
            # so buffers holding one or two values can never provide one.
            # Use the last known value, which in general should always be
            # the same.
            freq = self.last_known_freq

        if freq is None:
            raise ValueError('Unable to determine sensor frequency')

        try:
            self.db.add_timeseries_instance(param_name,
                                            freq,
                                            'None',
                                            measurand=param_name,
                                            source='SENSOR')
        except DuplicateError:
            pass  # DuplicateError - the timeseries instance already existed

        self.db.write(param_name,
                      freq,
                      series,
                      measurand=param_name,
                      source='SENSOR')
Example #2
0
    def test_ts_list_sorted(self):
        """
            Check that ``ts_list`` returns the timeseries IDs in sorted order.
        """
        tsdb = PhilDB(self.test_tsdb)
        tsdb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # '410730' is not added here; presumably the test database already
        # holds it (along with '123456').
        tsdb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='P', source='DATA_SOURCE')

        # New ID with one instance for each of the two measurands.
        tsdb.add_timeseries('410731')
        for measurand_id in ('P', 'Q'):
            tsdb.add_timeseries_instance(
                '410731', 'D', 'Foo',
                measurand=measurand_id, source='DATA_SOURCE')

        expected_ids = ['123456', '410730', '410731']
        self.assertEqual(expected_ids, tsdb.ts_list())
Example #3
0
    def test_read_non_unique(self):
        """
            Reading by ID and frequency alone must raise MultipleResultsFound
            once a second instance has been added for the same ID/frequency.
        """
        db = PhilDB(self.test_tsdb)

        db.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        db.add_timeseries_instance('410730', 'D', 'Foo',
                                   measurand='P', source='DATA_SOURCE')

        # Neither the raised exception nor a return value is inspected, so
        # nothing is bound here.
        with self.assertRaises(MultipleResultsFound):
            db.read('410730', 'D')
Example #4
0
    def test_ts_list_ids(self):
        """
            Check that ``list_ids`` includes a newly added timeseries ID.
        """
        tsdb = PhilDB(self.test_tsdb)
        tsdb.add_timeseries('410731')
        tsdb.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        tsdb.add_timeseries_instance(
            '410731', 'D', 'Foo', measurand='P', source='DATA_SOURCE')

        # The first two IDs are not added here; presumably they are already
        # present in the test database.
        expected_ids = ['123456', '410730', '410731']
        self.assertEqual(expected_ids, tsdb.list_ids())
Example #5
0
    def test_measurand_list_sorted(self):
        """
            Check that ``list_measurands`` returns short IDs in sorted order.
        """
        tsdb = PhilDB(self.test_tsdb)
        tsdb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # 'Q' is not added here; presumably the test database already has it.
        measurand_ids = tsdb.list_measurands()
        self.assertEqual(['P', 'Q'], measurand_ids)
Example #6
0
    def test_measurand_list_sorted(self):
        """
            Measurand short IDs must come back from ``list_measurands`` sorted.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Only 'P' is added in this test; 'Q' presumably pre-exists in the
        # test database.
        expected_short_ids = ['P', 'Q']
        self.assertEqual(expected_short_ids, phildb.list_measurands())
Example #7
0
    def test_ts_list_measurand_and_source(self):
        """
            Check ``ts_list`` when filtered by both source and measurand.
        """
        source_id = 'EXAMPLE_SOURCE'
        measurand_id = 'P'

        tsdb = PhilDB(self.test_tsdb)
        tsdb.add_timeseries('410731')
        tsdb.add_source(source_id, 'Example source, i.e. a dataset')
        tsdb.add_measurand(measurand_id, 'PRECIPITATION', 'Precipitation')
        tsdb.add_timeseries_instance(
            '410731', 'D', 'Foo', measurand=measurand_id, source=source_id)

        # Only the ID registered above is expected for this combination.
        matching_ids = tsdb.ts_list(source=source_id, measurand=measurand_id)
        self.assertEqual(['410731'], matching_ids)
Example #8
0
    def test_ts_list_unique_ids(self):
        """
            An ID with several measurand/source combinations is listed once.
        """
        tsdb = PhilDB(self.test_tsdb)
        tsdb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Adds a further instance for '410730', which presumably already
        # has one in the test database.
        tsdb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='P', source='DATA_SOURCE')

        expected_ids = ['123456', '410730']
        self.assertEqual(expected_ids, tsdb.ts_list())
Example #9
0
    def test_get_ts_instance(self):
        """
            Exercise the private ``__get_ts_instance`` lookup: one successful
            lookup followed by two that must raise MissingDataError.
        """
        tsdb = PhilDB(self.test_tsdb)
        # Private method, reached through its name-mangled attribute.
        lookup = tsdb._PhilDB__get_ts_instance

        found = lookup('410730', 'D', measurand='Q', source='DATA_SOURCE')
        self.assertEqual('410730', found.timeseries.primary_id)
        self.assertEqual('Q', found.measurand.short_id)

        # '410731' is never added in this test.
        with self.assertRaises(MissingDataError):
            lookup('410731', 'D', measurand='Q', source='DATA_SOURCE')

        # The measurand now exists, but no instance is added for it.
        tsdb.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        with self.assertRaises(MissingDataError):
            lookup('410730', 'D', measurand='P', source='DATA_SOURCE')
Example #10
0
    def test_ts_list_ids(self):
        """
            ``list_ids`` must report a newly registered timeseries ID.
        """
        new_id = '410731'

        phildb = PhilDB(self.test_tsdb)
        phildb.add_timeseries(new_id)
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        phildb.add_timeseries_instance(
            new_id, 'D', 'Foo', measurand='P', source='DATA_SOURCE')

        # '123456' and '410730' are not added here; presumably they are
        # already in the test database.
        listed_ids = phildb.list_ids()
        self.assertEqual(['123456', '410730', new_id], listed_ids)
Example #11
0
    def test_read_non_unique(self):
        """
            Reading by ID and frequency alone must raise MultipleResultsFound
            once a second instance has been added for the same ID/frequency.
        """
        db = PhilDB(self.test_tsdb)

        db.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        db.add_timeseries_instance('410730',
                                   'D',
                                   'Foo',
                                   measurand='P',
                                   source='DATA_SOURCE')

        # Neither the raised exception nor a return value is inspected, so
        # nothing is bound here.
        with self.assertRaises(MultipleResultsFound):
            db.read('410730', 'D')
Example #12
0
    def test_ts_list_measurand_and_source(self):
        """
            ``ts_list`` filtered by source and measurand returns matching IDs.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_timeseries('410731')
        phildb.add_source('EXAMPLE_SOURCE', 'Example source, i.e. a dataset')
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        phildb.add_timeseries_instance(
            '410731', 'D', 'Foo', measurand='P', source='EXAMPLE_SOURCE')

        # Filter on the source/measurand pair registered just above.
        filters = {'source': 'EXAMPLE_SOURCE', 'measurand': 'P'}
        self.assertEqual(['410731'], phildb.ts_list(**filters))
Example #13
0
    def test_add_duplicates(self):
        """
            Each ``add_*`` call below must raise DuplicateError.

            The source, measurand, timeseries and instance used here are not
            created in this test; presumably the test database already
            contains them.
        """
        db = PhilDB(self.test_tsdb)

        # The raised exceptions are never inspected, so they are not bound.
        with self.assertRaises(DuplicateError):
            db.add_source('DATA_SOURCE', 'Duplicate source')

        with self.assertRaises(DuplicateError):
            db.add_measurand('Q', 'STREAMFLOW', 'Duplicate measurand')

        with self.assertRaises(DuplicateError):
            db.add_timeseries('410730')

        with self.assertRaises(DuplicateError):
            db.add_timeseries_instance('410730', 'D', '',
                                       source='DATA_SOURCE', measurand='Q')
Example #14
0
    def test_add_measurand_entry(self):
        """
            Adding a measurand must store its short ID, long ID and
            description in the ``measurand`` table of the meta data database.
        """
        create(self.temp_dir)
        db = PhilDB(self.temp_dir)
        db.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Inspect the SQLite file directly, bypassing the PhilDB API.
        conn = sqlite3.connect(db._PhilDB__meta_data_db())
        try:
            c = conn.cursor()
            c.execute("SELECT * FROM measurand;")
            row = c.fetchone()
        finally:
            # Always release the connection, even if the query fails.
            conn.close()

        # The first column (primary key) is not checked.
        _pk, measurand_short_id, measurand_long_id, measurand_description = row

        self.assertEqual(measurand_short_id, 'P')
        self.assertEqual(measurand_long_id, 'PRECIPITATION')
        self.assertEqual(measurand_description, 'Precipitation')
Example #15
0
    def test_add_measurand_entry(self):
        """
            Adding a measurand must store its short ID, long ID and
            description in the ``measurand`` table of the meta data database.
        """
        create(self.temp_dir)
        db = PhilDB(self.temp_dir)
        db.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Inspect the SQLite file directly, bypassing the PhilDB API.
        conn = sqlite3.connect(db._PhilDB__meta_data_db())
        try:
            c = conn.cursor()
            c.execute("SELECT * FROM measurand;")
            row = c.fetchone()
        finally:
            # Always release the connection, even if the query fails.
            conn.close()

        # The first column (primary key) is not checked.
        _pk, measurand_short_id, measurand_long_id, measurand_description = row

        self.assertEqual(measurand_short_id, 'P')
        self.assertEqual(measurand_long_id, 'PRECIPITATION')
        self.assertEqual(measurand_description, 'Precipitation')
Example #16
0
    def test_ts_list_unique_ids(self):
        """
            Each ID must be listed once, however many instances it has.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Extra instance for '410730'; the ID itself is not added here, so
        # it presumably already exists in the test database.
        phildb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='P', source='DATA_SOURCE')

        listed_ids = phildb.ts_list()
        self.assertEqual(['123456', '410730'], listed_ids)
Example #17
0
    def test_ts_list_sorted(self):
        """
            The IDs returned by ``ts_list`` must be in sorted order.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')

        # Arguments shared by every instance added in this test.
        common = {'source': 'DATA_SOURCE'}

        # '410730' is not added here; presumably it is already in the
        # test database (as is '123456').
        phildb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='P', **common)

        phildb.add_timeseries('410731')
        phildb.add_timeseries_instance(
            '410731', 'D', 'Foo', measurand='P', **common)
        phildb.add_timeseries_instance(
            '410731', 'D', 'Foo', measurand='Q', **common)

        listed_ids = phildb.ts_list()
        self.assertEqual(['123456', '410730', '410731'], listed_ids)
Example #18
0
    def test_add_duplicates(self):
        """
            Each ``add_*`` call below must raise DuplicateError.

            The source, measurand, timeseries and instance used here are not
            created in this test; presumably the test database already
            contains them.
        """
        db = PhilDB(self.test_tsdb)

        # The raised exceptions are never inspected, so they are not bound.
        with self.assertRaises(DuplicateError):
            db.add_source('DATA_SOURCE', 'Duplicate source')

        with self.assertRaises(DuplicateError):
            db.add_measurand('Q', 'STREAMFLOW', 'Duplicate measurand')

        with self.assertRaises(DuplicateError):
            db.add_timeseries('410730')

        with self.assertRaises(DuplicateError):
            db.add_timeseries_instance('410730',
                                       'D',
                                       '',
                                       source='DATA_SOURCE',
                                       measurand='Q')
Example #19
0
    def test_get_ts_instance(self):
        """
            Check the private ``__get_ts_instance`` lookup for one existing
            instance and two lookups that must raise MissingDataError.
        """
        phildb = PhilDB(self.test_tsdb)

        instance = phildb._PhilDB__get_ts_instance(
            '410730', 'D', measurand='Q', source='DATA_SOURCE')
        self.assertEqual('410730', instance.timeseries.primary_id)
        self.assertEqual('Q', instance.measurand.short_id)

        # '410731' is never added in this test.
        with self.assertRaises(MissingDataError):
            phildb._PhilDB__get_ts_instance(
                '410731', 'D', measurand='Q', source='DATA_SOURCE')

        # Measurand 'P' is added, but no instance is created for it.
        phildb.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        with self.assertRaises(MissingDataError):
            phildb._PhilDB__get_ts_instance(
                '410730', 'D', measurand='P', source='DATA_SOURCE')
def write_phildb(file_list, results_file, first_run = False):
    """
    Write each CSV file into the 'hrs_phildb' database, timing every write.

    The station ID is the part of the file's base name before the first
    underscore. The elapsed time of each ``db.write`` call is collected and
    saved to ``results_file`` with ``np.savetxt``.

    :param file_list: Iterable of CSV file paths to load.
    :param results_file: Destination passed to ``np.savetxt``.
    :param first_run: When true, the database, the measurand, the source
        and each timeseries/instance are created before writing.
    """
    db_location = 'hrs_phildb'
    if first_run:
        create(db_location)

    db = PhilDB(db_location)

    if first_run:
        db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
        db.add_source('BOM_HRS', 'Bureau of Meteorology; Hydrological Reference Stations dataset.')

    # NOTE(review): `freq` is not defined in this function; presumably it is
    # a module-level global - confirm.
    elapsed_seconds = []
    for csv_path in file_list:
        print("Processing file: ", csv_path, '...')
        station_id = os.path.basename(csv_path).partition('_')[0]
        print("Using station ID: ", station_id, '...')
        streamflow = pd.read_csv(csv_path,
                                 parse_dates=True,
                                 index_col=0,
                                 header=None)
        if first_run:
            db.add_timeseries(station_id)
            db.add_timeseries_instance(station_id, freq, '',
                                       measurand='Q', source='BOM_HRS')

        # Only the write itself is timed.
        began = time.time()
        db.write(station_id, freq, streamflow,
                 measurand='Q', source='BOM_HRS')
        elapsed_seconds.append(time.time() - began)

    np.savetxt(results_file, np.array(elapsed_seconds))
Example #21
0
import pandas as pd
from phildb.database import PhilDB
from phildb.create import create

# The database is built in a 'test_tsdb' directory next to this script.
test_tsdb_path = os.path.join(os.path.dirname(__file__), 'test_tsdb')

# Remove whatever an earlier run left behind; a missing directory is fine.
try:
    shutil.rmtree(test_tsdb_path)
except OSError as e:
    # Code 2: No such file or directory. Anything else is re-raised.
    if e.errno != 2:
        raise

create(test_tsdb_path)
db = PhilDB(test_tsdb_path)

db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

# First timeseries: three values on consecutive days.
db.add_timeseries('410730')
db.add_timeseries_instance(
    '410730', 'D', '', measurand='Q', source='DATA_SOURCE')
db.write(
    '410730',
    'D',
    pd.Series(
        data=[1, 2, 3],
        index=[
            datetime.date(2014, 1, 1),
            datetime.date(2014, 1, 2),
            datetime.date(2014, 1, 3),
        ],
    ),
    source='DATA_SOURCE',
    measurand='Q'
)

# Second timeseries, registered with the same measurand and source.
db.add_timeseries('123456')
db.add_timeseries_instance(
    '123456', 'D', '', measurand='Q', source='DATA_SOURCE')
db.write('123456', 'D', pd.Series(index = [datetime.date(2014, 1, 1),
Example #22
0
import pandas as pd
from phildb.database import PhilDB
from phildb.create import create

# The database is built in a 'test_tsdb' directory next to this script.
test_tsdb_path = os.path.join(os.path.dirname(__file__), 'test_tsdb')

# Remove whatever an earlier run left behind; a missing directory is fine.
try:
    shutil.rmtree(test_tsdb_path)
except OSError as e:
    # Code 2: No such file or directory. Anything else is re-raised.
    if e.errno != 2:
        raise

create(test_tsdb_path)
db = PhilDB(test_tsdb_path)

db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

# Register the first timeseries and its daily instance.
db.add_timeseries('410730')
db.add_timeseries_instance(
    '410730', 'D', '', measurand='Q', source='DATA_SOURCE')
db.write('410730',
         'D',
         pd.Series(index=[
             datetime.date(2014, 1, 1),
             datetime.date(2014, 1, 2),
             datetime.date(2014, 1, 3)
         ],
Example #23
0
            dates.append(the_date)
            data.append(ob[measurand])
    dates.reverse()
    data.reverse()

    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)

measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# The first data file supplies the station ID used for every later write.
# The file is closed as soon as it has been parsed.
with open(sys.argv[2]) as station_file:
    station_id, data = parse(json.load(station_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id, freq, 'None',
                           measurand=measurand, source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

# Remaining files are written to the same timeseries.
for path in sys.argv[3:]:
    print("Processing file: ", path, '...')

    try:
        with open(path) as station_file:
            # parse() returns a (station_id, series) pair; only the series
            # is data to be written.
            _, series = parse(json.load(station_file), measurand)
        db.write(station_id, freq, series,
                 measurand=measurand, source=source)
    except ValueError:
        print('Could not parse: {0}'.format(path))
Example #24
0
from datetime import date

from phildb.create import create
from phildb.database import PhilDB

# Create the 'pypi_downloads' PhilDB database.
create('pypi_downloads')

# NOTE(review): this import sits after create() rather than at the top of the
# file; presumably `count` needs the database to exist when it is imported -
# confirm before moving it.
from count import write_downloads

# Register the source and one measurand per download counter.
db = PhilDB('pypi_downloads')
db.add_source('pypi', 'pypi.python.org')
db.add_measurand('last_day', 'last_day', 'Downloads in the last day')
db.add_measurand('last_week', 'last_week',  'Downloads in the last week')
db.add_measurand('last_month', 'last_month',  'Downloads in the last month')
db.add_measurand('total', 'total',  'Total downloads')

# Write some download information that was collected manually over the
# previous few days. Each call passes an 'info' dictionary (package name plus
# download counters) together with the date it applies to.
write_downloads(
    {
        'info': {
            'name': 'PhilDB',
            'downloads': {'last_day': 6, 'last_month': 572, 'last_week': 74}
        }
    }, date(2015, 11, 12)
)
write_downloads(
    {
        'info': {
            'name': 'PhilDB',
            'downloads': {'last_day': 20, 'last_month': 596, 'last_week': 92}
        }
Example #25
0
            data.append(ob[measurand])
    dates.reverse()
    data.reverse()

    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)


measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# The first data file supplies the station ID and the initial data.
# The file is closed as soon as it has been parsed.
with open(sys.argv[2]) as station_file:
    station_id, data = parse(json.load(station_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id,
                           freq,
                           'None',
                           measurand=measurand,
                           source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

for i in range(3, len(sys.argv)):
    print("Processing file: ", sys.argv[i], '...')

    try:
        x = parse(json.load(open(sys.argv[i])), measurand)
Example #26
0
import os
import sys
import datetime
import pandas as pd
from phildb.database import PhilDB

# Usage: the first argument is the PhilDB location; each further argument is
# a path whose base name is a numeric station ID.
print("Writing to PhilDB({0})".format(sys.argv[1]))
db = PhilDB(sys.argv[1])
db.add_measurand('maxT', 'MAXIMUM_TEMPERATURE', 'Maximum Temperature')
db.add_measurand('minT', 'MINIMUM_TEMPERATURE', 'Minimum Temperature')
# NOTE(review): this description mentions the Hydrological Reference Stations
# dataset although the source ID is BOM_ACORN_SAT - looks copy-pasted; confirm.
db.add_source('BOM_ACORN_SAT', 'Bureau of Meteorology; Hydrological Reference Stations dataset.')

freq = 'D'

for station_path in sys.argv[2:]:
    print("Processing file: ", station_path, '...')
    # Station IDs are zero-padded to six digits.
    station_number = int(os.path.basename(station_path))
    station_id = "{0:06d}".format(station_number)
    print("Using station ID: ", station_id, '...')

    db.add_timeseries(station_id)
    for variable in ('minT', 'maxT'):
        # The data file is located from the variable and station ID, not
        # from the path given on the command line.
        input_file = 'data/acorn.sat.{0}.{1}.daily.txt'.format(variable, station_id)
        df = pd.read_csv(
            input_file,
            parse_dates=[0],
            index_col=0,
            header=None,
            skiprows=1,
            sep=r"\s+",
            na_values='99999.9',
            names=['Date', variable],
        )
        db.add_timeseries_instance(station_id, freq, 'ACORN-SAT',
                                   measurand=variable, source='BOM_ACORN_SAT')
        db.write(station_id, freq, df[variable],
                 measurand=variable, source='BOM_ACORN_SAT')