コード例 #1
0
    def test_add_attribute_with_value(self):
        """
            Register a custom attribute and a value for it, then check that
            a timeseries instance added afterwards can be queried back by
            its measurand, source and timeseries.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_source('EXAMPLE_SOURCE', 'Example source, i.e. a dataset')
        phildb.add_attribute('provider', 'Data provider')
        phildb.add_attribute_value('provider', 'EXAMPLE_PROVIDER')
        phildb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='Q', source='EXAMPLE_SOURCE'
        )

        Session.configure(bind=phildb._PhilDB__engine)
        sess = Session()

        # Look up the records that the instance query filters on.
        ts_record = phildb._PhilDB__get_record_by_id('410730', sess)
        measurand_record = phildb._PhilDB__get_attribute('measurand', 'Q', sess)
        source_record = phildb._PhilDB__get_attribute(
            'source', 'EXAMPLE_SOURCE', sess
        )
        # NOTE(review): the provider value is looked up but never asserted
        # on; presumably the lookup itself fails if the value was not
        # stored -- confirm.
        provider_record = phildb._PhilDB__get_attribute(
            'provider', 'EXAMPLE_PROVIDER', sess
        )

        instance = sess.query(TimeseriesInstance).filter_by(
            measurand=measurand_record,
            source=source_record,
            timeseries=ts_record,
        ).one()

        self.assertEqual(instance.timeseries.primary_id, '410730')
        self.assertEqual(instance.measurand.short_id, 'Q')
        self.assertEqual(instance.source.short_id, 'EXAMPLE_SOURCE')
コード例 #2
0
ファイル: sink_phildb.py プロジェクト: dmkent/sensor-feed
class PhilDBSink(BufferedSink):
    """
        A buffered sink using the PhilDB timeseries database.

        Every buffer is stored under the 'SENSOR' source; the parameter
        name is used as both the timeseries ID and the measurand ID.
    """
    def __init__(self, dbfile, *args, **kwargs):
        """
            Open the PhilDB database at `dbfile`, creating it first if it
            does not exist yet, and make sure the 'SENSOR' source is
            registered.

            Any further positional/keyword arguments are passed through to
            the BufferedSink constructor.
        """
        super(PhilDBSink, self).__init__(*args, **kwargs)

        try:
            create(dbfile)
        except AlreadyExistsError:
            pass  # Database already exists, so no creation required.

        self.db = PhilDB(dbfile)
        # Most recent frequency successfully inferred by write_buffer.
        # NOTE(review): this is shared by all parameters, so it assumes
        # every parameter is sampled at the same frequency -- confirm.
        self.last_known_freq = None
        try:
            self.db.add_source('SENSOR', 'Data from hardware sensor')
        except DuplicateError:
            pass  # DuplicateError means the source already existed

    def write_buffer(self, param_name, series):
        """
            Write buffer of data to database.

            param_name -- identifier used for the timeseries and measurand.
            series -- the buffered values; its index is used to infer the
                      sampling frequency.

            Raises ValueError when the frequency cannot be inferred from
            `series` and no previously inferred frequency can be used.
        """
        if len(series) == 0:
            return

        try:
            self.db.add_measurand(param_name, param_name, param_name)
        except DuplicateError:
            pass  # DuplicateError means the measurand already existed

        try:
            self.db.add_timeseries(param_name)
        except DuplicateError:
            pass  # DuplicateError means the timeseries already existed

        freq = series.index.inferred_freq
        if freq is not None:
            self.last_known_freq = freq
        elif len(series) < 3:
            # pandas needs at least three timestamps to infer a frequency,
            # so short buffers (one or two values) can never be inferred.
            # Fall back to the last known value, which in general should
            # always be the same.
            freq = self.last_known_freq

        if freq is None:
            raise ValueError('Unable to determine sensor frequency')

        try:
            self.db.add_timeseries_instance(param_name,
                                            freq,
                                            'None',
                                            measurand=param_name,
                                            source='SENSOR')
        except DuplicateError:
            pass  # DuplicateError - the timeseries instance already existed

        self.db.write(param_name,
                      freq,
                      series,
                      measurand=param_name,
                      source='SENSOR')
コード例 #3
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_ts_list_source(self):
        """
            Listing timeseries filtered by source returns the ID of the
            timeseries that has an instance under that source.
        """
        database = PhilDB(self.test_tsdb)
        database.add_timeseries('410731')
        database.add_source('EXAMPLE_SOURCE', 'Example source, i.e. a dataset')
        instance_attrs = {'measurand': 'Q', 'source': 'EXAMPLE_SOURCE'}
        database.add_timeseries_instance('410731', 'D', 'Foo', **instance_attrs)

        matching_ids = database.ts_list(source='EXAMPLE_SOURCE')
        self.assertEqual(['410731'], matching_ids)
コード例 #4
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_ts_list_measurand_and_source(self):
        """
            Listing timeseries filtered by both source and measurand
            returns the ID of the matching timeseries.
        """
        database = PhilDB(self.test_tsdb)
        database.add_timeseries('410731')
        database.add_source('EXAMPLE_SOURCE', 'Example source, i.e. a dataset')
        database.add_measurand('P', 'PRECIPITATION', 'Precipitation')
        instance_attrs = {'measurand': 'P', 'source': 'EXAMPLE_SOURCE'}
        database.add_timeseries_instance('410731', 'D', 'Foo', **instance_attrs)

        matching_ids = database.ts_list(source='EXAMPLE_SOURCE', measurand='P')
        self.assertEqual(['410731'], matching_ids)
コード例 #5
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_source_list_sorted(self):
        """
            Check that source short IDs are listed in sorted order.
        """
        database = PhilDB(self.test_tsdb)
        database.add_source('EXAMPLE_SOURCE', 'Example source.')

        source_ids = database.list_sources()
        expected_ids = ['DATA_SOURCE', 'EXAMPLE_SOURCE']
        self.assertEqual(expected_ids, source_ids)
コード例 #6
0
    def test_source_list_sorted(self):
        """
            The short IDs returned by list_sources() must come back sorted.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_source('EXAMPLE_SOURCE', 'Example source.')

        # The newly added source is expected after 'DATA_SOURCE'.
        self.assertEqual(
            ['DATA_SOURCE', 'EXAMPLE_SOURCE'],
            phildb.list_sources(),
        )
コード例 #7
0
    def test_ts_list_source(self):
        """
            Filter the timeseries list by source and expect the single
            timeseries that has an instance under that source.
        """
        ts_id = '410731'
        source_id = 'EXAMPLE_SOURCE'

        phildb = PhilDB(self.test_tsdb)
        phildb.add_timeseries(ts_id)
        phildb.add_source(source_id, 'Example source, i.e. a dataset')
        phildb.add_timeseries_instance(
            ts_id, 'D', 'Foo', measurand='Q', source=source_id
        )

        self.assertEqual([ts_id], phildb.ts_list(source=source_id))
コード例 #8
0
    def test_ts_list_measurand_and_source(self):
        """
            Filter the timeseries list by source and measurand together and
            expect the single matching timeseries.
        """
        ts_id = '410731'
        source_id = 'EXAMPLE_SOURCE'
        measurand_id = 'P'

        phildb = PhilDB(self.test_tsdb)
        phildb.add_timeseries(ts_id)
        phildb.add_source(source_id, 'Example source, i.e. a dataset')
        phildb.add_measurand(measurand_id, 'PRECIPITATION', 'Precipitation')
        phildb.add_timeseries_instance(
            ts_id, 'D', 'Foo', measurand=measurand_id, source=source_id
        )

        found = phildb.ts_list(source=source_id, measurand=measurand_id)
        self.assertEqual([ts_id], found)
コード例 #9
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_add_duplicates(self):
        """
            Each add_* call below is expected to raise DuplicateError.
        """
        database = PhilDB(self.test_tsdb)

        self.assertRaises(
            DuplicateError,
            database.add_source, 'DATA_SOURCE', 'Duplicate source')

        self.assertRaises(
            DuplicateError,
            database.add_measurand, 'Q', 'STREAMFLOW', 'Duplicate measurand')

        self.assertRaises(
            DuplicateError,
            database.add_timeseries, '410730')

        self.assertRaises(
            DuplicateError,
            database.add_timeseries_instance,
            '410730', 'D', '', source='DATA_SOURCE', measurand='Q')
コード例 #10
0
    def test_add_duplicates(self):
        """
            Adding a source, measurand, timeseries or timeseries instance
            a second time is expected to raise DuplicateError.
        """
        phildb = PhilDB(self.test_tsdb)

        # Source.
        with self.assertRaises(DuplicateError):
            phildb.add_source('DATA_SOURCE', 'Duplicate source')

        # Measurand.
        with self.assertRaises(DuplicateError):
            phildb.add_measurand('Q', 'STREAMFLOW', 'Duplicate measurand')

        # Timeseries.
        with self.assertRaises(DuplicateError):
            phildb.add_timeseries('410730')

        # Timeseries instance.
        instance_attrs = {'source': 'DATA_SOURCE', 'measurand': 'Q'}
        with self.assertRaises(DuplicateError):
            phildb.add_timeseries_instance('410730', 'D', '', **instance_attrs)
コード例 #11
0
ファイル: test_database.py プロジェクト: cc272309126/phildb
    def test_add_source(self):
        """
            Add a new source and check that a timeseries instance created
            against it can be queried back with the expected attributes.
        """
        phildb = PhilDB(self.test_tsdb)
        phildb.add_source('EXAMPLE_SOURCE', 'Example source, i.e. a dataset')
        phildb.add_timeseries_instance(
            '410730', 'D', 'Foo', measurand='Q', source='EXAMPLE_SOURCE'
        )

        Session.configure(bind=phildb._PhilDB__engine)
        sess = Session()

        # Look up the records that the instance query filters on.
        ts_record = phildb._PhilDB__get_record_by_id('410730', sess)
        measurand_record = phildb._PhilDB__get_attribute('measurand', 'Q', sess)
        source_record = phildb._PhilDB__get_attribute(
            'source', 'EXAMPLE_SOURCE', sess
        )

        instance = sess.query(TimeseriesInstance).filter_by(
            measurand=measurand_record,
            source=source_record,
            timeseries=ts_record,
        ).one()

        self.assertEqual(instance.timeseries.primary_id, '410730')
        self.assertEqual(instance.measurand.short_id, 'Q')
        self.assertEqual(instance.source.short_id, 'EXAMPLE_SOURCE')
コード例 #12
0
def write_phildb(file_list, results_file, first_run = False):
    """
    Write each CSV file to the 'hrs_phildb' database and record timings.

    file_list -- paths of the CSV files to load; the station ID is the part
                 of each file's base name before the first underscore.
    results_file -- destination passed to np.savetxt for the per-file
                    write durations (in seconds).
    first_run -- when true, the database, the 'Q' measurand, the 'BOM_HRS'
                 source and each timeseries/instance are created first.
    """
    db_name = 'hrs_phildb'
    if first_run:
        create(db_name)

    db = PhilDB(db_name)

    if first_run:
        db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
        db.add_source('BOM_HRS', 'Bureau of Meteorology; Hydrological Reference Stations dataset.')

    # NOTE(review): `freq` is not defined in this function; presumably it is
    # a module-level constant -- confirm.
    elapsed_seconds = []
    for filename in file_list:
        print("Processing file: ", filename, '...')
        base_name = os.path.basename(filename)
        station_id = base_name.partition('_')[0]
        print("Using station ID: ", station_id, '...')

        streamflow = pd.read_csv(
            filename,
            parse_dates=True,
            index_col=0,
            header=None,
        )

        if first_run:
            db.add_timeseries(station_id)
            db.add_timeseries_instance(
                station_id, freq, '', measurand='Q', source='BOM_HRS'
            )

        # Only the write itself is timed.
        started_at = time.time()
        db.write(station_id, freq, streamflow, measurand='Q', source='BOM_HRS')
        elapsed_seconds.append(time.time() - started_at)

    np.savetxt(results_file, np.array(elapsed_seconds))
コード例 #13
0
ファイル: gen_db.py プロジェクト: cc272309126/phildb
from phildb.database import PhilDB
from phildb.create import create

# The test database lives next to this script.
test_tsdb_path = os.path.join(os.path.dirname(__file__), 'test_tsdb')

# Remove any database left over from an earlier run.
try:
    shutil.rmtree(test_tsdb_path)
except OSError as err:
    # errno 2 (no such file or directory) means there was nothing to
    # remove; anything else is re-raised.
    if err.errno != 2:
        raise

create(test_tsdb_path)
db = PhilDB(test_tsdb_path)

# Measurand and source used by the timeseries instance below.
db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

db.add_timeseries('410730')
db.add_timeseries_instance(
    '410730', 'D', '', measurand='Q', source='DATA_SOURCE'
)
db.write('410730',
         'D',
         pd.Series(index=[
             datetime.date(2014, 1, 1),
             datetime.date(2014, 1, 2),
             datetime.date(2014, 1, 3)
         ],
                   data=[1, 2, 3]),
コード例 #14
0
ファイル: load_bom_data.py プロジェクト: cc272309126/phildb
            data.append(ob[measurand])
    dates.reverse()
    data.reverse()

    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)

measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# The first data file (argv[2]) supplies the station ID that every later
# file is written against.
with open(sys.argv[2]) as first_file:
    station_id, data = parse(json.load(first_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id, freq, 'None', measurand=measurand, source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

# Any remaining files are appended to the same timeseries.
for json_path in sys.argv[3:]:
    print("Processing file: ", json_path, '...')

    try:
        with open(json_path) as json_file:
            # parse() returns (station_id, series); only the series is
            # written. The station ID from the first file is reused.
            _file_station_id, file_data = parse(json.load(json_file), measurand)
        db.write(station_id, freq, file_data, measurand=measurand, source=source)
    except ValueError:
        print('Could not parse: {0}'.format(json_path))

コード例 #15
0
from datetime import date

from phildb.create import create
from phildb.database import PhilDB

# The database is created before `count` is imported; this order is kept.
# NOTE(review): presumably `count` opens the database at import time --
# confirm.
create('pypi_downloads')

from count import write_downloads

db = PhilDB('pypi_downloads')
db.add_source('pypi', 'pypi.python.org')

# Each measurand uses the same string for its first two arguments.
for measurand_id, measurand_description in (
    ('last_day', 'Downloads in the last day'),
    ('last_week', 'Downloads in the last week'),
    ('last_month', 'Downloads in the last month'),
    ('total', 'Total downloads'),
):
    db.add_measurand(measurand_id, measurand_id, measurand_description)

# Write some download information I had manually collected over the last few days
write_downloads(
    {'info': {'name': 'PhilDB',
              'downloads': {'last_day': 6,
                            'last_month': 572,
                            'last_week': 74}}},
    date(2015, 11, 12),
)
write_downloads(
    {
        'info': {
            'name': 'PhilDB',
            'downloads': {'last_day': 20, 'last_month': 596, 'last_week': 92}
        }
コード例 #16
0
ファイル: gen_db.py プロジェクト: amacd31/phildb
from phildb.database import PhilDB
from phildb.create import create

# The test database lives next to this script.
test_tsdb_path = os.path.join(os.path.dirname(__file__), 'test_tsdb')

# Remove any database left over from an earlier run.
try:
    shutil.rmtree(test_tsdb_path)
except OSError as err:
    # errno 2 (no such file or directory) means there was nothing to
    # remove; anything else is re-raised.
    if err.errno != 2:
        raise

create(test_tsdb_path)
db = PhilDB(test_tsdb_path)

# Measurand and source shared by the timeseries instances below.
db.add_measurand('Q', 'STREAMFLOW', 'Streamflow')
db.add_source('DATA_SOURCE', '')

# First timeseries: three consecutive daily values.
db.add_timeseries('410730')
db.add_timeseries_instance(
    '410730', 'D', '', measurand='Q', source='DATA_SOURCE'
)
db.write(
    '410730',
    'D',
    pd.Series(
        [1, 2, 3],
        index=[
            datetime.date(2014, 1, 1),
            datetime.date(2014, 1, 2),
            datetime.date(2014, 1, 3),
        ],
    ),
    source='DATA_SOURCE',
    measurand='Q',
)

# Second timeseries, registered the same way.
db.add_timeseries('123456')
db.add_timeseries_instance(
    '123456', 'D', '', measurand='Q', source='DATA_SOURCE'
)
db.write('123456', 'D', pd.Series(index = [datetime.date(2014, 1, 1),
            datetime.date(2014, 1, 2),
コード例 #17
0
ファイル: load_bom_data.py プロジェクト: cc272309126/phildb
    dates.reverse()
    data.reverse()

    station_id = station_json['observations']['header'][0]['ID']

    return station_id, pd.Series(data, dates)


measurand = 'air_temp'
source = 'BOM_OBS'
freq = '30min'

# Read the first data file (argv[2]) and close it once parsed, rather than
# leaving the handle open for the rest of the run.
with open(sys.argv[2]) as first_file:
    station_id, data = parse(json.load(first_file), measurand)

db.add_measurand(measurand, measurand, 'Air Temperature')
# Register the source under the same ID the instance and write use below.
db.add_source(source, 'Australian Bureau of Meteorology Observations')

db.add_timeseries(station_id)
db.add_timeseries_instance(station_id,
                           freq,
                           'None',
                           measurand=measurand,
                           source=source)
db.write(station_id, freq, data, measurand=measurand, source=source)

for i in range(3, len(sys.argv)):
    print("Processing file: ", sys.argv[i], '...')

    try:
        x = parse(json.load(open(sys.argv[i])), measurand)
        db.write(station_id, freq, x, measurand=measurand, source=source)
コード例 #18
0
import os
import sys
import datetime
import pandas as pd
from phildb.database import PhilDB

# Usage: argv[1] is the PhilDB database path; every later argument is a file
# whose base name is a numeric station ID.
print("Writing to PhilDB({0})".format(sys.argv[1]))
db = PhilDB(sys.argv[1])
db.add_measurand('maxT', 'MAXIMUM_TEMPERATURE', 'Maximum Temperature')
db.add_measurand('minT', 'MINIMUM_TEMPERATURE', 'Minimum Temperature')
# The description previously named the Hydrological Reference Stations
# dataset, which does not match the BOM_ACORN_SAT source ID.
db.add_source('BOM_ACORN_SAT', 'Bureau of Meteorology; Australian Climate Observations Reference Network - Surface Air Temperature dataset.')

freq = 'D'

for station_file in sys.argv[2:]:
    print("Processing file: ", station_file, '...')
    # Station IDs are zero-padded to six digits.
    station_id = "{0:06d}".format(int(os.path.basename(station_file)))
    print("Using station ID: ", station_id, '...')

    db.add_timeseries(station_id)
    for variable in ['minT', 'maxT']:
        # The data itself is read from a fixed 'data/' location keyed by
        # variable and station ID, not from the path given on the command line.
        input_file = 'data/acorn.sat.{0}.{1}.daily.txt'.format(variable, station_id)
        df = pd.read_csv(input_file, parse_dates=[0], index_col=0, header=None, skiprows=1, sep=r"\s+", na_values='99999.9', names=['Date', variable])
        db.add_timeseries_instance(station_id, freq, 'ACORN-SAT', measurand=variable, source='BOM_ACORN_SAT')
        db.write(station_id, freq, df[variable], measurand=variable, source='BOM_ACORN_SAT')