Ejemplo n.º 1
0
def correlate(chan_id, weather_variable, subplot_index, annotate_y):
    """Regress a channel's hours-on per period against a weather variable.

    Loads channel `chan_id` from DATA_DIR, computes hours-on per 'D' period,
    keeps only the periods above ON_DURATION_THRESHOLD, aligns them with
    `weather_variable`, fits a line with `linregress` and draws the result
    on subplot `subplot_index` of a 2x2 grid on the module-level `fig`.
    Progress messages and the R^2 value are printed.

    Args:
        chan_id: channel identifier passed to Channel.
        weather_variable: passed to pda.stats.align; its `.name` is used to
            build the subplot title.
        subplot_index (int): position within the 2x2 subplot grid.
        annotate_y: forwarded to pda.stats.plot_regression_line.
    """
    # R^2 values recorded for various channel ids:
    # 25 = lighting circuit # (R^2 = 0.443)
    # 8 = kitchen lights (R^2 = 0.194)
    # 2 = boiler (versus radiation R^2 = 0.052,
    #             versus mean_temp R^2 = 0.298,
    #             versus max_temp  R^2 = 0.432,
    #             versus min_temp  R^2 = 0.212)
    # 3 = solar (R^2 = 0.798)
    # 12 = fridge vs min_temp R^2 = 0.255 (with on_power_threshold = 20)

    print("Opening channel data...")
    chan = Channel(DATA_DIR, chan_id)

    print("Calculating...")
    chan.on_power_threshold = 20
    usage = chan.usage_per_period('D', tz_convert='UTC')
    on_hours = usage.hours_on
    # Discard periods at or below the threshold.
    on_hours = on_hours[on_hours > ON_DURATION_THRESHOLD]
    on_hours.description = 'hours on'
    n_periods = on_hours.size
    print("Got {} days of data from usage_per_period.".format(n_periods))

    print("Plotting...")

    xs, ys = pda.stats.align(weather_variable, on_hours)
    print(xs.description)
    fit = linregress(xs.values, ys.values)
    # Only the first three fitted values are used below.
    slope, intercept, r_value, _p_value, _std_err = fit

    axes = spfl.format_axes(fig.add_subplot(2, 2, subplot_index))
    axes = pda.stats.plot_regression_line(
        axes, xs, ys, slope, intercept, r_value, annotate_y=annotate_y)
    print("R^2={:.3f}".format(r_value**2))

    channel_name = chan.get_long_name()
    weather_name = metoffice.get_long_name(weather_variable.name)
    axes.set_title('Correlation between ' + channel_name + ' and ' +
                   weather_name)
Ejemplo n.º 2
0
def test(data_input='random'):
    """Run `inference` on a test signal and plot the signal and the beliefs.

    The hazard function is `constant_hazard` with `_lambda=200`.  The top
    subplot shows the signal with a vertical black line at each known
    changepoint; the bottom subplot shows `-log(beliefs)` as an image with
    `maxes` overlaid in red.

    Args:
        data_input (str): which signal to analyse.
            'random': N points produced by `generate_test_data`.
            'ones': a signal of N ones (no known changepoints).
            'signature': samples [142:1647] of a recorded signature file
                loaded with `Channel.load_wattsup` (no known changepoints).

    Returns:
        beliefs, maxes: the outputs of `inference`; `beliefs` is cast to
        float32.

    Raises:
        ValueError: if `data_input` is not one of the values listed above.
    """
    valid_inputs = ('random', 'ones', 'signature')
    if data_input not in valid_inputs:
        # Fail before any figure is created instead of hitting an unbound
        # local further down.
        raise ValueError("data_input must be one of {}, got {!r}"
                         .format(valid_inputs, data_input))

    def hazard_func(r):
        return constant_hazard(r, _lambda=200)

    # Defaults shared by all inputs.  Previously N was only assigned in the
    # 'random' branch (so 'ones' crashed) and changepoints was never assigned
    # for 'signature' (so the plotting loop crashed).
    N = 100  # how many data points to generate?
    changepoints = []

    if data_input == 'random':
        # generate test data
        x, changepoints = generate_test_data(N, hazard_func)
    elif data_input == 'ones':
        x = np.ones(N)
    else:  # 'signature'
        from pda.channel import Channel
        from os import path
        DATA_DIR = '/data/mine/domesticPowerData/BellendenRd/wattsUp'
        #SIG_DATA_FILENAME = 'breadmaker1.csv'
        SIG_DATA_FILENAME = 'washingmachine1.csv'
        chan = Channel()
        chan.load_wattsup(path.join(DATA_DIR, SIG_DATA_FILENAME))
        x = chan.series.values[142:1647]
        N = x.size

    # plot the signal and mark each known changepoint
    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.plot(x)
    ylim = ax.get_ylim()
    for cp in changepoints:
        ax.plot([cp, cp], ylim, color='k')

    # do inference
    beliefs, maxes = inference(x, hazard_func)

    # plot beliefs
    beliefs = beliefs.astype(np.float32)
    ax2 = fig.add_subplot(2, 1, 2, sharex=ax)
    ax2.imshow(-np.log(beliefs),
               interpolation='none',
               aspect='auto',
               origin='lower',
               cmap=plt.cm.Blues)
    ax2.plot(maxes, color='r')
    ax2.set_xlim([0, N])
    ax2.set_ylim([0, ax2.get_ylim()[1]])
    plt.draw()
    return beliefs, maxes
Ejemplo n.º 3
0
def test(data_input='random'):
    """Run `inference` on a test signal and plot the signal and the beliefs.

    The hazard function is `constant_hazard` with `_lambda=200`.  The top
    subplot shows the signal with a vertical black line at each known
    changepoint; the bottom subplot shows `-log(beliefs)` as an image with
    `maxes` overlaid in red.

    Args:
        data_input (str): which signal to analyse.
            'random': N points produced by `generate_test_data`.
            'ones': a signal of N ones (no known changepoints).
            'signature': samples [142:1647] of a recorded signature file
                loaded with `Channel.load_wattsup` (no known changepoints).

    Returns:
        beliefs, maxes: the outputs of `inference`; `beliefs` is cast to
        float32.

    Raises:
        ValueError: if `data_input` is not one of the values listed above.
    """
    valid_inputs = ('random', 'ones', 'signature')
    if data_input not in valid_inputs:
        # Fail before any figure is created instead of hitting an unbound
        # local further down.
        raise ValueError("data_input must be one of {}, got {!r}"
                         .format(valid_inputs, data_input))

    # A named function rather than a lambda bound to a name.
    def hazard_func(r):
        return constant_hazard(r, _lambda=200)

    # Defaults shared by all inputs.  Previously N was only assigned in the
    # 'random' branch (so 'ones' crashed) and changepoints was never assigned
    # for 'signature' (so the plotting loop crashed).
    N = 100 # how many data points to generate?
    changepoints = []

    if data_input == 'random':
        # generate test data
        x, changepoints = generate_test_data(N, hazard_func)
    elif data_input == 'ones':
        x = np.ones(N)
    else:  # 'signature'
        from pda.channel import Channel
        from os import path
        DATA_DIR = '/data/mine/domesticPowerData/BellendenRd/wattsUp'
        #SIG_DATA_FILENAME = 'breadmaker1.csv'
        SIG_DATA_FILENAME = 'washingmachine1.csv'
        chan = Channel()
        chan.load_wattsup(path.join(DATA_DIR, SIG_DATA_FILENAME))
        x = chan.series.values[142:1647]
        N = x.size

    # plot the signal and mark each known changepoint
    fig = plt.figure()
    ax = fig.add_subplot(2,1,1)
    ax.plot(x)
    ylim = ax.get_ylim()
    for cp in changepoints:
        ax.plot([cp, cp], ylim, color='k')

    # do inference
    beliefs, maxes = inference(x, hazard_func)

    # plot beliefs
    beliefs = beliefs.astype(np.float32)
    ax2 = fig.add_subplot(2,1,2, sharex=ax)
    ax2.imshow(-np.log(beliefs), interpolation='none', aspect='auto',
               origin='lower', cmap=plt.cm.Blues)
    ax2.plot(maxes, color='r')
    ax2.set_xlim([0, N])
    ax2.set_ylim([0, ax2.get_ylim()[1]])
    plt.draw()
    return beliefs, maxes
Ejemplo n.º 4
0
def load_dataset(data_dir=DD,
                 ignore_chans=None,
                 only_load_chans=None,
                 start_date=None,
                 end_date=None):
    """Loads an entire dataset directory.

    Every (chan, label) entry returned by `load_labels(data_dir)` is loaded
    as a Channel unless it is filtered out.  Progress is printed to stdout.

    Args:
        data_dir (str)
        ignore_chans (list of ints or label strings): optional.
            Don't load these channels.  An entry is skipped if either its
            chan or its label is in this list.
        only_load_chans (list of ints or label strings): optional.
            If given, an entry is skipped unless its chan or its label is
            in this collection.
        start_date, end_date: optional.  Passed unchanged to Channel.

    Returns:
        list of Channels.  Entries for which Channel raises IOError are
        reported as "FAILED!" and left out of the list.
    """

    if ignore_chans is not None:
        assert (isinstance(ignore_chans, list))

    channels = []
    labels = load_labels(data_dir)
    print("Found", len(labels), "entries in labels.dat")
    # .items() exists on both Python 2 and Python 3 mappings, whereas
    # .iteritems() is Python 2 only.
    for chan, label in labels.items():
        ignored = (ignore_chans is not None and
                   (chan in ignore_chans or label in ignore_chans))
        not_selected = (only_load_chans is not None and
                        chan not in only_load_chans and
                        label not in only_load_chans)
        if ignored or not_selected:
            print("Ignoring chan", chan, label)
            continue

        print("Attempting to load chan", chan, label, "...", end=" ")
        # Flush so the "Attempting..." text is visible while the load runs.
        sys.stdout.flush()
        try:
            c = Channel(data_dir,
                        chan,
                        start_date=start_date,
                        end_date=end_date)
        except IOError:
            # Best effort: report the failure and carry on with the rest.
            print("FAILED!")
        else:
            channels.append(c)
            print("success.")

    return channels
Ejemplo n.º 5
0
from __future__ import print_function, division
from pda.channel import Channel
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os, datetime
import setupPlottingForLaTeX as spfl

# Colour constants.  NOTE(review): the names suggest bars for normalised data
# and lines for unnormalised data; the plotting code is not visible here.
NORMALISED_BAR_COLOR = 'gray'
UNNORMALISED_LINE_COLOR = 'k'

# Input dataset directory and the path of the PDF figure to be produced.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'power_histograms.pdf')
# Load the 'volts' parameter from mains.dat in DATA_DIR into its own Channel.
voltage = Channel()
voltage.load_high_freq_mains(os.path.join(DATA_DIR, 'mains.dat'), 'volts')

# IDs of the channels to load from DATA_DIR.
CHAN_IDS = [24,5,12,22,7,9,8,11,42,14,16,4]
spfl.setup(columns=2)
TITLE_Y = 0.7
MINIMUM_BIN_COUNT = 100

chans = []  # one Channel per entry of CHAN_IDS, in the same order
normalised = []  # NOTE(review): not populated in the code visible here

for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)
Ejemplo n.º 6
0
    TIMESPAN = 'W' # D (daily) or W (weekly)
    CHAN_IDS = [14,22]
    spfl.setup()
    GRID = False
    TITLE_Y = 0.75
    XTICKS_ON = True
    LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH,
                                             'weekly_usage_histograms'+FIGURE_SUFFIX)
else:
    CHAN_IDS = []

# Load every channel listed in CHAN_IDS from DATA_DIR and crop it with
# START_DATE and END_DATE; the cropped channels are collected in CHANS.
CHANS = []
for chan_id in CHAN_IDS:
    # Get channel data
    print("Loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    # crop() returns the channel to keep, so rebind c to its result.
    c = c.crop(START_DATE, END_DATE)
    CHANS.append(c)

if FIGURE_PRESET == 'boiler seasons':
    BIN_SIZE = 'T' # D (daily) or H (hourly) or T (minutely)
    TIMESPAN = 'D' # D (daily) or W (weekly)
    spfl.setup()
    GRID = False
    TITLE_Y = 0.7
    XTICKS_ON = True
    LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH,
                                             'seasonal_variation'+FIGURE_SUFFIX)
    print("Loading winter boiler data...")
    winter_boiler = Channel(DATA_DIR, 2)
    winter_boiler = winter_boiler.crop(datetime.datetime(year=2013, month=2, day=1),
Ejemplo n.º 7
0
def init_aggregate_and_appliance_dataset_figure(
        start_date,
        end_date,
        n_subplots=2,
        aggregate_type='one second',
        plot_both_aggregate_signals=False,
        data_dir=DD,
        plot_appliance_ground_truth=True,
        ignore_chans=None,
        **kwargs):
    """Initialise a basic figure with multiple subplots.  Plot aggregate
    data.  Optionally plot appliance ground truth dataset.

    Args:
        start_date, end_date (str): Required.  e.g. '2013/6/4 18:00'
        n_subplots (int): Default=2.  Number of vertically stacked subplots,
            all sharing the x axis of the first.  Must be >= 2 when
            plot_appliance_ground_truth is True, otherwise >= 1.  Includes
            aggregate and appliance ground truth plots.
        aggregate_type (str): 'one second' or 'current cost'.  The flavour of
            aggregate data to load, plot and return.
        plot_both_aggregate_signals (bool): Default==False. Plot both flavours
            of aggregate data?  Has no effect on which flavour is returned.
        data_dir (str): Default=DD
        plot_appliance_ground_truth (bool): Default==True
        ignore_chans (list of strings or ints): Defaults to a standard list of
            channels to ignore.
        **kwargs: passed to the plot call of each aggregate channel
    Returns:
        subplots (list of axes),
        chan (pda.Channel): the one-second channel if
            aggregate_type == 'one second', otherwise the Current Cost
            channel.
    Raises:
        ValueError: if aggregate_type is not recognised and
            plot_both_aggregate_signals is False, i.e. no aggregate channel
            would be loaded.

    """
    # Without this check an unrecognised aggregate_type loaded nothing and
    # failed with a NameError only after the figure had been created.
    # When both signals are plotted any other value still returns the
    # Current Cost channel, so that case is left untouched.
    if (aggregate_type not in ('one second', 'current cost')
            and not plot_both_aggregate_signals):
        raise ValueError("aggregate_type must be 'one second' or "
                         "'current cost', got {!r}".format(aggregate_type))

    if plot_appliance_ground_truth:
        # subplots[1] is needed for the ground truth plot
        assert n_subplots >= 2, "n_subplots must be >= 2"
    else:
        assert n_subplots >= 1, "n_subplots must be >= 1"

    # Initialise figure and subplots
    fig = plt.figure()
    # FigureCanvasBase.set_window_title has been removed from recent
    # Matplotlib releases; the figure manager provides the same call.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(start_date + ' - ' + end_date)
    subplots = [fig.add_subplot(n_subplots, 1, 1)]
    for i in range(2, n_subplots + 1):
        subplots.append(fig.add_subplot(n_subplots, 1, i, sharex=subplots[0]))

    # Load and plot aggregate channel(s)
    if aggregate_type == 'one second' or plot_both_aggregate_signals:
        print('Loading high freq mains...')
        one_sec = Channel()
        one_sec.load_normalised(data_dir,
                                high_freq_param='active',
                                start_date=start_date,
                                end_date=end_date)
        one_sec.plot(subplots[0], color='k', **kwargs)

    if aggregate_type == 'current cost' or plot_both_aggregate_signals:
        print('Loading Current Cost aggregate...')
        cc = Channel(data_dir,
                     'aggregate',
                     start_date=start_date,
                     end_date=end_date)  # cc = Current cost
        cc.plot(subplots[0], color='r', **kwargs)

    subplots[0].set_title('Aggregate. 1s active power, normalised.')
    subplots[0].legend()
    chan = one_sec if aggregate_type == 'one second' else cc

    if plot_appliance_ground_truth:
        print('Loading appliance ground truth dataset...')
        if ignore_chans is None:
            ignore_chans = [
                'aggregate', 'amp_livingroom', 'adsl_router',
                'livingroom_s_lamp', 'gigE_&_USBhub', 'livingroom_s_lamp2',
                'iPad_charger', 'subwoofer_livingroom', 'livingroom_lamp_tv',
                'DAB_radio_livingroom', 'kitchen_lamp2',
                'kitchen_phone&stereo', 'utilityrm_lamp', 'samsung_charger',
                'kitchen_radio', 'bedroom_chargers', 'data_logger_pc',
                'childs_table_lamp', 'baby_monitor_tx', 'battery_charger',
                'office_lamp1', 'office_lamp2', 'office_lamp3', 'gigE_switch'
            ]
        ds = load_dataset(data_dir,
                          ignore_chans=ignore_chans,
                          start_date=start_date,
                          end_date=end_date)
        print("Removing inactive channels...")
        ds = remove_inactive_channels(ds)
        print("Plotting dataset ground truth...")
        plot_each_channel_activity(subplots[1], ds)

    return subplots, chan
Ejemplo n.º 8
0
from __future__ import print_function, division
from pda.channel import Channel
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os, datetime
import setupPlottingForLaTeX as spfl

# Colour constants.  NOTE(review): the names suggest bars for normalised data
# and lines for unnormalised data; the plotting code is not visible here.
NORMALISED_BAR_COLOR = 'gray'
UNNORMALISED_LINE_COLOR = 'k'

# Input dataset directory and the path of the PDF figure to be produced.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'power_histograms.pdf')
# Load the 'volts' parameter from mains.dat in DATA_DIR into its own Channel.
voltage = Channel()
voltage.load_high_freq_mains(os.path.join(DATA_DIR, 'mains.dat'), 'volts')

# IDs of the channels to load from DATA_DIR.
CHAN_IDS = [24, 5, 12, 22, 7, 9, 8, 11, 42, 14, 16, 4]
spfl.setup(columns=2)
TITLE_Y = 0.7
MINIMUM_BIN_COUNT = 100

chans = []  # one Channel per entry of CHAN_IDS, in the same order
normalised = []  # NOTE(review): not populated in the code visible here

for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)
Ejemplo n.º 9
0
# Input dataset directory and the path of the PDF figure to be produced.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'on_durations.pdf')

# IDs of the channels to load from DATA_DIR.
CHAN_IDS = [6,5,12,22,39,9,8,11,42,13,16,4]
spfl.setup(columns=2)
TITLE_Y = 0.7

chans = []  # one Channel per entry of CHAN_IDS, in the same order

for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)

#-------------------------------------------

fig = plt.figure()

n_subplots = len(chans)

# Per-channel-name value passed as ignore_n_off_samples to Channel.durations.
# Names that are not listed yield None (the dict.get default).  Built once
# here rather than on every loop iteration.
ignore_n_off_samples = {'breadmaker': 600,
                        'washing_machine': 10,
                        'dishwasher': 10}

# enumerate() supplies the 1-based subplot index directly; the previous
# chans.index(c) rescanned the list for every channel and would return the
# first match if two channels compared equal.
for subplot_index, c in enumerate(chans, start=1):
    on_durations = c.durations('on',
                               ignore_n_off_samples=ignore_n_off_samples.get(c.name))
Ejemplo n.º 10
0
#!/bin/python
from __future__ import print_function, division
from pda.channel import Channel
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib import animation
import numpy as np
import datetime

BIN_SIZE = 'H'  # H (hourly) or T (minutely)
c = Channel('/data/mine/vadeec/jack-merged', 2)
# Period (frequency 'W') that contains the first entry of the channel's index.
START_PERIOD = c.series.index[0].to_period('W')

# Number of bars on the x axis: 1440 for minutely bins, 24 for hourly bins.
width = 1440 if BIN_SIZE=='T' else 24

fig = plt.figure()
ax = fig.add_subplot(111)
COLOR = 'b'
x = np.arange(width)
y = np.zeros(width)
# Start with all bars at zero height; `rects` keeps the bar container that
# ax.bar returns.
rects = ax.bar(x, y, facecolor=COLOR, edgecolor=COLOR)
ax.set_xlim([0, width])
ax.set_ylim([0, 10])
# One major x tick every width / 12 units.
ax.xaxis.set_major_locator(ticker.MultipleLocator(width / 12))

def format_time(x, pos=None):
    """Return the hour for tick value `x`, formatted as an integer string.

    When the module-level BIN_SIZE is 'T' (minutely), `x` is floor-divided
    by 60; otherwise `x` is used unchanged.  The value is truncated with
    int() before formatting.

    `pos` is accepted but not used.  NOTE(review): presumably present to fit
    a matplotlib tick-formatter callback signature -- confirm at the caller.
    """
    minutely = (BIN_SIZE == 'T')
    hour_value = x // 60 if minutely else x
    return '{:d}'.format(int(hour_value))
Ejemplo n.º 11
0
# Input dataset directory and the path of the PDF figure to be produced.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'on_durations.pdf')

# IDs of the channels to load from DATA_DIR.
CHAN_IDS = [6, 5, 12, 22, 39, 9, 8, 11, 42, 13, 16, 4]
spfl.setup(columns=2)
TITLE_Y = 0.7

chans = []  # one Channel per entry of CHAN_IDS, in the same order

for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)

#-------------------------------------------
# Plotting: one figure for the loaded channels.

fig = plt.figure()

n_subplots = len(chans)
for c in chans:
    subplot_index = chans.index(c) + 1
    ignore_n_off_samples = {
        'breadmaker': 600,
        'washing_machine': 10,
        'dishwasher': 10
    }
    on_durations = c.durations('on',
Ejemplo n.º 12
0
def init_aggregate_and_appliance_dataset_figure(
        start_date, end_date, n_subplots=2,
        aggregate_type='one second', plot_both_aggregate_signals=False,
        data_dir=DD, plot_appliance_ground_truth=True, ignore_chans=None,
        **kwargs):
    """Initialise a basic figure with multiple subplots.  Plot aggregate
    data.  Optionally plot appliance ground truth dataset.

    Args:
        start_date, end_date (str): Required.  e.g. '2013/6/4 18:00'
        n_subplots (int): Default=2.  Number of vertically stacked subplots,
            all sharing the x axis of the first.  Must be >= 2 when
            plot_appliance_ground_truth is True, otherwise >= 1.  Includes
            aggregate and appliance ground truth plots.
        aggregate_type (str): 'one second' or 'current cost'.  The flavour of
            aggregate data to load, plot and return.
        plot_both_aggregate_signals (bool): Default==False. Plot both flavours
            of aggregate data?  Has no effect on which flavour is returned.
        data_dir (str): Default=DD
        plot_appliance_ground_truth (bool): Default==True
        ignore_chans (list of strings or ints): Defaults to a standard list of
            channels to ignore.
        **kwargs: passed to the plot call of each aggregate channel
    Returns:
        subplots (list of axes),
        chan (pda.Channel): the one-second channel if
            aggregate_type == 'one second', otherwise the Current Cost
            channel.
    Raises:
        ValueError: if aggregate_type is not recognised and
            plot_both_aggregate_signals is False, i.e. no aggregate channel
            would be loaded.

    """
    # Without this check an unrecognised aggregate_type loaded nothing and
    # failed with a NameError only after the figure had been created.
    # When both signals are plotted any other value still returns the
    # Current Cost channel, so that case is left untouched.
    if (aggregate_type not in ('one second', 'current cost')
            and not plot_both_aggregate_signals):
        raise ValueError("aggregate_type must be 'one second' or "
                         "'current cost', got {!r}".format(aggregate_type))

    if plot_appliance_ground_truth:
        # subplots[1] is needed for the ground truth plot
        assert n_subplots >= 2, "n_subplots must be >= 2"
    else:
        assert n_subplots >= 1, "n_subplots must be >= 1"

    # Initialise figure and subplots
    fig = plt.figure()
    # FigureCanvasBase.set_window_title has been removed from recent
    # Matplotlib releases; the figure manager provides the same call.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(start_date + ' - ' + end_date)
    subplots = [fig.add_subplot(n_subplots, 1, 1)]
    for i in range(2, n_subplots+1):
        subplots.append(fig.add_subplot(n_subplots, 1, i, sharex=subplots[0]))

    # Load and plot aggregate channel(s)
    if aggregate_type == 'one second' or plot_both_aggregate_signals:
        print('Loading high freq mains...')
        one_sec = Channel()
        one_sec.load_normalised(data_dir, high_freq_param='active',
                                start_date=start_date, end_date=end_date)
        one_sec.plot(subplots[0], color='k', **kwargs)

    if aggregate_type == 'current cost' or plot_both_aggregate_signals:
        print('Loading Current Cost aggregate...')
        cc = Channel(data_dir, 'aggregate',
                     start_date=start_date, end_date=end_date) # cc = Current cost
        cc.plot(subplots[0], color='r', **kwargs)

    subplots[0].set_title('Aggregate. 1s active power, normalised.')
    subplots[0].legend()
    chan = one_sec if aggregate_type == 'one second' else cc

    if plot_appliance_ground_truth:
        print('Loading appliance ground truth dataset...')
        if ignore_chans is None:
            ignore_chans = ['aggregate', 'amp_livingroom', 'adsl_router',
                            'livingroom_s_lamp', 'gigE_&_USBhub',
                            'livingroom_s_lamp2', 'iPad_charger',
                            'subwoofer_livingroom', 'livingroom_lamp_tv',
                            'DAB_radio_livingroom', 'kitchen_lamp2',
                            'kitchen_phone&stereo', 'utilityrm_lamp',
                            'samsung_charger', 'kitchen_radio',
                            'bedroom_chargers', 'data_logger_pc',
                            'childs_table_lamp', 'baby_monitor_tx',
                            'battery_charger', 'office_lamp1', 'office_lamp2',
                            'office_lamp3', 'gigE_switch']
        ds = load_dataset(data_dir, ignore_chans=ignore_chans,
                          start_date=start_date, end_date=end_date)
        print("Removing inactive channels...")
        ds = remove_inactive_channels(ds)
        print("Plotting dataset ground truth...")
        plot_each_channel_activity(subplots[1], ds)

    return subplots, chan
Ejemplo n.º 13
0
    TIMESPAN = 'W'  # D (daily) or W (weekly)
    CHAN_IDS = [14, 22]
    spfl.setup()
    GRID = False
    TITLE_Y = 0.75
    XTICKS_ON = True
    LATEX_PDF_OUTPUT_FILENAME = os.path.join(
        FIGURE_PATH, 'weekly_usage_histograms' + FIGURE_SUFFIX)
else:
    CHAN_IDS = []

# Load every channel listed in CHAN_IDS from DATA_DIR and crop it with
# START_DATE and END_DATE; the cropped channels are collected in CHANS.
CHANS = []
for chan_id in CHAN_IDS:
    # Get channel data
    print("Loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    # crop() returns the channel to keep, so rebind c to its result.
    c = c.crop(START_DATE, END_DATE)
    CHANS.append(c)

if FIGURE_PRESET == 'boiler seasons':
    BIN_SIZE = 'T'  # D (daily) or H (hourly) or T (minutely)
    TIMESPAN = 'D'  # D (daily) or W (weekly)
    spfl.setup()
    GRID = False
    TITLE_Y = 0.7
    XTICKS_ON = True
    LATEX_PDF_OUTPUT_FILENAME = os.path.join(
        FIGURE_PATH, 'seasonal_variation' + FIGURE_SUFFIX)
    print("Loading winter boiler data...")
    winter_boiler = Channel(DATA_DIR, 2)
    winter_boiler = winter_boiler.crop(