Ejemplo n.º 1
0
    def __init__(self,
                 obs_df,
                 ref_name,
                 var_name,
                 period,
                 how=np.mean,
                 annual_rule='A'):
        """Build the comparison container around an observed dataset.

        :param obs_df: DataFrame of observations, datetime-indexed,
                       one column per site
        :param ref_name: legend/label name for the observed series
        :param var_name: name of the compared variable (used on plot axes)
        :param period: datetime-like sequence; observations are clipped to
                       [period[0], period[-1]]
        :param how: aggregation function used when resampling (default np.mean)
        :param annual_rule: pandas rule string for annual resampling
                            NOTE(review): stored but the resample call below
                            uses the literal 'A' — confirm intended
        """
        self.period = period
        self.var_name = var_name
        self.ref_name = ref_name
        self.aggr_how = how
        self.annual_rule = annual_rule

        # Clip observations to the requested period (inclusive bounds).
        obs_df = obs_df.loc[(obs_df.index >= period[0])
                            & (obs_df.index <= period[-1])]
        self.obs = New()
        self.obs.data = obs_df
        self.obs.freq = infer_freq(obs_df)
        # Pre-compute monthly aggregates unless data is already annual.
        # NOTE(review): resample(how=...) is pre-0.18 pandas API — confirm
        # the pinned pandas version supports it.
        if self.obs.freq != 'y':
            self.obs.monthly = self.obs.data.resample(rule='m',
                                                      how=self.aggr_how)
        self.obs.annual = self.obs.data.resample(rule='A', how=self.aggr_how)
        #        self.obs.freq = infer_freq(obs_df)
        self.models = ObjectDict()
        self._cmap = plt.get_cmap('gist_rainbow')
        self.selection = Selector([])
Ejemplo n.º 2
0
def test_SplitFileWriterNode():
    """Smoke-test annual split-file NetCDF output via OnDemandSimulator.

    Runs the AWRA-L model for 2010-2011 over a single cell and writes the
    'qtot' output to annual NetCDF files in the current directory.
    """
    from awrams.utils import extents
    from awrams.utils import datetools as dt

    import awrams.models.awral.description
    # Point the model at the bundled simulation test data.
    awrams.models.awral.description.CLIMATE_DATA = os.path.join(
        os.path.dirname(__file__), '..', '..', 'test_data', 'simulation')

    # (Fixed: 'nodes' was imported twice in the original.)
    from awrams.utils.nodegraph import nodes
    from awrams.simulation.ondemand import OnDemandSimulator
    from awrams.models import awral
    from awrams.utils.metatypes import ObjectDict

    input_map = awral.get_default_mapping()

    # output_path = './'
    mapping = {}
    mapping['qtot'] = nodes.write_to_annual_ncfile('./', 'qtot')

    output_map = ObjectDict(
        mapping=ObjectDict(mapping))  #,output_path=output_path)

    runner = OnDemandSimulator(awral,
                               input_map.mapping,
                               omapping=output_map.mapping)

    period = dt.dates('2010-2011')
    extent = extents.from_cell_offset(200, 200)
    r = runner.run(period, extent)
Ejemplo n.º 3
0
def dimensions_from_georef(gr):
    """Construct (time, latitude, longitude) NCDimension objects from a
    georeference dict.

    :param gr: dict with 'nlats', 'nlons', 'lat_origin', 'lon_origin' and
               'cellsize' entries
    :return: tuple (time_dim, lat_dim, lon_dim)
    """
    time_meta = ObjectDict(units="days since 1900-01-01",
                           calendar="gregorian",
                           name="time",
                           long_name="time",
                           standard_name='time')
    # Time dimension is unlimited (size/data set elsewhere).
    time_dim = NCDimension(dtype=np.dtype('int32'),
                           size=None,
                           data=None,
                           meta=time_meta)

    # Latitudes step southward from the origin, longitudes eastward.
    lat_values = gr['lat_origin'] - gr['cellsize'] * np.arange(
        gr['nlats'], dtype=np.float64)
    lat_dim = NCDimension(size=gr['nlats'],
                          dtype=np.dtype('float64'),
                          meta=ObjectDict(standard_name='latitude',
                                          long_name='latitude',
                                          name='latitude',
                                          units='degrees_north'),
                          data=lat_values)

    lon_values = gr['lon_origin'] + gr['cellsize'] * np.arange(
        gr['nlons'], dtype=np.float64)
    lon_dim = NCDimension(size=gr['nlons'],
                          dtype=np.dtype('float64'),
                          meta=ObjectDict(standard_name='longitude',
                                          long_name='longitude',
                                          name='longitude',
                                          units='degrees_east'),
                          data=lon_values)

    return (time_dim, lat_dim, lon_dim)
Ejemplo n.º 4
0
def test_multiple_catchment():
    """Run a short local calibration over two catchments with multiple
    observed variables (qtot, etot, w0)."""
    import os
    import pandas as pd
    import pickle
    import sys

    import awrams.calibration.calibration as c
    from awrams.models import awral
    from awrams.utils.metatypes import ObjectDict

    path = os.path.join(os.path.dirname(__file__), '..', '..', 'test_data',
                        'calibration')
    awral.CLIMATE_DATA = path

    cal = c.CalibrationInstance(awral)
    print(sys.argv)
    # Shrink the run when invoked under nosetests so the test stays fast.
    # (Fixed: indexing sys.argv[1] raised IndexError when argv had a single
    # entry; the guarded slice below is safe for any argv length.)
    if any(arg.endswith('nosetests') for arg in sys.argv[:2]):
        cal.node_settings.num_workers = 1
        cal.num_nodes = 1
        cal.termp.max_iter = 40

    cal.node_settings.catchment_ids = ['105001', '145003'
                                       ]  #['4508', '105001'] #, '145003']
    cal.node_settings.catchment_extents = pickle.load(
        open(os.path.join(path, 'cal_catchment_extents.pkl'), 'rb'))

    # cal.node_settings.run_period = pd.date_range("01/01/1950", "31/12/2011")
    # cal.node_settings.eval_period = pd.date_range("01/01/1981", "31/12/2011")
    cal.node_settings.run_period = pd.date_range("1 jan 2005", "31 dec 2010")
    cal.node_settings.eval_period = pd.date_range("1 jan 2005", "31 dec 2010")

    cal.node_settings.output_variables = ['qtot', 'etot', 'w0']
    awral.set_outputs({
        'OUTPUTS_CELL': ['qtot'],
        'OUTPUTS_HRU': [],
        'OUTPUTS_AVG': ['etot', 'w0']
    })

    # One observation source per calibrated variable, all read from csv.
    cal.node_settings.observations.qtot = ObjectDict()
    cal.node_settings.observations.etot = ObjectDict()
    cal.node_settings.observations.w0 = ObjectDict()
    cal.node_settings.observations.qtot.source_type = 'csv'
    cal.node_settings.observations.etot.source_type = 'csv'
    cal.node_settings.observations.w0.source_type = 'csv'
    # HostPath is a portable paths API; it allows you specify common bases on multiple systems that are resovled at runtime
    cal.node_settings.observations.qtot.filename = os.path.join(
        path, 'q_obs.csv')
    cal.node_settings.observations.etot.filename = os.path.join(
        path, 'cmrset_obs.csv')
    cal.node_settings.observations.w0.filename = os.path.join(
        path, 'sm_amsre_obs.csv')
    # View the localised hostpath...
    # cal.node_settings.objective.localf.filename = os.path.join(os.path.dirname(__file__),'objectives','multivar_objectives.py')
    cal.node_settings.objective.localf.classname = 'TestLocalMulti'
    # cal.node_settings.objective.globalf.filename = os.path.join(os.path.dirname(__file__),'objectives','multivar_objectives.py')
    cal.node_settings.objective.globalf.classname = 'GlobalMultiEval'

    cal.setup_local()

    cal.run_local()
Ejemplo n.º 5
0
 def get_best(self):
     """Return the best (lowest global score) evaluation.

     :return: ObjectDict with 'index', 'score' and 'parameters' fields
     """
     scores = self.fh['global_score'][...]
     # First occurrence of the minimum score wins.
     index = np.where(scores == scores.min())[0][0]
     best_params = dict(zip(self.parameters,
                            self.fh['parameter_values'][index]))
     return ObjectDict(index=index,
                       score=self.fh['global_score'][index],
                       parameters=best_params)
Ejemplo n.º 6
0
    def __init__(self, model):
        """Set up a calibration instance with default settings for *model*.

        :param model: model module/object accepted by default_node_settings
        """
        ns = default_node_settings(model)
        self.node_settings = ObjectDict(ns)

        # self.hyperp = ObjectDict(complex_sz=43,n_complexes=14,sub_sz=22,n_offspring=1,n_evol=43,min_complexes=1)
        # Optimiser hyperparameters (small defaults; the commented line
        # above shows a heavier configuration). The complex/shuffle
        # terminology suggests an SCE optimiser — confirm.
        self.hyperp = ObjectDict(complex_sz=5,
                                 n_complexes=5,
                                 sub_sz=2,
                                 n_offspring=1,
                                 n_evol=10,
                                 min_complexes=2)
        self.num_nodes = 4

        self.termp = default_term_params()  # termination criteria
        self.params = ns['default_params']  #default_cal_params(model)
        self.server = None
Ejemplo n.º 7
0
    def load(self, csv_path, id_list=None, convert_units=1.0):
        """
        load observed data from csv

        :param csv_path: path to csvs containing observations
                         expects to find csv files: sm_top.csv,sm_shallow.csv,sm_middle.csv,sm_deep.csv,sm_profile.csv
        :param id_list: None for comparison of all ids in csv or list of ids for subset
        :param convert_units: factor to apply to observations
        :return:
        """
        self.benchmark = ObjectDict()

        # One ComparisonSet per soil layer csv.
        for layer in self.layers:
            csv_file = os.path.join(csv_path, layer + '.csv')
            layer_df = self._load_data(csv_file, id_list, convert_units)
            self.benchmark[layer] = ComparisonSet(layer_df,
                                                  self.obs_name,
                                                  self.var_name,
                                                  self.period,
                                                  how=self.how,
                                                  annual_rule=self.annual_rule)

        self.sites = list(self._extents.keys())
        self.cfg = dict((s, self.sites_meta[s]) for s in self.sites)

        # Benchmarks are loaded; expose model registration to callers.
        self.add_model = self._add_model
Ejemplo n.º 8
0
    def _add_model(self,model_df,name,freq='d'):
        """Register a model run: align it with observations and build stats.

        :param model_df: datetime-indexed DataFrame of modelled values,
                         one column per site
        :param name: label for this model
        :param freq: native sampling of model_df: 'd' (daily) or 'm' (monthly)
        """
        self.selection._add(name)
        m = ObjectDict(freq=SAMPLE_RATE[freq])
        self.models[name] = m
        m.name = name

        m.data = ObjectDict()
        m.obs  = ObjectDict()

        # Restrict model data to the comparison period.
        m.data.raw = model_df.loc[self.period]

        if self.obs.freq != 'y':
            if freq == 'd' and self.obs.freq == 'd':
                # Daily vs daily: intersect first, then aggregate to months.
                m.data.daily,m.obs.daily = self._intersect(m.data.raw, self.obs.data)
                m.data.monthly = resample_to_months_df(m.data.daily, self.aggr_how)
                m.obs.monthly  = resample_to_months_df(m.obs.daily, self.aggr_how)

            elif freq == 'm' or self.obs.freq == 'm':
                # Mixed/monthly: bring both sides to monthly before aligning.
                if freq == 'm':
                    _mod = m.data.raw.resample(rule='m', how=self.aggr_how)
                else: # assume must be daily
                    _mod = resample_to_months_df(m.data.raw, self.aggr_how)
                if self.obs.freq == 'm':
                    _obs = self.obs.data.resample(rule='m', how=self.aggr_how)
                else:
                    _obs = resample_to_months_df(self.obs.data, self.aggr_how)

                m.data.monthly,m.obs.monthly = self._intersect(_mod,_obs)

            else:
                raise Exception('model freq is %s' % repr(freq))

            # Annual aggregates require at least 6 months of data per year.
            m.data.annual = resample_to_years_df(m.data.monthly, self.aggr_how, min_months=6)
            m.obs.annual = resample_to_years_df(m.obs.monthly, self.aggr_how, min_months=6)
            m.data.annual,m.obs.annual = self._intersect(m.data.annual,m.obs.annual)

        else: #obs are annual (recharge)
            m.data.annual = m.data.raw.resample(rule='a', how=self.aggr_how)
            m.obs.annual = self.obs.data

        # Summary statistics at each frequency available for this pairing.
        m.stats = ObjectDict(freq=freq)
        m.stats.daily = None
        m.stats.monthly = None
        if freq == 'd' and self.obs.freq == 'd':
            m.stats.daily = build_stats_df(m.obs.daily, m.data.daily, m.obs.daily.keys())
        if self.obs.freq != 'y':
            m.stats.monthly = build_stats_df(m.obs.monthly, m.data.monthly, m.obs.monthly.keys())
        if m.obs.annual is not None and m.data.annual is not None:
            m.stats.annual = build_stats_df(m.obs.annual, m.data.annual, m.obs.annual.keys())

        self.build_objfunc_stats(m.stats)
        self._assign_colours()
Ejemplo n.º 9
0
def get_output_nodes(template):
    """Build model_output nodes for every output variable in *template*.

    :param template: dict containing 'OUTPUTS_HRU', 'OUTPUTS_AVG' and
                     'OUTPUTS_CELL' lists of variable names
    :return: ObjectDict with a 'mapping' ObjectDict of name -> output node
    """
    from awrams.utils.nodegraph import nodes
    from awrams.utils.metatypes import ObjectDict
    from . import ffi_wrapper as fw

    keys = ['OUTPUTS_HRU', 'OUTPUTS_AVG', 'OUTPUTS_CELL']
    outputs = {k: template[k] for k in keys}

    output_vars = list(outputs['OUTPUTS_AVG'] + outputs['OUTPUTS_CELL'])
    # HRU outputs are emitted per-HRU: shallow-rooted and deep-rooted.
    for v in outputs['OUTPUTS_HRU']:
        output_vars += [v + '_sr', v + '_dr']

    mapping = {v: nodes.model_output(v) for v in output_vars}

    return ObjectDict(mapping=ObjectDict(mapping))  #,output_path=output_path)
Ejemplo n.º 10
0
def get_default_output_mapping(path='./'):
    """Build the default output mapping, writing each output variable to a
    NetCDF file under *path*.

    :param path: directory the output files are written to (default './')
    :return: ObjectDict with a 'mapping' ObjectDict of name -> writer node
    """
    from awrams.utils.nodegraph import nodes
    from awrams.utils.metatypes import ObjectDict

    #+++ not dealing with sr and dr versions of HRUS
    outputs = dict(
        (k, _DT[k]) for k in ['OUTPUTS_HRU', 'OUTPUTS_AVG', 'OUTPUTS_CELL'])

    output_vars = []
    for v in outputs['OUTPUTS_AVG'] + outputs['OUTPUTS_CELL']:
        output_vars.append(v)
    # HRU outputs are emitted per-HRU: shallow-rooted and deep-rooted.
    for v in outputs['OUTPUTS_HRU']:
        output_vars.extend([v + '_sr', v + '_dr'])

    # (Fixed: the original loop body was over-indented at 8 spaces.)
    mapping = {v: nodes.write_to_ncfile(path, v) for v in output_vars}

    return ObjectDict(mapping=ObjectDict(mapping))  #,output_path=output_path)
Ejemplo n.º 11
0
 def __init__(self, model_version, results_name=None):
     """Initialise an empty results container.

     :param model_version: version identifier of the model that produced
                           the results
     :param results_name: optional display name for this results set
     """
     self._variables = VariableGroup(self)
     self.extent = None  # spatial extent, populated later
     self.period = None  # time period, populated later
     self.name = results_name
     self._path = None
     self._model_version = model_version
     self.parameters = ObjectDict()
     self.parameters.spatial = None
     self.parameters.landscape = None
Ejemplo n.º 12
0
def default_term_params():
    """Return the default optimiser termination parameters.

    :return: ObjectDict of termination criteria
    """
    return ObjectDict({
        'max_shuffle': 1000,  # Max shuffling loops
        'max_iter': 20000,  # Max model evaluations
        'target_score': 1e-8,
        'max_nsni': 5,  # Max shuffle without improvement (see min_imp)
        'min_imp': 0.01,  # Minimum change required for 'improvement' metric
    })
Ejemplo n.º 13
0
def _index_results(results_folder):
    """Scan a results folder and summarise its contents.

    :param results_folder: directory containing result files
    :return: ObjectDict with metadata_from, name, extent, period,
             variables and model_version fields
    """
    filenames = _identify_results_files(results_folder)
    # Metadata is inferred from the first results file found.
    meta_file = filenames[0]

    summary = ObjectDict()
    summary.metadata_from = meta_file
    summary.name = None
    summary.extent = _infer_extent(meta_file, results_folder)
    summary.period = _infer_time_period(meta_file, results_folder)
    summary.variables = _identify_variables(filenames)
    summary.model_version = _infer_model_version(meta_file, results_folder)

    return summary
Ejemplo n.º 14
0
def _infer_time_period(filename, results_folder):
    """Infer the time period covered by a variable's result files.

    :param filename: any result filename for the variable of interest
    :param results_folder: folder containing the .nc result files
    :return: ObjectDict with 'type' (frequency), 'start', 'end' and
             'representation' fields; dates formatted YYYY-MM-DD
    """
    # All files for this variable, sorted so [0]/[-1] bound the period.
    filenames = sorted(
        glob(os.path.join(results_folder,
                          _variable_name(filename) + '*.nc')))

    # NOTE(review): the first file is opened twice and none of these
    # datasets are explicitly closed here — presumably managed_dataset
    # handles handle lifetime; verify.
    start = start_date(managed_dataset(filenames[0], 'r'))
    end = end_date(managed_dataset(filenames[-1], 'r'))
    freq = dataset_frequency(managed_dataset(filenames[0], 'r'))

    return ObjectDict(type=freq,
                      start=start.strftime('%Y-%m-%d'),
                      end=end.strftime('%Y-%m-%d'),
                      representation='YYYY-MM-DD')
Ejemplo n.º 15
0
def default_node_settings(model):
    """Build the default calibration node-settings dict for *model*.

    :param model: model module providing get_default_mapping() and a
                  CLIMATE_DATA path attribute
    :return: dict of settings (periods, inputs, catchments, observations,
             objective functions, logging)
    """
    import imp  # NOTE(review): 'imp' is deprecated; importlib is the modern API
    import awrams.calibration.objectives.multivar_objectives as w
    from awrams.utils.nodegraph import nodes

    ns = {}

    ns['run_period'] = 'UNSPECIFIED'  # the period for which the model is actually run
    ns['eval_period'] = 'UNSPECIFIED'  # the period over which it is evaluated against observations

    from multiprocessing import cpu_count
    ns['num_workers'] = 2  #cpu_count()

    ns['inputs'] = model.get_default_mapping()
    data_path = model.CLIMATE_DATA  #'/data/cwd_awra_data/awra_inputs/climate_generated/'
    # Forcing inputs: key -> (file pattern prefix, NetCDF variable name)
    FORCING = {
        'tmin': ('temp_min', 'temp_min_day'),
        'tmax': ('temp_max', 'temp_max_day'),
        'precip': ('rain', 'rain_day'),
        'solar': ('solar', 'solar_exposure_day')
    }
    for k, v in FORCING.items():
        ns['inputs'].mapping[k + '_f'] = nodes.forcing_from_ncfiles(
            data_path + '/', v[0] + '*', v[1], cache=False)

    # Example with single catchment...
    ns['catchment_ids'] = []
    ns['catchment_extents'] = {}

    ns['logfile'] = 'calibration.h5'

    # All cal catchments
    #ns['catchment_ids'] = [cid.strip() for cid in open('./Catchment_IDs.csv').readlines()[2:]]

    from .calibrate import get_parameter_df
    ns['default_params'] = get_parameter_df(ns['inputs'].mapping)

    # Observed streamflow source (csv); callers typically override these.
    ns['observations'] = ObjectDict(qtot=ObjectDict())

    ns['observations'].qtot.source_type = 'csv'
    ns['observations'].qtot.filename = '/mnt/awramsi_test_data/Calibration/Catchment_Qobs.csv'

    # Local (per-catchment) and global objective function settings.
    ns['objective'] = ObjectDict({
        'localf': ObjectDict(),
        'globalf': ObjectDict()
    })

    # NOTE(review): load_source presumably pre-registers the objective
    # module so it can be resolved on worker nodes — confirm.
    imp.load_source('lobjf_mod', w.__file__)
    ns['objective']['localf']['filename'] = w.__file__
    ns['objective']['localf']['classname'] = 'LocalEval'
    # Any arguments required by the evaluator are stored in this dict
    ns['objective']['localf']['arguments'] = ObjectDict()
    # e.g
    # ns['objective']['localf']['arguments']['min_valid'] = 15

    ns['objective']['globalf']['filename'] = w.__file__
    ns['objective']['globalf']['classname'] = 'GlobalMultiEval'

    return ns
Ejemplo n.º 16
0
    def _build(self, names):
        """Create selector entries for *names*; each starts out selected.

        Every name becomes an ObjectDict attribute on the selector exposing
        'select'/'unselect' callables bound to that name.

        :param names: iterable of entry names to register
        """
        from functools import partial

        def select(n):
            # Idempotent: only add if not already selected.
            # (Fixed: 'not n in' -> idiomatic 'n not in'.)
            if n not in self._sel:
                self._sel.append(n)

        def unselect(n):
            if n in self._sel:
                self._sel.remove(n)

        for name in names:
            self._sel.append(name)
            self.__dict__[name] = ObjectDict()
            self.__dict__[name]['select'] = partial(select, name)
            self.__dict__[name]['unselect'] = partial(unselect, name)
Ejemplo n.º 17
0
def get_all_settings():
    """Import per-subsystem settings modules found in ~/.awrams.

    Each ``~/.awrams/<submod>.py`` is imported as
    ``awrams.<submod>.settings`` when possible; otherwise the file is
    loaded directly as a standalone module.

    :return: ObjectDict mapping submodule name -> settings module
    """
    from awrams.utils.metatypes import ObjectDict
    import importlib.machinery
    import types

    HOME = os.path.expanduser('~')

    sdict = ObjectDict()

    import glob
    local_setting_files = glob.glob(os.path.join(HOME, '.awrams/*.py'))
    for f in local_setting_files:
        submod = os.path.split(f)[-1].split('.')[0]
        modname = 'awrams.{submod}.settings'.format(**locals())
        try:
            mod = importlib.import_module(modname)
            sdict[submod] = mod
        except Exception:
            # Best-effort fallback: load the settings file directly.
            # (Fixed: was a bare 'except:', which also swallowed
            # KeyboardInterrupt/SystemExit.)
            loader = importlib.machinery.SourceFileLoader(submod, f)
            mod = types.ModuleType(loader.name)
            loader.exec_module(mod)
            sdict[submod] = mod

    return sdict
Ejemplo n.º 18
0
def dict_to_od(d):
    """JSON object_hook: wrap a plain dict as a tab-completable ObjectDict.

    :param d: decoded JSON object (plain dict)
    :return: ObjectDict wrapping *d*
    """
    od = ObjectDict(d)
    return od
Ejemplo n.º 19
0
 def __init__(self, **pars):
     """Initialise NetCDF variable parameters.

     :param pars: additional parameter entries merged in via update()
     """
     # Compression disabled by default for NetCDF variable creation.
     self.nc_par = ObjectDict(zlib=False)  #fill_value=-999., zlib=False)
     self.update(pars)
Ejemplo n.º 20
0
def get_default_mapping():
    """Build the default AWRA-L input mapping: constant parameters,
    climate forcing, spatial grids and derived/initial-state nodes.

    :return: ObjectDict with 'mapping' (input name -> node) and
             'dimensions' fields
    """
    import json
    from awrams.utils.nodegraph import graph, nodes
    from awrams.utils.metatypes import ObjectDict
    from . import transforms
    import numpy as np

    dparams = json.load(open(DEFAULT_PARAMETER_FILE,'r'))
    #dparams = dict([(k.lower(),v) for k,v in dparams.items()])
    for entry in dparams:
        entry['MemberName'] = entry['MemberName'].lower()

    mapping = {}

#    for k,v in dparams.items():
#        mapping[k] = nodes.const(v)

    # Constant nodes for each default parameter; any metadata besides
    # MemberName/Value is passed through as keyword arguments.
    for entry in dparams:
        tmp = entry.copy()
        tmp.pop('MemberName')
        tmp.pop('Value')
        mapping[entry['MemberName']] = nodes.const(entry['Value'],**tmp)
    # Setup a new-style functional input map

    # Discover available spatial grids from the HDF5 parameters group.
    import h5py
    ds = h5py.File(SPATIAL_FILE,mode='r')
    SPATIAL_GRIDS = list(ds['parameters'])
    ds.close()

    # FORCING = {
    #     'tmin': ('tmin*','temp_min_day'),
    #     'tmax': ('tmax*','temp_max_day'),
    #     'precip': ('rr*','rain_day'),
    #     'solar': ('solar*','solar_exposure_day')
    # }

    # Forcing inputs: key -> (filename pattern, NetCDF variable name)
    FORCING = {
        'tmin': ('temp_min*','temp_min_day'),
        'tmax': ('temp_max*','temp_max_day'),
        'precip': ('rain*','rain_day'),
        'solar': ('solar*','solar_exposure_day')
    }
    for k,v in FORCING.items():
        mapping[k+'_f'] = nodes.forcing_from_ncfiles(CLIMATE_DATA,v[0],v[1])

    for grid in SPATIAL_GRIDS:
        if grid == 'height':
            # Height gets a dedicated hypsometric node type.
            mapping['height'] = nodes.hypso_from_hdf5(SPATIAL_FILE,'parameters/height')
        else:
            mapping[grid.lower()+'_grid'] = nodes.spatial_from_hdf5(SPATIAL_FILE,'parameters/%s' % grid)

    # Derived nodes; tmin/tmax are min/max'd so tmin <= tmax elementwise.
    mapping.update({
        'tmin': nodes.transform(np.minimum,['tmin_f','tmax_f']),
        'tmax': nodes.transform(np.maximum,['tmin_f','tmax_f']),
        'hypsperc_f': nodes.const_from_hdf5(SPATIAL_FILE,'dimensions/hypsometric_percentile',['hypsometric_percentile']),
        'hypsperc': nodes.mul('hypsperc_f',0.01), # Model needs 0-1.0, file represents as 0-100
        'fday': transforms.fday(),
        'u2t': transforms.u2t('windspeed_grid','fday')
    })

    mapping['er_frac_ref_hrusr'] = nodes.mul('er_frac_ref_hrudr',0.5)

    mapping['k_rout'] = nodes.transform(transforms.k_rout,('k_rout_scale','k_rout_int','meanpet_grid'))
    mapping['k_gw'] = nodes.mul('k_gw_scale','k_gw_grid')

    # Layer storage maxima (scale factors applied to AWC grids).
    mapping['s0max'] = nodes.mul('s0max_scale','s0fracawc_grid',100.)
    mapping['ssmax'] = nodes.mul('ssmax_scale','ssfracawc_grid',900.)
    # NOTE(review): sdmax reuses ssmax_scale and ssfracawc_grid — confirm
    # this is intentional and not a copy of the ssmax line.
    mapping['sdmax'] = nodes.mul('ssmax_scale','sdmax_scale','ssfracawc_grid',5000.)

    mapping['k0sat'] = nodes.mul('k0sat_scale','k0sat_v5_grid')
    mapping['kssat'] = nodes.mul('kssat_scale','kssat_v5_grid')
    mapping['kdsat'] = nodes.mul('kdsat_scale','kdsat_v5_grid')

    mapping['kr_0s'] = nodes.transform(transforms.interlayer_k,('k0sat','kssat'))
    mapping['kr_sd'] = nodes.transform(transforms.interlayer_k,('kssat','kdsat'))

    mapping['prefr'] = nodes.mul('pref_gridscale','pref_grid')
    # HRU fractions: deep-rooted = tree fraction, shallow-rooted = remainder.
    mapping['fhru_hrusr'] = nodes.sub(1.0,'f_tree_grid')
    mapping['fhru_hrudr'] = nodes.assign('f_tree_grid')
    mapping['ne'] = nodes.mul('ne_scale','ne_grid')
    mapping['slope'] = nodes.assign('slope_grid')
    mapping['hveg_hrudr'] = nodes.assign('hveg_dr_grid')
    mapping['hveg_hrusr'] = nodes.const(0.5)

    mapping['laimax_hrusr'] = nodes.assign('lai_max_grid')
    mapping['laimax_hrudr'] = nodes.assign('lai_max_grid')

    mapping['pair'] = nodes.const(97500.)

    mapping['pt'] = nodes.assign('precip_f')
    mapping['rgt'] = nodes.transform(np.maximum,['solar_f',0.1])
    mapping['tat'] = nodes.mix('tmin','tmax',0.75)
    mapping['avpt'] = nodes.transform(transforms.pe,'tmin')
    mapping['radcskyt'] = transforms.radcskyt()

    # Initial states; soil storages start at half their maxima.
    mapping['init_sr'] = nodes.const(0.0)
    mapping['init_sg'] = nodes.const(100.0)
    for hru in ('_hrusr','_hrudr'):
        mapping['init_mleaf'+hru] = nodes.div(2.0,'sla'+hru)
        for state in ["s0","ss","sd"]:
            mapping['init_'+state+hru] = nodes.mul(state+'max',0.5)

    # +++dims only required due to having to allocate shared-memory buffer before running...
    dims = ObjectDict(hypsometric_percentile=20,latitude=None,longitude=None,time=None)

    return ObjectDict(mapping=ObjectDict(mapping),dimensions=dims)
Ejemplo n.º 21
0
 def __init__(self, source, name, units):
     """Wrap a data source with its display name and units.

     :param source: underlying data provider object
     :param name: variable name
     :param units: units string for the variable
     """
     self.source = source
     self.name = name
     self.units = units
     self.meta = ObjectDict()  # free-form metadata container
Ejemplo n.º 22
0
class ComparisonSet(object):
    """Compare modelled timeseries against an observed reference dataset
    across sites, at daily/monthly/annual frequencies, with plotting and
    summary statistics."""

    def __init__(self,
                 obs_df,
                 ref_name,
                 var_name,
                 period,
                 how=np.mean,
                 annual_rule='A'):
        """Build the comparison container around an observed dataset.

        :param obs_df: DataFrame of observations, datetime-indexed,
                       one column per site
        :param ref_name: legend/label name for the observed series
        :param var_name: name of the compared variable (used on plot axes)
        :param period: datetime-like sequence; observations are clipped to
                       [period[0], period[-1]]
        :param how: aggregation function for resampling (default np.mean)
        :param annual_rule: pandas rule string for annual resampling
                            NOTE(review): stored but the resample call below
                            uses the literal 'A' — confirm intended
        """
        self.period = period
        self.var_name = var_name
        self.ref_name = ref_name
        self.aggr_how = how
        self.annual_rule = annual_rule

        # Clip observations to the requested period (inclusive bounds).
        obs_df = obs_df.loc[(obs_df.index >= period[0])
                            & (obs_df.index <= period[-1])]
        self.obs = New()
        self.obs.data = obs_df
        self.obs.freq = infer_freq(obs_df)
        # Pre-compute monthly aggregates unless data is already annual.
        if self.obs.freq != 'y':
            self.obs.monthly = self.obs.data.resample(rule='m',
                                                      how=self.aggr_how)
        self.obs.annual = self.obs.data.resample(rule='A', how=self.aggr_how)
        #        self.obs.freq = infer_freq(obs_df)
        self.models = ObjectDict()
        self._cmap = plt.get_cmap('gist_rainbow')
        self.selection = Selector([])

    def _assign_colours(self):
        n = len(self.models)
        n = 10 if n < 10 else n
        m_colours = np.linspace(0., 1., n)
        for i, m in enumerate(self.models.values()):
            m.colour = np.array(self._cmap(m_colours[i])) * np.array(
                (0.95, 0.75, 0.9, 1.0))

    def _intersect(self, mod, obs):
        """Align model and observed series on their jointly-valid dates.

        :param mod: dict-like of model series keyed by site
        :param obs: dict-like of observed series keyed by site
        :return: (mod_subset, obs_subset) dicts restricted to dates where
                 both series have valid data
        """
        mod_out = {}
        obs_out = {}

        for site in self.obs.data.columns:
            try:
                obs_series, mod_series = obs[site], mod[site]
                if obs_series is None or mod_series is None:
                    # No data for this site in one of the frames; skip.
                    continue

                shared_idx = valid_only(obs_series).index.intersection(
                    valid_only(mod_series).index)
                mod_out[site] = mod_series.loc[shared_idx]
                obs_out[site] = obs_series.loc[shared_idx]
            except KeyError:
                # Site missing from the model dataframe; skip silently.
                pass
        return mod_out, obs_out

    def _add_model(self, model_df, name, freq='d'):
        """Register a model run: align it with observations and build stats.

        :param model_df: datetime-indexed DataFrame of modelled values,
                         one column per site
        :param name: label for this model
        :param freq: native sampling of model_df: 'd' (daily) or 'm' (monthly)
        """
        self.selection._add(name)
        m = ObjectDict(freq=SAMPLE_RATE[freq])
        self.models[name] = m
        m.name = name

        m.data = ObjectDict()
        m.obs = ObjectDict()

        # Restrict model data to the comparison period.
        m.data.raw = model_df.loc[self.period]

        if self.obs.freq != 'y':
            if freq == 'd' and self.obs.freq == 'd':
                # Daily vs daily: intersect first, then aggregate to months.
                m.data.daily, m.obs.daily = self._intersect(
                    m.data.raw, self.obs.data)
                m.data.monthly = resample_to_months_df(m.data.daily,
                                                       self.aggr_how)
                m.obs.monthly = resample_to_months_df(m.obs.daily,
                                                      self.aggr_how)

            elif freq == 'm' or self.obs.freq == 'm':
                # Mixed/monthly: bring both sides to monthly before aligning.
                if freq == 'm':
                    _mod = m.data.raw.resample(rule='m', how=self.aggr_how)
                else:  # assume must be daily
                    _mod = resample_to_months_df(m.data.raw, self.aggr_how)
                if self.obs.freq == 'm':
                    _obs = self.obs.data.resample(rule='m', how=self.aggr_how)
                else:
                    _obs = resample_to_months_df(self.obs.data, self.aggr_how)

                m.data.monthly, m.obs.monthly = self._intersect(_mod, _obs)

            else:
                raise Exception('model freq is %s' % repr(freq))

            # Annual aggregates require at least 6 months of data per year.
            m.data.annual = resample_to_years_df(m.data.monthly,
                                                 self.aggr_how,
                                                 min_months=6)
            m.obs.annual = resample_to_years_df(m.obs.monthly,
                                                self.aggr_how,
                                                min_months=6)
            m.data.annual, m.obs.annual = self._intersect(
                m.data.annual, m.obs.annual)

        else:  #obs are annual (recharge)
            m.data.annual = m.data.raw.resample(rule='a', how=self.aggr_how)
            m.obs.annual = self.obs.data

        # Summary statistics at each frequency available for this pairing.
        m.stats = ObjectDict(freq=freq)
        m.stats.daily = None
        m.stats.monthly = None
        if freq == 'd' and self.obs.freq == 'd':
            m.stats.daily = build_stats_df(m.obs.daily, m.data.daily,
                                           m.obs.daily.keys())
        if self.obs.freq != 'y':
            m.stats.monthly = build_stats_df(m.obs.monthly, m.data.monthly,
                                             m.obs.monthly.keys())
        if m.obs.annual is not None and m.data.annual is not None:
            m.stats.annual = build_stats_df(m.obs.annual, m.data.annual,
                                            m.obs.annual.keys())

        self.build_objfunc_stats(m.stats)
        self._assign_colours()

    def build_objfunc_stats(self, stats):
        """Add an 'fobj' row to each available stats DataFrame.

        The objective combines NSE with a penalty on relative bias:
        fobj = nse - 5 * |log(1 + bias_relative)|**2.5; the daily variant
        averages the daily and monthly NSE first.

        :param stats: ObjectDict of stats DataFrames ('daily', 'monthly',
                      'annual'); modified in place
        """
        if stats.daily is not None:
            for site in stats.daily.columns:
                try:
                    stats.daily.loc['fobj', site] = (
                        stats.monthly.loc['nse', site] +
                        stats.daily.loc['nse', site]) / 2. - 5 * (np.abs(
                            np.log(1 + stats.daily.loc['bias_relative',
                                                       site])))**2.5
                except KeyError:
                    # Site lacks the required stat rows; leave fobj unset.
                    pass
        if stats.monthly is not None:
            for site in stats.monthly.columns:
                try:
                    stats.monthly.loc[
                        'fobj',
                        site] = stats.monthly.loc['nse', site] - 5 * (np.abs(
                            np.log(1 + stats.monthly.loc['bias_relative',
                                                         site])))**2.5
                except KeyError:
                    pass
        # Annual stats are accessed unconditionally here, unlike
        # daily/monthly — assumes stats.annual is always present.
        for site in stats.annual.columns:
            try:
                stats.annual.loc[
                    'fobj',
                    site] = stats.annual.loc['nse', site] - 5 * (np.abs(
                        np.log(1 +
                               stats.annual.loc['bias_relative', site])))**2.5
            except KeyError:
                pass

    def _iter_models(self, freq):
        """Return a generator over selected models sampled at least as
        finely as *freq*.

        :param freq: frequency key into SAMPLE_RATE
        :return: generator of model ObjectDicts
        """
        def _gen():
            for label in self.selection():
                candidate = self.models[label]
                if candidate.freq <= SAMPLE_RATE[freq]:
                    yield candidate

        return _gen()

    def _get_ax(self, kwargs):
        if 'ax' in kwargs:
            ax = kwargs['ax']
            del kwargs['ax']
        else:
            plt.figure(figsize=cfg.FIG_SIZE)
            ax = plt.subplot(1, 1, 1)
        return ax

    def plot_timeseries(self, site, freq='m', model=None, **kwargs):
        '''
        Plot timeseries of data at the specified site and frequency

        :param site: site/column identifier to plot
        :param freq: 'raw', 'd', 'm' or 'y'
        :param model: if given, plot only that model against its aligned
                      observations
        :param kwargs: 'ax' to draw on an existing axes; remaining entries
                       are forwarded to ax.set()
        :return: the matplotlib axes, or None if *model* is unknown
        '''
        from functools import partial

        ax = self._get_ax(kwargs)

        # def _plot(ax,series,label,colour):
        def _plot(series, label, colour):
            #+++ fix for pandas 0.16.1 legend label bug (see https://github.com/pydata/pandas/issues/10119)
            series.name = label
            # series.plot(legend=True,axes=ax,color=colour)
            series.plot(legend=True, color=colour)

        # plot = partial(_plot,ax=ax)
        plot = partial(_plot)

        if freq == 'raw':
            # Unresampled data: observations in black, then each model.
            #self.obs.data[site].plot(legend=True,axes=ax,color='black',label=self.ref_name)
            plot(series=self.obs.data[site],
                 label=self.ref_name,
                 colour='black')
            for name in self.selection():
                m = self.models[name]
                #m.data.raw[site].plot(legend=True,axes=ax,color=m.colour,label=m.name)
                plot(series=m.data.raw[site], label=m.name, colour=m.colour)
        else:
            tf = dt.validate_timeframe(freq).lower()
            # Old-style conditional: maps 'y' to the pandas annual rule 'A'.
            _freq = freq == 'y' and 'A' or freq

            if model is not None:
                if not model in self.models:
                    logger.critical("%s not found in %s", model, self.models)
                    return None
                else:
                    # Plot the model against its own aligned observations.
                    plot(series=self.models[model].obs[tf][site].resample(
                        _freq),
                         label=self.ref_name,
                         colour='black')
                    plot(series=self.models[model].data[tf][site].resample(
                        _freq),
                         label=self.models[model].name,
                         colour=self.models[model].colour)
            else:
                if freq == 'd':
                    plot(series=self.obs.data[site].resample(_freq),
                         label=self.ref_name,
                         colour='black')
                elif freq == 'm':
                    plot(series=self.obs.monthly[site].resample(_freq),
                         label=self.ref_name,
                         colour='black')
                elif freq == 'y':
                    plot(series=self.obs.annual[site].resample(_freq),
                         label=self.ref_name,
                         colour='black')

                for m in self._iter_models(freq):
                    try:
                        plot(series=m.data[tf][site].resample(_freq),
                             label=m.name,
                             colour=m.colour)
                    except:
                        logger.warning("no data to plot for %s site %s",
                                       m.name, site)

        ax.legend(loc='best')
        ax.set_title("%s" % site)
        ax.set_ylabel(self.var_name)
        ax.set(**kwargs)
        ax.grid()
        return ax

    def plot_cdf(self, statistic='pearsons_r', freq='m', **kwargs):
        '''
        Plot the empirical CDF for the specified statistic and frequency

        :param statistic: row name in the stats DataFrame (e.g. 'nse',
                          'pearsons_r')
        :param freq: 'd', 'm' or 'y'
        :param kwargs: 'ax' to draw on an existing axes; remaining entries
                       are forwarded to ax.set()
        :return: the matplotlib axes
        '''
        tf = dt.validate_timeframe(freq).lower()

        ax = self._get_ax(kwargs)

        for m in self._iter_models(freq):
            # Exclude the aggregate 'all' column and NaNs before sorting.
            y = sorted(m.stats[tf].loc[statistic,
                                       m.stats[tf].columns != 'all'].dropna()
                       )  # temporary fix for broken cdf's
            ax.plot(np.linspace(0, 1., len(y)),
                    y,
                    color=m.colour,
                    label=m.name)

        ax.set_xlabel("Catchments below (%)")
        ax.set_ylabel(statistic)
        ax.legend(loc='best')
        ax.set(**kwargs)
        ax.grid()
        return ax

    def plot_box(self, statistic, freq='m', **kwargs):
        '''
        Show a box-plot for the specified statistic and timeframe.

        :param statistic: row label in each model's stats table
        :param freq: frequency key ('d', 'm' or 'y')
        :param kwargs: matplotlib axes properties, applied best-effort
        :return: (axes, boxplot-artist dict) tuple
        '''
        tf = dt.validate_timeframe(freq).lower()

        ax = self._get_ax(kwargs)

        data = []
        colours = []
        names = []

        for m in self._iter_models(freq):
            # exclude the aggregate 'all' column from the distribution
            data.append(m.stats[tf].loc[statistic,
                                        m.stats[tf].columns != 'all'])
            colours.append(m.colour)
            names.append(m.name)

        box = ax.boxplot(data, patch_artist=True)

        ax.set_ylabel(statistic)

        for patch, colour in zip(box['boxes'], colours):
            patch.set_facecolor(colour)

        ax.set_xticklabels(names, rotation=90, fontsize=8)

        # Apply kwargs one at a time so a single unsupported axes property
        # doesn't abort the rest.  BUG FIX: the bare `except:` here also
        # swallowed KeyboardInterrupt/SystemExit; narrowed to Exception
        # while keeping the deliberate best-effort behaviour.
        for k, v in kwargs.items():
            try:
                ax.set(**{k: v})
            except Exception:
                pass
        ax.grid()
        return ax, box

    def plot_regression(self,
                        site=None,
                        freq='m',
                        title="",
                        size=20,
                        **kwargs):
        '''
        Plot the model regression(s) for the specified site and frequency.

        :param site: single site name, or None to pool all observed sites
        :param freq: frequency key ('d', 'm' or 'y')
        :param title: plot title (suffixed with the site name for a
                      single-site plot)
        :param size: scatter marker size
        :param kwargs: forwarded to the axes via ax.set()
        :return: matplotlib axes
        '''
        if site is None:
            site = list(self.obs.data.columns)
            stats_index = 'all'
        else:
            stats_index = site
            site = [site]

        tf = dt.validate_timeframe(freq).lower()

        ax = self._get_ax(kwargs)

        for m in self._iter_models(freq):
            # restrict to sites this model actually has data for
            _site_list = [_site for _site in site
                          if _site in m.data[tf].keys()]

            model_data = pd.DataFrame.from_dict(m.data[tf])[_site_list]
            obs_data = pd.DataFrame.from_dict(m.obs[tf])[_site_list]
            ax.scatter(obs_data, model_data, color=m.colour, s=size)

        ax.set_ylabel('model ' + self.var_name)
        ax.set_xlabel(str(self.ref_name))
        # BUG FIX: `site` is always a list at this point (a scalar site is
        # wrapped above), so the old `isinstance(site, list)` test was
        # always True and the site name never appeared in the title.
        # Decide on stats_index instead, which retains the scalar name.
        if stats_index == 'all':
            ax.set_title(title)
        else:
            ax.set_title(title + " %s" % stats_index)

        ax.set(**kwargs)
        ax.grid()

        # plot regression lines and 1:1 line
        rl = get_ax_limit(ax)

        for m in self._iter_models(freq):
            try:
                mstats = m.stats[tf][stats_index]
            except KeyError:
                # model has no stats for this site; skip its line
                continue
            regress_line = mstats.loc[
                'r_intercept'] + rl * mstats.loc['r_slope']
            ax.plot(rl, regress_line, color=m.colour, label=m.name)
        ax.plot(rl, rl, linestyle='--', color='black', label='1:1')

        ax.legend(loc='best')
        return ax

    def stat(self, statistic='mean', freq='m'):
        '''
        Return a DataFrame of the given statistic for every model.

        For statistic == 'mean', an extra column of the observed mean
        (read from the last iterated model's stats table) is appended.

        :param statistic: row label in each model's stats table
        :param freq: frequency key ('d', 'm' or 'y')
        :return: DataFrame, one column per model
        '''
        tf = dt.validate_timeframe(freq).lower()
        df = pd.DataFrame()

        m = None
        for m in self._iter_models(freq):
            df[m.name] = m.stats[tf].loc[statistic]
        # BUG FIX: `m` previously leaked out of the loop, so an empty model
        # set raised NameError here; guard against that case explicitly.
        if statistic == 'mean' and m is not None:
            df[self.ref_name] = m.stats[tf].loc['obs_mean']

        return df

    def stat_percentiles(self, statistic='pearsons_r', freq='m', pctiles=None):
        '''
        Print a summary of percentiles for the specified statistic and
        timeframe.

        :param statistic: row label, or the special value "grand_f"
        :param freq: frequency key ('d', 'm' or 'y')
        :param pctiles: percentile breakpoints (defaults to 0/5/25/50/75/95/100)
        :return: transposed DataFrame, one row per model
        '''
        if pctiles is None:
            pctiles = [0, 5, 25, 50, 75, 95, 100]
        tf = dt.validate_timeframe(freq).lower()
        df = pd.DataFrame()

        for model in self._iter_models(freq):
            stats_tf = model.stats[tf]
            if statistic == "grand_f":
                # grand_f collapses the fobj percentiles to a single score
                series = stats_tf.loc['fobj', stats_tf.columns != 'all']
                try:
                    pct = standard_percentiles(series)
                    grand = (pct['25%'] + pct['50%'] + pct['75%'] +
                             pct['100%']) / 4
                    df[model.name] = pd.Series(index=['grand_f'],
                                               data=[grand])
                except IndexError:
                    logger.warning("no stats for model: %s", model.name)
            else:
                series = stats_tf.loc[statistic, stats_tf.columns != 'all']
                try:
                    df[model.name] = standard_percentiles(series, pctiles)
                except IndexError:
                    logger.warning("no stats for model: %s", model.name)

        return df.transpose()

    def data_percentiles(self, freq='m', pctiles=None):
        '''
        Print a summary of percentiles for the actual data values.

        :param freq: frequency key ('d', 'm' or 'y')
        :param pctiles: percentile breakpoints (defaults to 0/5/25/50/75/95/100)
        :return: transposed DataFrame, one row per series (obs + models)
        '''
        if pctiles is None:
            pctiles = [0, 5, 25, 50, 75, 95, 100]
        tf = dt.validate_timeframe(freq).lower()
        df = pd.DataFrame()

        # obs won't match model.obs since different obs.valid_idx for each model
        if freq == 'd':
            obs_series = self.obs.data.mean().values.flatten()
        else:
            obs_series = self.obs.data.resample(
                rule=dt.pandas_tf_dict[tf],
                how=self.aggr_how).mean().values.flatten()

        df[self.ref_name] = standard_percentiles(obs_series, pctiles)

        for model in self._iter_models(freq):
            flat = pd.DataFrame.from_dict(
                model.data[tf]).mean().values.flatten()
            try:
                df[model.name] = standard_percentiles(flat, pctiles)
            except IndexError:
                logger.warning("no stats for model: %s", model.name)

        return df.transpose()