def load_site(cls,
                  config,
                  site,
                  data_range=None,
                  meta_src=None,
                  data_src=None):
        """Load a single LMRdb proxy record and build a proxy object.

        Parameters
        ----------
        config: LMR configuration object exposing config.proxies.LMRdb
        site: str
            'Proxy ID' of the record to load.
        data_range: tuple(start, finish), optional
            Inclusive year range to restrict the record to.
        meta_src, data_src: pandas.DataFrame, optional
            Pre-loaded metadata / data frames.  When None they are read
            from the configured pickled pandas DataFrame files.

        Returns
        -------
        An instance of cls describing the proxy record.

        Raises
        ------
        ValueError
            If the (archive type, measurement) pair has no entry in
            proxy_type_mapping, or no observations fall in the
            requested time range.
        """

        LMRdb_cfg = config.proxies.LMRdb
        if meta_src is None:
            meta_src = load_data_frame(LMRdb_cfg.metafile_proxy)
        if data_src is None:
            data_src = load_data_frame(LMRdb_cfg.datafile_proxy)
            data_src = data_src.to_dense()

        site_meta = meta_src[meta_src['Proxy ID'] == site]
        pid = site_meta['Proxy ID'].iloc[0]
        pmeasure = site_meta['Proxy measurement'].iloc[0]
        LMRdb_type = site_meta['Archive type'].iloc[0]
        try:
            proxy_type = LMRdb_cfg.proxy_type_mapping[(LMRdb_type, pmeasure)]
        except (KeyError, ValueError) as e:
            print('Proxy type/measurement not found in mapping: {}'.format(e))
            # Chain the original lookup failure for easier debugging.
            raise ValueError(e) from e

        # BUGFIX: 'Oldest (C.E.)' is the earliest year of the record and
        # 'Youngest (C.E.)' the latest -- consistent with the availability
        # filtering in load_all ('Oldest' <= start, 'Youngest' >= finish).
        # The previous assignment had the two swapped.
        start_yr = site_meta['Oldest (C.E.)'].iloc[0]
        end_yr = site_meta['Youngest (C.E.)'].iloc[0]
        lat = site_meta['Lat (N)'].iloc[0]
        lon = site_meta['Lon (E)'].iloc[0]
        elev = site_meta['Elev'].iloc[0]
        site_data = data_src[site]
        seasonality = site_meta['Seasonality'].iloc[0]
        # Seasonality may be stored as the string repr of a list;
        # make sure a list is returned.
        if not isinstance(seasonality, list):
            seasonality = ast.literal_eval(seasonality)

        if data_range is not None:
            start, finish = data_range
            values = site_data[(site_data.index >= start)
                               & (site_data.index <= finish)]
        else:
            values = site_data

        # Drop missing observations before any further processing.
        values = values[values.notnull()]

        # Fail early when nothing remains in the requested span.
        if len(values) == 0:
            raise ValueError('No observations in specified time range.')

        times = values.index.values

        # transform in "anomalies" (time-mean removed) if option activated
        if LMRdb_cfg.proxy_timeseries_kind == 'anom':
            values = values - values.mean()

        return cls(config, pid, proxy_type, start_yr, end_yr, lat, lon, elev,
                   seasonality, values, times)
    def load_all_annual_no_filtering(cls,
                                     config,
                                     meta_src=None,
                                     data_src=None):
        """
        Method created to facilitate the loading of all possible proxy records
        that can be calibrated with annual resolution.

        Note: This is still subject to constraints from the PSM calibration (
        i.e. if there is an r_crit or not enough calibration data the proxy
        will not be loaded)

        Returns
        -------
        proxy_objs: list(BaseProxyObject like)
        """

        # Load source data files
        if meta_src is None:
            meta_src = load_data_frame(config.proxies.LMRdb.metafile_proxy)
        if data_src is None:
            data_src = load_data_frame(config.proxies.LMRdb.datafile_proxy)
            data_src = data_src.to_dense()

        # TODO: For now hard coded to annual resolution - AP
        useable = meta_src['Resolution (yr)'] == 1.0

        proxy_ids = meta_src['Proxy ID'][useable].values

        proxy_objs = []
        for site in proxy_ids:
            try:
                pobj = cls.load_site(config,
                                     site,
                                     meta_src=meta_src,
                                     data_src=data_src)
                proxy_objs.append(pobj)
            except ValueError as e:
                print(e)

        return proxy_objs
def load_all(cls, config, data_range, meta_src=None, data_src=None):
        """Load all NCDCdtda proxy records that pass the configured filters.

        Filtering proceeds in stages: simple metadata filters (with
        support for (min, max) ranges on 'Resolution (yr)'), an optional
        availability check over the reconstruction period, a database
        membership filter, a user-defined blacklist, and finally
        selection by archive type / measurement as listed in the
        proxy_order and proxy_assim2 configuration.

        Expects meta_src, data_src to be pickled pandas DataFrame objects.

        Returns
        -------
        proxy_id_by_type: dict
            Maps proxy-type name -> list of site ids that were kept.
        all_proxies: list
            Proxy objects created via cls.load_site.
        """

        # Load source data files
        if meta_src is None:
            meta_src = load_data_frame(config.proxies.NCDCdtda.metafile_proxy)
        if data_src is None:
            data_src = load_data_frame(config.proxies.NCDCdtda.datafile_proxy)

        filters = config.proxies.NCDCdtda.simple_filters
        proxy_order = config.proxies.NCDCdtda.proxy_order
        ptype_filters = config.proxies.NCDCdtda.proxy_assim2
        dbase_filters = config.proxies.NCDCdtda.database_filter
        proxy_blacklist = config.proxies.NCDCdtda.proxy_blacklist
        availability_filter = config.proxies.NCDCdtda.proxy_availability_filter
        availability_fraction = config.proxies.NCDCdtda.proxy_availability_fraction

        # initial mask all true before filtering.  The "== 0" comparison
        # only builds a boolean Series with the right index; "|= True"
        # then sets every element to True.
        useable = meta_src[meta_src.columns[0]] == 0
        useable |= True
        availability_mask = meta_src[meta_src.columns[0]] == 0
        availability_mask |= True

        # Find indices matching simple filter specifications: a record
        # survives a filter if its column matches any of the listed values.
        for colname, filt_list in filters.items():

            simple_mask = meta_src[colname] == 0
            simple_mask &= False

            for value in filt_list:
                # A (min, max) tuple for 'Resolution (yr)' keeps any
                # resolution within that inclusive range.
                if colname == 'Resolution (yr)' and type(value) is tuple:
                    for i in range(len(meta_src[colname].index)):
                        simple_mask[i] |= (value[0] <= meta_src[colname][i] <=
                                           value[1])
                else:
                    simple_mask |= meta_src[colname] == value

            useable &= simple_mask

        # Filtering proxy records on conditions of availability during
        # the reconstruction period (recon_period in configuration, or
        # data_range here).
        if availability_filter:  # if not None
            start, finish = data_range
            # Checking proxy metadata's period of availability against
            # reconstruction period: the record must begin no later than
            # `start` and end no earlier than `finish`.
            availability_mask = ((meta_src['Oldest (C.E.)'] <= start) &
                                 (meta_src['Youngest (C.E.)'] >= finish))
            # Checking level of completeness of record within the reconstruction
            # period (ignore record if fraction of available data is below user-defined
            # threshold (proxy_availability_fraction in config).
            maxnb = (finish - start) + 1
            proxies_to_test = meta_src['Proxy ID'][availability_mask
                                                   & useable].values
            for prx in proxies_to_test.tolist():
                values = data_src[prx][(data_src[:].index >= start)
                                       & (data_src[:].index <= finish)]
                values = values[values.notnull()]
                frac_available = float(len(values)) / float(maxnb)
                if frac_available < availability_fraction:
                    availability_mask[meta_src[meta_src['Proxy ID'] ==
                                               prx].index] = False

        # Find indices matching **database filter** specifications
        database_col = 'Databases'

        # dbase_filters not "None" or empty list (some selection on db has been activated)
        if dbase_filters:
            # define boolean array with right dimension & set all to False
            dbase_mask = meta_src[database_col] == 0
            # set mask to True for proxies belonging to at least one of the
            # databases listed in dbase_filters (any overlap keeps the
            # record; earlier revisions required disjointness or a full
            # subset instead).
            for i in range(len(meta_src[database_col])):
                if meta_src[database_col][i]:
                    dbase_mask[i] = bool(
                        set(meta_src[database_col][i]).intersection(
                            set(dbase_filters)))
                else:
                    # No database membership recorded: exclude the record.
                    dbase_mask[i] = False
        else:
            # selection on db has NOT been activated:
            # define boolean array with right dimension & set all to True
            dbase_mask = meta_src[database_col] != 0

        # Define mask of proxies listed in a user-defined "blacklist"
        # (see LMR_config).
        # boolean array set with right dimension & all set to True
        blacklist_mask = meta_src['Proxy ID'] != ' '
        if proxy_blacklist:
            # If site listed in blacklist (prefix match on the id), modify
            # corresponding elements of boolean array to False
            for pbl in proxy_blacklist:
                tmp = meta_src['Proxy ID'].map(lambda x: x.startswith(pbl))
                inds = meta_src['Proxy ID'][tmp].index
                blacklist_mask[inds] = False

        # Create proxy id lists
        proxy_id_by_type = {}
        all_proxy_ids = []

        type_col = 'Archive type'
        measure_col = 'Proxy measurement'
        for name in proxy_order:

            type_mask = meta_src[type_col] == 0
            type_mask |= True

            # Filter to proxies of a certain type; the type name is the
            # part of `name` before the first underscore.
            ptype = name.split('_', 1)[0]
            type_mask &= meta_src[type_col] == ptype

            # Reduce to listed measures
            measure_mask = meta_src[measure_col] == 0
            measure_mask &= False

            for measure in ptype_filters[name]:
                measure_mask |= meta_src[measure_col] == measure

            # Extract proxy ids using the conjunction of all masks and
            # append to lists
            proxies = meta_src['Proxy ID'][measure_mask & type_mask
                                           & dbase_mask & blacklist_mask
                                           & availability_mask
                                           & useable].values

            # If we have ids after filtering add them to the type list
            if len(proxies) > 0:
                proxy_id_by_type[name] = proxies.tolist()

            all_proxy_ids += proxies.tolist()

        # Create proxy objects list
        all_proxies = []
        for site in all_proxy_ids:
            try:
                pobj = cls.load_site(config,
                                     site,
                                     data_range,
                                     meta_src=meta_src,
                                     data_src=data_src)
                all_proxies.append(pobj)
            except ValueError as e:
                # Proxy had no obs or didn't meet psm r crit; also drop
                # it from its per-type grouping.
                for group in list(proxy_id_by_type.values()):
                    if site in group:
                        group.remove(site)
                        break  # Should only be one instance

        return proxy_id_by_type, all_proxies
    def load_all(cls, config, data_range, meta_src=None, data_src=None):
        """%%aug%%

        Expects meta_src, data_src to be pickled pandas DataFrame objects.
        """

        # Load source data files
        if meta_src is None:
            meta_src = load_data_frame(config.proxies.PAGES2kv1.metafile_proxy)
        if data_src is None:
            data_src = load_data_frame(config.proxies.PAGES2kv1.datafile_proxy)
            data_src = data_src.to_dense()

        filters = config.proxies.PAGES2kv1.simple_filters
        proxy_order = config.proxies.PAGES2kv1.proxy_order
        ptype_filters = config.proxies.PAGES2kv1.proxy_assim2
        availability_filter = config.proxies.PAGES2kv1.proxy_availability_filter
        availability_fraction = config.proxies.PAGES2kv1.proxy_availability_fraction

        # initial masks all true before filtering
        useable = meta_src[meta_src.columns[0]] == 0
        useable |= True
        availability_mask = meta_src[meta_src.columns[0]] == 0
        availability_mask |= True

        # Find indices matching filter specifications
        for colname, filt_list in filters.items():
            simple_mask = meta_src[colname] == 0
            simple_mask &= False

            for value in filt_list:
                simple_mask |= meta_src[colname] == value

            useable &= simple_mask

        # Filtering proxy records on conditions of availability during
        # the reconstruction period (recon_period in configuration, or
        # data_range here).
        if availability_filter:  # if not None
            start, finish = data_range
            # Checking proxy metadata's period of availability against
            # reconstruction period.
            availability_mask = ((meta_src['Oldest (C.E.)'] <= start) &
                                 (meta_src['Youngest (C.E.)'] >= finish))
            # Checking level of completeness of record within the reconstruction
            # period (ignore record if fraction of available data is below user-defined
            # threshold (proxy_availability_fraction in config).
            maxnb = (finish - start) + 1
            proxies_to_test = meta_src['Proxy ID'][availability_mask
                                                   & useable].values
            for prx in proxies_to_test.tolist():
                values = data_src[prx][(data_src[:].index >= start)
                                       & (data_src[:].index <= finish)]
                values = values[values.notnull()]
                frac_available = float(len(values)) / float(maxnb)
                if frac_available < availability_fraction:
                    availability_mask[meta_src[meta_src['Proxy ID'] ==
                                               prx].index] = False

        # Create proxy id lists
        proxy_id_by_type = {}
        all_proxy_ids = []

        type_col = 'Archive type'
        measure_col = 'Proxy measurement'
        for name in proxy_order:

            type_mask = meta_src[type_col] == 0
            type_mask |= True

            # Filter to proxies of a certain type
            ptype = name.split('_', 1)[0]
            type_mask &= meta_src[type_col] == ptype

            # Reduce to listed measures
            measure_mask = meta_src[measure_col] == 0
            measure_mask &= False

            for measure in ptype_filters[name]:
                measure_mask |= meta_src[measure_col] == measure

            # Extract proxy ids using mask and append to lists
            proxies = meta_src['Proxy ID'][measure_mask & type_mask
                                           & availability_mask
                                           & useable].values

            # If we have ids after filtering add them to the type list
            if len(proxies) > 0:
                proxy_id_by_type[name] = proxies.tolist()

            all_proxy_ids += proxies.tolist()

        # Create proxy objects list
        all_proxies = []
        for site in all_proxy_ids:
            try:
                pobj = cls.load_site(config,
                                     site,
                                     data_range,
                                     meta_src=meta_src,
                                     data_src=data_src)
                all_proxies.append(pobj)
            except ValueError as e:
                # Proxy had no obs or didn't meet psm r crit
                for group in list(proxy_id_by_type.values()):
                    if site in group:
                        group.remove(site)
                        break  # Should only be one instance

        return proxy_id_by_type, all_proxies