def bspline_clean_dataset(dataset, genome, loci, prediction_steps):
    """Clean a dataset containing temperatures and loads using cleaning
    parameters from the genome. The dataset is expected to contain NaNs in the
    last *prediction_steps* elements of the Load series.

    The work is done on ``dataset.copy()``, which is also the return value:
    its 'Temperature' entry is replaced by the cleaned series, and all but
    the last *prediction_steps* elements of its 'Load' entry are overwritten
    with cleaned values.

    Cleaned series are memoized in the module-level ``_temp_cache`` and
    ``_load_cache``, keyed by (hash of the raw series, smoothing parameter,
    z-score parameter). ``_temp_mutex`` / ``_load_mutex`` are held only while
    a cache is read or written, never while the cleaning itself runs, so two
    callers may compute the same key concurrently and the later store simply
    overwrites the earlier one.

    Each mutex is released in a ``finally`` clause, so an unexpected
    exception raised while it is held can no longer leave it locked.
    """
    # Having the smoother as a global is not nice, but it speeds up things A
    # LOT, because pickling the smoother caches takes a long time for large
    # matrices (long time series).
    global _smoother, _temp_cache, _load_cache
    if _smoother is None:
        _smoother = cln.BSplineSmoother(dataset, smoothness=1)
    clean_data = dataset.copy()

    # --- Temperature -------------------------------------------------------
    key = (_get_dataset_hash(dataset["Temperature"]),
           genome[loci.t_smooth], genome[loci.t_zscore])
    _temp_mutex.acquire()
    try:
        clean_data['Temperature'] = _temp_cache[key].copy()
        temp_cached = True
    except KeyError:
        # Cache miss: the series is computed below, outside the lock.
        temp_cached = False
    finally:
        _temp_mutex.release()
    if not temp_cached:
        clean_data['Temperature'] = \
          cln.bspline_clean(dataset['Temperature'],
                            genome[loci.t_smooth],
                            genome[loci.t_zscore], _smoother)
        _temp_mutex.acquire()
        try:
            _temp_cache[key] = clean_data['Temperature'].copy()
        finally:
            _temp_mutex.release()

    # --- Load --------------------------------------------------------------
    # NOTE(review): if prediction_steps == 0 the slice [:-0] selects nothing,
    # so no load values would be cleaned -- confirm callers never pass 0.
    key = (_get_dataset_hash(dataset["Load"]),
           genome[loci.l_smooth], genome[loci.l_zscore])
    _load_mutex.acquire()
    try:
        clean_data['Load'][:-prediction_steps] = _load_cache[key].copy()
        load_cached = True
    except KeyError:
        # Cache miss: the series is computed below, outside the lock.
        load_cached = False
    finally:
        _load_mutex.release()
    if not load_cached:
        clean_data['Load'][:-prediction_steps] = \
          cln.bspline_clean(dataset['Load'][:-prediction_steps],
                            genome[loci.l_smooth],
                            genome[loci.l_zscore], _smoother)
        _load_mutex.acquire()
        try:
            _load_cache[key] = clean_data['Load'][:-prediction_steps].copy()
        finally:
            _load_mutex.release()
    return clean_data
Example #2
0
def bspline_clean_dataset(dataset, genome, loci, prediction_steps):
    """Clean a dataset containing temperatures and loads using cleaning
    parameters from the genome. The dataset is expected to contain NaNs in the
    last *prediction_steps* elements of the Load series.

    The work is done on ``dataset.copy()``, which is also the return value:
    its 'Temperature' entry is replaced by the cleaned series, and all but
    the last *prediction_steps* elements of its 'Load' entry are overwritten
    with cleaned values.

    Cleaned series are memoized in the module-level ``_temp_cache`` and
    ``_load_cache``, keyed by (hash of the raw series, smoothing parameter,
    z-score parameter). ``_temp_mutex`` / ``_load_mutex`` are held only while
    a cache is read or written, never while the cleaning itself runs, so two
    callers may compute the same key concurrently and the later store simply
    overwrites the earlier one.

    Each mutex is released in a ``finally`` clause, so an unexpected
    exception raised while it is held can no longer leave it locked.
    """
    # Having the smoother as a global is not nice, but it speeds up things A
    # LOT, because pickling the smoother caches takes a long time for large
    # matrices (long time series).
    global _smoother, _temp_cache, _load_cache
    if _smoother is None:
        _smoother = cln.BSplineSmoother(dataset, smoothness=1)
    clean_data = dataset.copy()

    # --- Temperature -------------------------------------------------------
    key = (_get_dataset_hash(dataset["Temperature"]), genome[loci.t_smooth],
           genome[loci.t_zscore])
    _temp_mutex.acquire()
    try:
        clean_data['Temperature'] = _temp_cache[key].copy()
        temp_cached = True
    except KeyError:
        # Cache miss: the series is computed below, outside the lock.
        temp_cached = False
    finally:
        _temp_mutex.release()
    if not temp_cached:
        clean_data['Temperature'] = \
          cln.bspline_clean(dataset['Temperature'],
                            genome[loci.t_smooth],
                            genome[loci.t_zscore], _smoother)
        _temp_mutex.acquire()
        try:
            _temp_cache[key] = clean_data['Temperature'].copy()
        finally:
            _temp_mutex.release()

    # --- Load --------------------------------------------------------------
    # NOTE(review): if prediction_steps == 0 the slice [:-0] selects nothing,
    # so no load values would be cleaned -- confirm callers never pass 0.
    key = (_get_dataset_hash(dataset["Load"]), genome[loci.l_smooth],
           genome[loci.l_zscore])
    _load_mutex.acquire()
    try:
        clean_data['Load'][:-prediction_steps] = _load_cache[key].copy()
        load_cached = True
    except KeyError:
        # Cache miss: the series is computed below, outside the lock.
        load_cached = False
    finally:
        _load_mutex.release()
    if not load_cached:
        clean_data['Load'][:-prediction_steps] = \
          cln.bspline_clean(dataset['Load'][:-prediction_steps],
                            genome[loci.l_smooth],
                            genome[loci.l_zscore], _smoother)
        _load_mutex.acquire()
        try:
            _load_cache[key] = clean_data['Load'][:-prediction_steps].copy()
        finally:
            _load_mutex.release()
    return clean_data
def bspline_clean_dataset_no_cache(dataset, genome, loci, prediction_steps):
    """Clean the temperature and load series of *dataset* without caching.

    Cleaning parameters are read from *genome* at the positions given by
    *loci* (``t_smooth``/``t_zscore`` for 'Temperature', ``l_smooth``/
    ``l_zscore`` for 'Load'). The dataset is expected to contain NaNs in the
    last *prediction_steps* elements of the Load series, so those elements
    are excluded from cleaning and left as they are.

    Returns ``dataset.copy()`` with the cleaned values written into it.
    """
    # The smoother is deliberately kept in a module-level global: pickling
    # the smoother caches takes a long time for large matrices (long time
    # series), so sharing one instance speeds things up a lot.
    global _smoother
    if _smoother is None:
        _smoother = cln.BSplineSmoother(dataset, smoothness=1)

    result = dataset.copy()

    # Temperature is cleaned over its full length.
    raw_temperature = dataset['Temperature']
    result['Temperature'] = cln.bspline_clean(
        raw_temperature, genome[loci.t_smooth], genome[loci.t_zscore],
        _smoother)

    # Load is cleaned only up to the start of the prediction window.
    observed_load = dataset['Load'][:-prediction_steps]
    cleaned_load = cln.bspline_clean(
        observed_load, genome[loci.l_smooth], genome[loci.l_zscore],
        _smoother)
    result['Load'][:-prediction_steps] = cleaned_load
    return result
Example #4
0
def bspline_clean_dataset_no_cache(dataset, genome, loci, prediction_steps):
    """Clean the temperature and load series of *dataset* without caching.

    Cleaning parameters are read from *genome* at the positions given by
    *loci* (``t_smooth``/``t_zscore`` for 'Temperature', ``l_smooth``/
    ``l_zscore`` for 'Load'). The dataset is expected to contain NaNs in the
    last *prediction_steps* elements of the Load series, so those elements
    are excluded from cleaning and left as they are.

    Returns ``dataset.copy()`` with the cleaned values written into it.
    """
    # The smoother is deliberately kept in a module-level global: pickling
    # the smoother caches takes a long time for large matrices (long time
    # series), so sharing one instance speeds things up a lot.
    global _smoother
    if _smoother is None:
        _smoother = cln.BSplineSmoother(dataset, smoothness=1)

    result = dataset.copy()

    # Temperature is cleaned over its full length.
    raw_temperature = dataset['Temperature']
    result['Temperature'] = cln.bspline_clean(
        raw_temperature, genome[loci.t_smooth], genome[loci.t_zscore],
        _smoother)

    # Load is cleaned only up to the start of the prediction window.
    observed_load = dataset['Load'][:-prediction_steps]
    cleaned_load = cln.bspline_clean(
        observed_load, genome[loci.l_smooth], genome[loci.l_zscore],
        _smoother)
    result['Load'][:-prediction_steps] = cleaned_load
    return result