def test_maximum_subsequence_no_windows(): np.random.seed(9999) ts = np.random.uniform(size=2**10) w = 2**6 subq = ts[0:w] ts[0:w] = subq ts[w + 100:w + 100 + w] = subq with pytest.raises(NoSolutionPossible) as excinfo: upper = maximum_subsequence(ts, 1.0) assert 'no windows' in str(excinfo.value)
def test_maximum_subsequence_68(): np.random.seed(9999) ts = np.random.uniform(size=2**10) w = 2**6 subq = ts[0:w] ts[0:w] = subq ts[w + 100:w + 100 + w] = subq upper = maximum_subsequence(ts, 0.98) assert (upper == 68)
def test_maximum_subsequence_no_windows(): np.random.seed(9999) ts = np.random.uniform(size=2**10) w = 2**6 subq = ts[0:w] ts[0:w] = subq ts[w + 100:w + 100 + w] = subq with pytest.warns(RuntimeWarning) as record: upper = maximum_subsequence(ts, 1.0) assert (np.isnan(upper)) assert ('No windows found with given threshold' in record[0].message.args[0])
def compute(ts, windows=None, query=None, sample_pct=1, threshold=0.98, n_jobs=1): """ Computes the exact or approximate MatrixProfile based on the sample percent specified. Currently, MPX and SCRIMP++ is used for the exact and approximate algorithms respectively. When multiple windows are passed, the Pan-MatrixProfile is computed and returned. By default, only passing in a time series (ts), the Pan-MatrixProfile is computed based on the maximum upper window algorithm with a correlation threshold of 0.98. Note ---- When multiple windows are passed and the Pan-MatrixProfile is computed, the query is ignored! Parameters ---------- ts : array_like The time series to analyze. windows : int or array_like The window(s) to compute the MatrixProfile. Note that it may be an int for a single matrix profile computation or an array of ints for computing the pan matrix profile. query : array_like, Optional The query to analyze. Note that when computing the PMP the query is ignored! sample_pct : float, default = 1 A float between 0 and 1 representing how many samples to compute for the MP or PMP. When it is 1, the exact algorithm is used. threshold : float, Default 0.98 The correlation coefficient used as the threshold. It should be between 0 and 1. This is used to compute the upper window size when no window(s) is given. n_jobs : int, Default = 1 Number of cpu cores to use. Returns ------- dict : profile The profile computed. """ result = None multiple_windows = core.is_array_like(windows) and len(windows) > 1 no_windows = isinstance(windows, type(None)) has_threshold = isinstance(threshold, float) if no_windows and not has_threshold: raise ValueError('compute requires a threshold or window(s) to be set!') if core.is_array_like(windows) and len(windows) == 1: windows = windows[0] # compute the upper window and pmp if no_windows and has_threshold: profile = maximum_subsequence(ts, threshold, include_pmp=True) # determine windows to be computed # from 8 in steps of 2 until upper w start = 8 windows = range(start, profile['upper_window'] + 1) # compute the pmp result = skimp(ts, windows=windows, sample_pct=sample_pct, pmp_obj=profile) # compute the pmp elif multiple_windows: if core.is_array_like(query): logger.warn('Computing PMP - query is ignored!') result = skimp(ts, windows=windows, sample_pct=1, n_jobs=n_jobs) # compute exact mp elif sample_pct >= 1: result = mpx(ts, windows, query=query, n_jobs=n_jobs) # compute approximate mp else: result = scrimp_plus_plus(ts, windows, query=query, n_jobs=n_jobs, sample_pct=sample_pct) return result
def compute(ts, windows=None, query=None, sample_pct=1, threshold=0.98, n_jobs=1, preprocessing_kwargs=None): """ Computes the exact or approximate MatrixProfile based on the sample percent specified. Currently, MPX and SCRIMP++ is used for the exact and approximate algorithms respectively. When multiple windows are passed, the Pan-MatrixProfile is computed and returned. By default, only passing in a time series (ts), the Pan-MatrixProfile is computed based on the maximum upper window algorithm with a correlation threshold of 0.98. Notes ----- When multiple windows are passed and the Pan-MatrixProfile is computed, the query is ignored! Parameters ---------- ts : array_like The time series to analyze. windows : int, array_like The window(s) to compute the MatrixProfile. Note that it may be an int for a single matrix profile computation or an array of ints for computing the pan matrix profile. query : array_like, optional The query to analyze. Note that when computing the PMP the query is ignored! sample_pct : float, default 1 A float between 0 and 1 representing how many samples to compute for the MP or PMP. When it is 1, the exact algorithm is used. threshold : float, default 0.98 The correlation coefficient used as the threshold. It should be between 0 and 1. This is used to compute the upper window size when no window(s) is given. n_jobs : int, default = 1 Number of cpu cores to use. preprocessing_kwargs : dict, default = None A dictionary object to sets parameters for preprocess function. A valid preprocessing_kwargs should have the following structure: >>> { >>> 'window': The window size to compute the mean/median/minimum/maximum value, >>> 'method': A string indicating the data imputation method, which should be >>> 'mean', 'median', 'min' or 'max', >>> 'direction': A string indicating the data imputation direction, which should be >>> 'forward', 'fwd', 'f', 'backward', 'bwd', 'b'. If the direction is >>> forward, we use previous data for imputation; if the direction is >>> backward, we use subsequent data for imputation., >>> 'add_noise': A boolean value indicating whether noise needs to be added into the >>> time series >>> } To disable preprocessing procedure, set the preprocessing_kwargs to None/False/""/{}. Returns ------- dict : profile The profile computed. """ result = None multiple_windows = core.is_array_like(windows) and len(windows) > 1 no_windows = isinstance(windows, type(None)) has_threshold = isinstance(threshold, float) if no_windows and not has_threshold: raise ValueError( 'compute requires a threshold or window(s) to be set!') # Check to make sure all window sizes are greater than 3, return a ValueError if not. if (isinstance(windows, int) and windows < 4) or (multiple_windows and np.any(np.unique(windows) < 4)): raise ValueError( 'Compute requires all window sizes to be greater than 3!') if core.is_array_like(windows) and len(windows) == 1: windows = windows[0] # preprocess the time series preprocessing_kwargs = validate_preprocess_kwargs(preprocessing_kwargs) if preprocessing_kwargs: ts = preprocess( ts, window=preprocessing_kwargs['window'], impute_method=preprocessing_kwargs['impute_method'], impute_direction=preprocessing_kwargs['impute_direction'], add_noise=preprocessing_kwargs['add_noise']) # compute the upper window and pmp if no_windows and has_threshold: profile = maximum_subsequence(ts, threshold, include_pmp=True) # determine windows to be computed # from 8 in steps of 2 until upper w start = 4 #start = 8 windows = range(start, profile['upper_window'] + 1) # compute the pmp result = skimp(ts, windows=windows, sample_pct=sample_pct, pmp_obj=profile) # compute the pmp elif multiple_windows: if core.is_array_like(query): logger.warn('Computing PMP - query is ignored!') result = skimp(ts, windows=windows, sample_pct=1, n_jobs=n_jobs) # compute exact mp elif sample_pct >= 1: result = mpx(ts, windows, query=query, n_jobs=n_jobs) # compute approximate mp else: result = scrimp_plus_plus(ts, windows, query=query, n_jobs=n_jobs, sample_pct=sample_pct) return result