def sax(feature, n_bins=None):
    """Encode a feature matrix as a space-separated string of SAX words.

    ``feature`` is transposed so that every column becomes one series.
    Series whose variance is below 0.01 are discarded, and each remaining
    series is discretised with SAX (``'uniform'`` strategy) into one word
    with one symbol per time step.

    :param feature: 2-D array exposing ``.T`` and ``.shape``; after the
        transpose, rows are the individual series.
    :param n_bins: alphabet size handed to pyts. When falsy (``None`` or
        ``0``) ``feature.shape[0]`` is used instead.
        NOTE(review): pyts documents an upper limit on ``n_bins``; the
        default may exceed it for long series -- confirm against callers.
    :return: the SAX words of the retained series joined by single spaces.
    """
    series = np.asarray(feature.T)

    # Vectorised filter instead of a per-row ``i not in const_idxs`` scan.
    # ``~(var < 0.01)`` (rather than ``var >= 0.01``) keeps rows whose
    # variance is NaN, exactly like the index-based filter used to.
    keep = ~(np.var(series, axis=1) < 0.01)
    series = series[keep]

    transformer = SymbolicAggregateApproximation(
        n_bins=(n_bins or feature.shape[0]),
        strategy='uniform',
    )
    return ' '.join(''.join(word) for word in transformer.transform(series))
def _sax(self):
    """
    Discretise the stored PAA data into SAX symbols.

    Thin wrapper around the SymbolicAggregateApproximation() class from the
    pyts package, configured with the 'normal' strategy and ``self.nb_bins``
    bins. The transformer is fitted on, and applied to, ``self.paa_data``.

    :return: nothing, the symbolic series is stored in ``self.sax_data``.
    """
    transformer = SymbolicAggregateApproximation(
        n_bins=self.nb_bins,
        strategy='normal',
    )
    self.sax_data = transformer.fit_transform(self.paa_data)
def ts3sax(_ts):
    """Return the 3-symbol SAX word of a time series plus a symbol map.

    The series is first passed through ``normalise`` (defined elsewhere),
    which returns the transformed series together with a ``(mu, sd)`` pair
    -- presumably the mean and standard deviation of ``_ts``; confirm
    against ``normalise``.

    :param _ts: 1-D series accepted by ``normalise``.
    :return: ``(word, binmap)`` where ``word`` is the SAX string of the
        series and ``binmap`` maps each symbol to a value on the original
        scale: ``'a'`` and ``'c'`` to the lower and upper bin edges
        (the 1/3 and 2/3 Gaussian quantiles), ``'b'`` to ``mu``.
    """
    ts, (mu, sd) = normalise(_ts)

    # With strategy='normal' the symbols are ordered a < b < c, so 'a' must
    # map below the mean and 'c' above it. The previous ``mu - sd * ppf(q)``
    # had the sign flipped (ppf(1/3) is negative), which swapped the two.
    binmap = {
        'a': mu + sd * norm.ppf(1 / 3),
        'b': mu,
        'c': mu + sd * norm.ppf(2 / 3),
    }

    model = SymbolicAggregateApproximation(n_bins=3, strategy='normal')
    # pyts expects (n_samples, n_timestamps); the data is a single sample.
    sax_ts = model.fit_transform(ts.reshape(1, -1))
    return (''.join(sax_ts[0]), binmap)
def sax_bop_windows(features, labels, n_bins=4):
    """Replace every row of every window by its SAX symbol histogram.

    Each window ``features[i, :, :]`` is discretised with SAX (``'uniform'``
    strategy, ordinal alphabet) and every row of the result is summarised
    by how often each of the ``n_bins`` symbols occurs in it.

    :param features: 3-D array indexed as ``features[i, :, :]``.
    :param labels: sequence indexable with the same ``i`` as ``features``.
    :param n_bins: SAX alphabet size and length of each histogram.
    :return: ``(histograms, labels)`` as numpy arrays; ``histograms`` holds
        one count vector of length ``n_bins`` per row of each window.
    """
    # Configuration is identical for every window, so build it once;
    # fit_transform below still refits on each window.
    transformer = SymbolicAggregateApproximation(
        n_bins=n_bins, alphabet='ordinal', strategy='uniform')

    new_features, new_labels = [], []
    for i in range(features.shape[0]):
        symbols = transformer.fit_transform(features[i, :, :])
        # minlength pins every histogram to n_bins entries instead of
        # deriving the length from the largest symbol seen in that row,
        # so rows can never come out ragged.
        new_features.append(
            [np.bincount(row, minlength=n_bins) for row in symbols])
        new_labels.append(labels[i])
    return np.array(new_features), np.array(new_labels)
def shannon_entropy(current_observation: pd.DataFrame, raw_key: str):
    """Entropy of the SAX symbol distribution of one DataFrame column.

    The column ``raw_key`` is discretised with a default-configured
    ``SymbolicAggregateApproximation`` and the relative frequency of each
    symbol is passed to ``entropy``.

    :param current_observation: frame holding the series.
    :param raw_key: name of the column to analyse.
    :return: the entropy; ``0`` when the transformer reports that the
        sample is constant; ``None`` for any other ``ValueError``.
    """
    try:
        symbols = SymbolicAggregateApproximation().transform(
            [current_observation[raw_key].to_numpy()]
        )
        counts = np.unique(symbols, return_counts=True)[1]
        return entropy(pk=counts / len(symbols[0]))
    except ValueError as err:
        # A constant series has a single symbol, hence zero entropy.
        if str(err) == 'At least one sample is constant.':
            return 0
        return None
class Data_Transformer():
    """Ordinal SAX and SFA encodings of the train/test sets ``Xtr``/``Xte``.

    Everything is computed once, while the class body executes, from the
    module-level ``Xtr`` and ``Xte``; results are exposed as class
    attributes.
    """

    # Symbolic Aggregate approXimation.
    SAX = SymbolicAggregateApproximation(strategy='uniform', alphabet='ordinal')
    Xtr_SAX = SAX.fit_transform(Xtr)
    # Encode the test set with the transformer as fitted on the training
    # set; never refit on test data.
    Xte_SAX = SAX.transform(Xte)

    # Symbolic Fourier Approximation.
    SFA = SymbolicFourierApproximation(alphabet='ordinal')
    Xtr_SFA = SFA.fit_transform(Xtr)
    # Calling fit_transform here would refit on Xte, giving the test set a
    # different encoding than the training set.
    Xte_SFA = SFA.transform(Xte)
def sax(indices, time_point):
    """Tally SAX symbols per time slot over a set of connectivity series.

    The series ``globe.CONN_NORM[idx]`` for every ``idx`` in ``indices`` are
    stacked, resampled to ``time_point`` samples and discretised with an
    8-symbol SAX (``'uniform'`` strategy). For each (time slot, symbol)
    pair the result counts how many series carry that symbol in that slot.

    :param indices: iterable of indices into ``globe.CONN_NORM``.
    :param time_point: number of samples each series is resampled to.
    :return: list of ``{'time': str, 'letter': str, 'value': int}`` dicts,
        ordered by symbol and then by time slot. ``'letter'`` is the
        symbol's ordinal (``'0'`` for ``a`` ... ``'7'`` for ``h``). The
        list always covers at least 70 time slots.
    """
    alphabet = 'abcdefgh'
    # The output grid used to be hard-wired to 70 slots, so any
    # ``time_point`` above 70 failed with KeyError. Keep 70 as a floor so
    # existing consumers see the same output, and grow it when needed.
    n_slots = max(70, time_point)

    # fetch the data
    conn_matrix = np.vstack([globe.CONN_NORM[idx] for idx in indices])

    # downsample
    conn_norm_ds = np.transpose(
        ss.resample(np.transpose(conn_matrix), time_point))

    # SAX
    transformer = SymbolicAggregateApproximation(
        n_bins=len(alphabet), strategy='uniform')
    letter_dict = {letter: rank for rank, letter in enumerate(alphabet)}

    # initialize the sax dictionary solution (symbol-major, then time)
    data = {
        '{}_{}'.format(slot, rank): {
            'time': '{}'.format(slot),
            'letter': str(rank),
            'value': 0,
        }
        for rank in letter_dict.values()
        for slot in range(n_slots)
    }

    # apply SAX: rows are discretised independently, so one call over the
    # whole (ROI x time-point) matrix replaces the row-by-row loop.
    for word in transformer.transform(conn_norm_ds):
        for slot, letter in enumerate(word):
            data['{}_{}'.format(slot, letter_dict[letter])]['value'] += 1

    # data is in the format that the observable is expecting
    return list(data.values())
import numpy as np import matplotlib.lines as mlines import matplotlib.pyplot as plt from scipy.stats import norm from pyts.approximation import SymbolicAggregateApproximation from numpy.testing import rundocs n_samples = 100 # liczba próbek: parametry do stworzenia przykładowego zestawu danych n_timestamps = 24 # znaczniki czasowe rgn = np.random.RandomState(41) X = rgn.randn(n_samples, n_timestamps) # generowanie losowych danych n_bins = 5 # liczba interwałów kwantyzacji sax = SymbolicAggregateApproximation(n_bins=n_bins, strategy='normal') X_sax = sax.fit_transform(X) # obliczanie interwałów kwantyzacji dla rozkładu Gaussa # ppf = percent point function (odwrotna dystrybuanta) bins = norm.ppf(np.linspace(0, 1, n_bins + 1)[1:-1]) bottom_bool = np.r_[True, X_sax[0, 1:] > X_sax[0, :-1]] # wyświetlenie wykresu plt.figure(figsize=(12, 8)) plt.plot(X[0], 'o--', label='Original') for x, y, s, bottom in zip(range(n_timestamps), X[0], X_sax[0], bottom_bool): va = 'bottom' if bottom else 'top' plt.text(x, y,
def test_actual_results(params, X, arr_desired):
    """Check that fitting and transforming ``X`` yields ``arr_desired``."""
    transformer = SymbolicAggregateApproximation(**params)
    np.testing.assert_array_equal(transformer.fit_transform(X), arr_desired)
def test_parameter_check(params, error, err_msg):
    """Check that invalid parameters raise ``error`` with ``err_msg``."""
    transformer = SymbolicAggregateApproximation(**params)
    # The message is matched literally, hence the escaping.
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        transformer.transform(X)
] cluster_summed = np.zeros_like(conn_norm[0]) cluster = [] # fetch the data for idx in indices: cluster_summed = np.add(cluster_summed, conn_norm[idx]) cluster.append(conn_norm[idx]) ROI = cluster_summed / len(indices) # averaged within ROI conn_matrix = np.vstack(cluster) # downsample conn_norm_ds = np.transpose(ss.resample(np.transpose(conn_matrix), time_point)) # SAX transformer = SymbolicAggregateApproximation(n_bins=8, strategy='uniform') # https://pyts.readthedocs.io/en/stable/modules/approximation.html # strategy='uniform': all bins in each sample have identical widths, # strategy='quantile': all bins in each sample have the same number of points, # strategy='normal': bin edges are quantiles from a standard normal distribution. # MAKE ONE TIME # make a letter dict letter_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] letter_dict = {} for i, l in enumerate(letter_list): letter_dict[l] = i data = {} # initialize the sax dictionary solution for letter in letter_dict:
# Rank the columns of the input CSV by the distance that
# ``SAX_trans.compare_strings`` reports between their SAX word and the SAX
# word of the last column (the target).
window = 24
alphabets = 7
input_path = "../path/of/input/file..."

df = pd.read_csv(input_path)
column_names = list(df.columns)
df = df.T  # one row per original column
N = df.shape[1]
df_to_numpy = df.values

# PAA followed by SAX ('normal' strategy) on every row.
ppa_transformer = PiecewiseAggregateApproximation(window_size=window)
paa_output = ppa_transformer.transform(df_to_numpy)
sax_transformer = SymbolicAggregateApproximation(n_bins=alphabets,
                                                 strategy='normal')
sax_output = sax_transformer.transform(paa_output)

# The last row is the target; every other row is compared against it.
target = sax_output[-1]
sax = SAX_trans(window, alphabets)
mindist_with_target = [
    sax.compare_strings(candidate, target) for candidate in sax_output[:-1]
]

# Column names ordered from the smallest to the largest distance.
MinDistances = np.asarray(mindist_with_target)
ranking = np.argsort(MinDistances)
result = [column_names[indx] for indx in ranking]
import numpy as np
import matplotlib.pyplot as plt
from pyts.approximation import PiecewiseAggregateApproximation
from pyts.approximation import SymbolicAggregateApproximation
from pyts.utils import segmentation
import pandas as pd

# 1. Approximation algorithms
# 1.1 Piecewise Aggregate Approximation (PAA)
# pyts.approximation.PiecewiseAggregateApproximation
n_samples, n_timestamps = 2, 10
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
print(X)

windows_size = 2
paa = PiecewiseAggregateApproximation(window_size=windows_size)
X_paa = paa.transform(X)
print(X_paa)

# Show how a series of n_timestamps points is cut into windows of size 3.
seg = segmentation(n_timestamps, window_size=3, overlapping=False)
print(seg)

# 1.2 Symbolic Aggregate approXimation (SAX)
X = [[0, 4, 2, 1, 7, 6, 3, 5], [2, 5, 4, 5, 3, 4, 2, 3]]
sax = SymbolicAggregateApproximation()
print(sax.transform(X))