def PAA(self):
    """Apply Piecewise Aggregate Approximation to the standardized data.

    Reads the requested number of output intervals from ``self.paaline``,
    runs the ``PAA`` transformer over the module-level ``X_standardized``
    and stores the result in the module-level ``X_paa``.  The parsed
    interval count is kept in the module-level ``a``.  The outcome is
    reported to the user through a message box.

    A warning box is shown (and ``X_paa`` is left untouched) when the
    interval count is not an integer, when the data set has not been
    standardized yet, or when the transformer rejects its input with a
    ``ValueError``.
    """
    # NOTE(review): this function shares its name with the ``PAA``
    # transformer it calls; that only works if this ``def`` lives inside a
    # class body so the inner ``PAA`` resolves to the module-level
    # transformer -- confirm.
    global X_paa
    global a
    try:
        a = int(self.paaline.text())
        paa = PAA(window_size=None, output_size=a, overlapping=True)
        # A bare reference to ``X_standardized`` raises NameError (not
        # ValueError) when nothing has been standardized yet, which used to
        # escape the handler below.  Look it up explicitly so the user gets
        # the warning the message text promises.
        standardized = globals().get('X_standardized')
        if standardized is None:
            raise ValueError('data set has not been standardized')
        X_paa = paa.transform(standardized)
        QMessageBox.information(None, 'Information',
                                'PAA Applied With Success', QMessageBox.Ok)
    except ValueError:
        QMessageBox.warning(
            None, 'ERROR',
            'Standardize Your Data_set or define how many interval do you want!',
            QMessageBox.Ok)
class MultipleKernelAnomalyDetector:
    """
    Multiple Kernel anomaly-detection method implementation.

    Wraps ``OneClassSVM`` and offers two kernels: the built-in ``'rbf'``
    kernel and a custom ``'lcs'`` kernel that sums, over fixed-length
    segments of each flattened sample, the normalized longest common
    subsequence between the segments of the two samples.
    """

    def __init__(self, nu=0.5, gamma=0.1, tol=1e-3, degree=3, kernel='lcs',
                 sax_size=4, quantiles='gaussian', paa_size=8):
        """
        Constructor accepts some args for sklearn.svm.OneClassSVM and SAX
        inside.  Default params are chosen as the most appropriate for
        flight-anomaly-detection problem according the original article.

        nu, gamma, tol, degree -- forwarded to ``OneClassSVM`` in ``fit``.
        kernel -- ``'lcs'`` selects the custom LCS kernel; any other value
            selects the ``'rbf'`` kernel.
        sax_size, quantiles -- ``n_bins`` and ``quantiles`` of the SAX step.
        paa_size -- ``output_size`` of the PAA step.
        """
        self.nu = nu
        self.gamma = gamma
        self.tol = tol
        self.degree = degree
        self.kernel = kernel
        self.stand_scaler = StandardScaler(epsilon=1e-2)
        self.paa = PAA(window_size=None, output_size=paa_size,
                       overlapping=True)
        self.sax = SAX(n_bins=sax_size, quantiles=quantiles)

    def compute_matrix_of_equals(self, sequence1, sequence2):
        """
        Computes matrix, where at (i + 1, j + 1) coordinate is the length of
        the lcs for sequence1[:i+1] and sequence2[:j+1].  Row 0 and column 0
        stay zero (empty prefix).
        """
        lengths = np.zeros((len(sequence1) + 1, len(sequence2) + 1))
        for i, element1 in enumerate(sequence1):
            for j, element2 in enumerate(sequence2):
                if element1 == element2:
                    lengths[i + 1][j + 1] = lengths[i][j] + 1
                else:
                    lengths[i + 1][j + 1] = max(lengths[i + 1][j],
                                                lengths[i][j + 1])
        return lengths

    def lcs(self, sequence1, sequence2):
        """
        Computes longest common subsequence of sequence1 and sequence2.

        The result is returned as a string, so the elements of the
        sequences must themselves be strings (e.g. characters).
        """
        lengths = self.compute_matrix_of_equals(sequence1, sequence2)
        matched = []
        i, j = len(sequence1), len(sequence2)
        # Walk back from the bottom-right corner of the table; a cell that
        # differs from both its upper and left neighbour marks a match.
        while i != 0 and j != 0:
            if lengths[i][j] == lengths[i - 1][j]:
                i -= 1
            elif lengths[i][j] == lengths[i][j - 1]:
                j -= 1
            else:
                assert sequence1[i - 1] == sequence2[j - 1]
                matched.append(sequence1[i - 1])
                i -= 1
                j -= 1
        # Matches were collected back-to-front.
        return "".join(reversed(matched))

    def nlcs(self, sequence1, sequence2):
        """
        Computes normalized common subsequence of sequence1 and sequence2:
        the lcs length divided by sqrt(len(sequence1) * len(sequence2)).

        Returns 0.0 when either sequence is empty.
        """
        if len(sequence1) == 0 or len(sequence2) == 0:
            # Nothing in common, and the normalizer would be zero.
            return 0.0
        # Only the length is needed, and it is the last cell of the table.
        # Skipping the string reconstruction also lets this work for
        # sequences whose elements are not strings.
        lengths = self.compute_matrix_of_equals(sequence1, sequence2)
        return int(lengths[-1][-1]) / (len(sequence1) * len(sequence2))**0.5

    def get_sax(self, sequence):
        """
        Runs a single sequence through scaler -> PAA -> SAX and returns the
        transformed entry for that sequence.
        """
        sequence = np.reshape(sequence, (1, len(sequence)))
        return self.sax.transform(
            self.paa.transform(self.stand_scaler.transform(sequence)))[0]

    def _pair_similarity(self, row1, row2):
        """
        Sums nlcs over consecutive segments of two flattened samples.
        The segment length is the last dimension of the array seen by fit.
        """
        step = self.x_shape[-1]
        # NOTE(review): condition kept exactly as originally written; it is
        # true for any two non-empty rows, so the raw-value branch is
        # effectively unused -- confirm the intended threshold.
        use_sax = len(Counter(row1)) > 0.3 and len(Counter(row2))
        total = 0.0
        for start in range(0, len(row1), step):
            segment1 = row1[start:start + step]
            segment2 = row2[start:start + step]
            if use_sax:
                segment1 = self.get_sax(segment1)
                segment2 = self.get_sax(segment2)
            total += self.nlcs(segment1, segment2)
        return total

    def lcs_kernel_function(self, x1, x2):
        """
        LCS - kernel for Multiple Kernel Anomaly Detector.

        Returns the (x1.shape[0], x2.shape[0]) matrix of pairwise
        similarities.  Must be called after ``fit`` has recorded
        ``self.x_shape``.
        """
        res = np.zeros((x1.shape[0], x2.shape[0]))
        # Only a matrix of a sample set against itself is symmetric, so the
        # "compute the upper triangle and mirror it" shortcut is valid only
        # in that case.  For two different sets (as when predicting on new
        # samples) every entry has to be computed.
        symmetric = x1 is x2 or (x1.shape == x2.shape
                                 and np.array_equal(x1, x2))
        for ind1 in tqdm(range(x1.shape[0])):
            first = ind1 if symmetric else 0
            for ind2 in range(first, x2.shape[0]):
                value = self._pair_similarity(x1[ind1], x2[ind2])
                res[ind1][ind2] = value
                if symmetric:
                    res[ind2][ind1] = value
        return res

    def transformation(self, x):
        """
        Transforms X from 3D to 2D array for OneClassSVM: every sample is
        flattened into one row, keeping the first axis.
        """
        return x.reshape(x.shape[0], -1)

    def gaussian_kernel(self, x, y):
        """
        Gaussian kernel exp(-d(x, y)**2 / 0.5**2) over pairwise euclidean
        distances; the 0.5 width is fixed.
        """
        return np.exp((euclidean_distances(x, y)**2) * (-1 / (0.5**2)))

    def fit(self, x):
        """
        With lcs kernel X must have shape (n, d, l), where
            n - number of samples,
            d - number of dimensions,
            l - feature length.
        With rbf kernel X must have shape (n, l) where
            n - number of samples,
            l - feature length.

        Stores the fitted estimator in ``self.one_class_svm`` and the input
        shape in ``self.x_shape``.
        """
        self.x_shape = x.shape
        if self.kernel == 'lcs':
            x_transformed = self.transformation(x)
            self.one_class_svm = OneClassSVM(kernel=self.lcs_kernel_function,
                                             nu=self.nu,
                                             gamma='auto',
                                             degree=self.degree,
                                             tol=self.tol)
        else:
            x_transformed = x
            self.one_class_svm = OneClassSVM(kernel='rbf',
                                             nu=self.nu,
                                             gamma=self.gamma,
                                             degree=self.degree,
                                             tol=self.tol)
        self.one_class_svm.fit(x_transformed)

    def predict(self, x):
        """
        With lcs kernel X must have shape (n, d, l), where
            n - number of samples,
            d - number of dimensions,
            l - feature length.
        With rbf kernel X must have shape (n, l) where
            n - number of samples,
            l - feature length.
        Function returns y-array with +1;-1
        """
        if len(x.shape) > 2:
            x = self.transformation(x)
        return self.one_class_svm.predict(x)
# Per-window-size statistics of the mean first difference before and after
# the explosion point, computed over the positively labelled training rows.
sensor = 'tangential_strain'
explosive_point = 80
window_sizes = [4]

# The labelled rows do not depend on the window size, so load and parse the
# CSV once instead of once per loop iteration.
explosion_expert_reader = pd.read_csv(get_raw_path("training"))
explosion_expert_reader = explosion_expert_reader[
    explosion_expert_reader.label == 1]
raw_explosive_train = np.array(
    [np.fromstring(e, dtype=float, sep=',')
     for e in explosion_expert_reader[sensor]])

for window_size in window_sizes:
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("_______________________________________________________________________________")
    print("window_size %s" % (window_size,))
    paa = PAA(window_size=window_size, overlapping=True)

    explosive_train = raw_explosive_train
    print(explosive_train.shape)
    explosive_train = paa.transform(explosive_train)
    print(explosive_train.shape)
    # ``diff`` is defined outside this snippet.
    explosive_train = np.array([diff(x) for x in explosive_train])
    print(explosive_train.shape)

    # Floor division keeps the slice index an integer on Python 3 as well;
    # on Python 2 it matches the original ``/`` on two ints.
    split_index = explosive_point // window_size

    inflations = []
    deflations = []
    for each in explosive_train:
        inflation = each[:split_index]
        deflation = each[split_index:]
        inflations.append(sum(inflation) / len(inflation))
        deflations.append(sum(deflation) / len(deflation))

    inflation_median, inflation_mean, inflation_std = get_statistics(inflations)
    deflation_median, deflation_mean, deflation_std = get_statistics(deflations)
import numpy as np
from scipy.stats import norm

from pyts.transformation import PAA, StandardScaler
from pyts.visualization import plot_paa, plot_standardscaler

# Toy data set dimensions
n_samples = 10
n_features = 48
n_classes = 2

# Seeded generator so the example is reproducible
rng = np.random.RandomState(41)
delta = 0.5
dt = 1

# Draw every increment in one flat batch, fold it into one row per sample,
# pin the first column to zero and accumulate along each row.
X = np.reshape(
    norm.rvs(scale=delta ** 2 * dt,
             size=n_samples * n_features,
             random_state=rng),
    (n_samples, n_features),
)
X[:, 0] = 0
X = X.cumsum(axis=1)

# Random integer labels drawn from the same generator (after the data)
y = rng.randint(n_classes, size=n_samples)

# Standardize, then plot the first raw series
standardscaler = StandardScaler(epsilon=1e-2)
X_standardized = standardscaler.transform(X)
plot_standardscaler(X[0])

# PAA with 8 output points, then plot the first standardized series with
# the same PAA settings
paa = PAA(window_size=None, output_size=8, overlapping=True)
X_paa = paa.transform(X_standardized)
plot_paa(
    X_standardized[0],
    window_size=None,
    output_size=8,
    overlapping=True,
    marker='o',
)