def find_discords_brute_force(series, win_size, num_discords=2, znorm_threshold=0.01): """Early-abandoned distance-based discord discovery.""" discords = list() globalRegistry = VisitRegistry(len(series) - win_size + 1) znorms = np.array([ znorm(series[pos:pos + win_size], znorm_threshold) for pos in range(len(series) - win_size + 1) ]) while len(discords) < num_discords: bestDiscord = find_best_discord_brute_force(series, win_size, globalRegistry, znorms) if -1 == bestDiscord[0]: break discords.append(bestDiscord) mark_start = max(0, bestDiscord[0] - win_size + 1) mark_end = bestDiscord[0] + win_size globalRegistry.mark_visited_range(mark_start, mark_end) return discords
def find_discords_brute_force(series, win_size = 1, num_discords=12, z_threshold=0.01): """Early-abandoned distance-based discord discovery.""" discords = list() globalRegistry = VisitRegistry(len(series)) globalRegistry.mark_visited_range(len(series) - win_size, len(series)) while (len(discords) < num_discords): bestDiscord = find_best_discord_brute_force(series, win_size, globalRegistry, z_threshold) if -1 == bestDiscord: break discords.append(bestDiscord) mark_start = bestDiscord - win_size if 0 > mark_start: mark_start = 0 mark_end = bestDiscord + win_size '''if len(series) < mark_end: mark_end = len(series)''' globalRegistry.mark_visited_range(mark_start, mark_end) return discords
def find_best_discord_brute_force(series, win_size, global_registry, z_threshold=0.01): """Early-abandoned distance-based discord discovery.""" best_so_far_distance = -1.0 best_so_far_index = -1 outerRegistry = global_registry.clone() outer_idx = outerRegistry.get_next_unvisited() while ~np.isnan(outer_idx): outerRegistry.mark_visited(outer_idx) candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)], z_threshold) nnDistance = np.inf innerRegistry = VisitRegistry(len(series) - win_size) inner_idx = innerRegistry.get_next_unvisited() while ~np.isnan(inner_idx): innerRegistry.mark_visited(inner_idx) if abs(inner_idx - outer_idx) > win_size: curr_seq = znorm(series[inner_idx:(inner_idx + win_size)], z_threshold) dist = early_abandoned_dist(candidate_seq, curr_seq, nnDistance) if (~np.isnan(dist)) and (dist < nnDistance): nnDistance = dist inner_idx = innerRegistry.get_next_unvisited() if ~(np.inf == nnDistance) and (nnDistance > best_so_far_distance): best_so_far_distance = nnDistance best_so_far_index = outer_idx outer_idx = outerRegistry.get_next_unvisited() return (best_so_far_index)
def find_best_discord_brute_force(series, win_size, global_registry, znorms): """Early-abandoned distance-based discord discovery.""" best_so_far_distance = -1.0 best_so_far_index = -1 outer_registry = global_registry.clone() outer_idx = outer_registry.get_next_unvisited() while ~np.isnan(outer_idx): outer_registry.mark_visited(outer_idx) candidate_seq = znorms[outer_idx] nn_distance = np.inf inner_registry = VisitRegistry(len(series) - win_size + 1) inner_idx = inner_registry.get_next_unvisited() while ~np.isnan(inner_idx): inner_registry.mark_visited(inner_idx) if abs(inner_idx - outer_idx) >= win_size: curr_seq = znorms[inner_idx] dist = early_abandoned_euclidean(candidate_seq, curr_seq, nn_distance) if (~np.isnan(dist)) and (dist < nn_distance): nn_distance = dist inner_idx = inner_registry.get_next_unvisited() if ~(np.inf == nn_distance) and (nn_distance > best_so_far_distance): best_so_far_distance = nn_distance best_so_far_index = outer_idx outer_idx = outer_registry.get_next_unvisited() return best_so_far_index, best_so_far_distance
def test_sizing(): """Test the registry.""" reg = VisitRegistry(77) assert 77 == reg.get_unvisited_count() reg.mark_visited(0) assert 76 == reg.get_unvisited_count() reg.mark_visited_range(70, 77) assert 69 == reg.get_unvisited_count() reg.mark_visited(0) assert 69 == reg.get_unvisited_count() reg.mark_visited(1) assert 68 == reg.get_unvisited_count() reg.mark_visited(reg.get_next_unvisited()) assert 67 == reg.get_unvisited_count() reg.mark_visited_range(0, 77) assert np.isnan(reg.get_next_unvisited())
def find_best_discord_brute_force(series, win_size, global_registry, z_threshold=0.01): """Early-abandoned distance-based discord discovery.""" best_so_far_distance = -1.0 best_so_far_index = -1 outerRegistry = global_registry.clone() # 随机找到一个未看过的index outer_idx = outerRegistry.get_next_unvisited() # 若 outer_idx 不是nan值 则进入循环 ~按位取反 while ~np.isnan(outer_idx): # 标记看过outer_idx outerRegistry.mark_visited(outer_idx) # 标准化候选子序列 开始索引outer_indx 结束索引outer_idx+win_size-1 candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)], z_threshold) # 与candidate_seq的开始索引相距不小于窗口大小的 开始索引所代表的子序列 与其的最小距离 (两子序列形状相似) nnDistance = np.inf # 为什么不是 len(series) - win_size + 1 ??? innerRegistry = VisitRegistry(len(series) - win_size) inner_idx = innerRegistry.get_next_unvisited() # 遍历所有开始索引 在两子序列距离大于窗口大小的条件下 找到与candidate_seq的最近距离nnDistance while ~np.isnan(inner_idx): innerRegistry.mark_visited(inner_idx) # 若 inner_indx 与 outer_idx 距离 大于 窗口大小 即两子序列不能有重复部分且不相邻 if abs(inner_idx - outer_idx) > win_size: curr_seq = znorm(series[inner_idx:(inner_idx + win_size)], z_threshold) # 计算 标准化后两序列的欧式距离 dist = early_abandoned_dist(candidate_seq, curr_seq, nnDistance) # 更新 nnDistance 使其逐渐变小 if (~np.isnan(dist)) and (dist < nnDistance): nnDistance = dist inner_idx = innerRegistry.get_next_unvisited() # 更新 best_so_far_distance 和 best_so_far_index """ best_so_far_distance max(min(distance)) 相似性最小的子序列 与 距离最近的子序列 的距离 best_so_far_index 当前时间序列的异常子序列 开始索引 这段子序列在当前时间序列中与其他子序列的相似性是最小的 """ if ~(np.inf == nnDistance) and (nnDistance > best_so_far_distance): best_so_far_distance = nnDistance best_so_far_index = outer_idx outer_idx = outerRegistry.get_next_unvisited() return (best_so_far_index, best_so_far_distance)
def clone(self): """Make the array's copy.""" clone = VisitRegistry(self.capacity) setattr(clone, 'visit_array', self.visit_array.copy()) setattr(clone, 'unvisited_count', self.unvisited_count) return clone