Esempio n. 1
0
def test_early_abandoned():
    """Check early_abandoned_dist with and without an effective cutoff."""
    rel_tol = 0.0000001

    # An infinite cutoff can never trigger abandonment, so the call is
    # expected to yield the plain Euclidean distance.
    diagonal = distance.early_abandoned_dist(
        np.array([1., 1.]), np.array([2., 2.]), np.inf)
    assert pytest.approx(np.sqrt(2), rel_tol) == diagonal

    a = np.array([0.5, 0.8, 0.9])
    b = np.array([-0.15, 0.38, 0.92])

    uncut = distance.early_abandoned_dist(a, b, np.inf)
    assert pytest.approx(0.7741447, rel_tol) == uncut

    # A cutoff (0.1) below the true distance (~0.774) must produce NaN.
    abandoned = distance.early_abandoned_dist(a, b, 0.1)
    assert np.isnan(abandoned)
Esempio n. 2
0
def find_best_discord_brute_force(series,
                                  win_size,
                                  global_registry,
                                  z_threshold=0.01):
    """Find the start index of the best discord by brute-force search.

    Every candidate window start handed out by a clone of
    ``global_registry`` is z-normalized and compared against every other
    window whose start index lies more than ``win_size`` positions away.
    The candidate whose nearest such neighbour is the farthest wins.

    Parameters
    ----------
    series : sequence
        The time series; it is sliced as ``series[i:i + win_size]`` and
        measured with ``len``.
    win_size : int
        Length of each sliding window.
    global_registry : registry object
        Must provide ``clone()``; the clone must provide
        ``get_next_unvisited()`` (NaN when exhausted) and
        ``mark_visited(idx)``.
    z_threshold : float, optional
        Passed through unchanged to ``znorm``.

    Returns
    -------
    Start index of the best discord, or ``-1`` when no candidate had any
    admissible neighbour to be compared with.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    # Candidates are drawn from a clone of the caller's registry
    # (presumably so that the caller's visit marks are left untouched).
    outer_registry = global_registry.clone()

    # NOTE(review): the last valid window start is len(series) - win_size,
    # which this size excludes -- confirm whether that is intentional.
    inner_size = len(series) - win_size

    outer_idx = outer_registry.get_next_unvisited()
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)],
                              z_threshold)

        # Distance from the candidate to its nearest admissible neighbour.
        nn_distance = np.inf
        inner_registry = VisitRegistry(inner_size)

        inner_idx = inner_registry.get_next_unvisited()
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Only windows starting more than win_size away are compared.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:(inner_idx + win_size)],
                                 z_threshold)
                # nn_distance doubles as the abandonment cutoff; a NaN
                # result is ignored by the check below.
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)

                if not np.isnan(dist) and dist < nn_distance:
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # Skip candidates that never met an admissible neighbour.  The
        # former ``~(np.inf == nnDistance)`` was always truthy for a plain
        # float (``~True == -2``), so such candidates won with distance inf.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return best_so_far_index
Esempio n. 3
0
def find_best_discord_brute_force(series,
                                  win_size,
                                  global_registry,
                                  z_threshold=0.01):
    """Find the best discord and its distance by brute-force search.

    Every candidate window start handed out by a clone of
    ``global_registry`` is z-normalized and compared against every other
    window whose start index lies more than ``win_size`` positions away.
    The result is the candidate with the largest nearest-neighbour
    distance, i.e. ``max`` over candidates of ``min`` over neighbours.

    Parameters
    ----------
    series : sequence
        The time series; it is sliced as ``series[i:i + win_size]`` and
        measured with ``len``.
    win_size : int
        Length of each sliding window.
    global_registry : registry object
        Must provide ``clone()``; the clone must provide
        ``get_next_unvisited()`` (NaN when exhausted) and
        ``mark_visited(idx)``.
    z_threshold : float, optional
        Passed through unchanged to ``znorm``.

    Returns
    -------
    tuple
        ``(best_so_far_index, best_so_far_distance)``: the start index of
        the discord and the distance to its nearest admissible neighbour,
        or ``(-1, -1.0)`` when no candidate had any admissible neighbour.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    # Candidates are drawn from a clone of the caller's registry
    # (presumably so that the caller's visit marks are left untouched).
    outer_registry = global_registry.clone()

    # NOTE(review): why not len(series) - win_size + 1?  The last valid
    # window start is len(series) - win_size, which this size excludes --
    # confirm whether that is intentional.
    inner_size = len(series) - win_size

    # Fetch the first unvisited candidate start index.
    outer_idx = outer_registry.get_next_unvisited()

    # The registry signals exhaustion with NaN.
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        # Normalized candidate window covering indices
        # outer_idx .. outer_idx + win_size - 1.
        candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)],
                              z_threshold)

        # Smallest distance from the candidate to any window whose start
        # is far enough away (the most similarly shaped neighbour).
        nn_distance = np.inf
        inner_registry = VisitRegistry(inner_size)

        # Scan all other start indices for the nearest neighbour.
        inner_idx = inner_registry.get_next_unvisited()
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Starts must differ by more than win_size, so the two windows
            # neither overlap nor touch.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:(inner_idx + win_size)],
                                 z_threshold)

                # Distance between the two normalized windows; nn_distance
                # doubles as the abandonment cutoff and a NaN result is
                # ignored by the check below.
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)

                # Tighten the nearest-neighbour distance.
                if not np.isnan(dist) and dist < nn_distance:
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # best_so_far_distance: distance between the least similar window
        #   and its nearest neighbour.
        # best_so_far_index: start index of that anomalous window.
        #
        # Skip candidates that never met an admissible neighbour.  The
        # former ``~(np.inf == nnDistance)`` was always truthy for a plain
        # float (``~True == -2``), so such candidates won with distance inf.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return (best_so_far_index, best_so_far_distance)