예제 #1
0
def find_best_discord_brute_force(series,
                                  win_size,
                                  global_registry,
                                  z_threshold=0.01):
    """Find the best discord by brute force with early-abandoned distances.

    Each start index that is still unvisited in a clone of
    ``global_registry`` is taken as a candidate. The candidate window is
    compared against every other window of the series whose start lies more
    than ``win_size`` positions away, and the smallest such distance is the
    candidate's nearest-neighbour distance. The candidate with the largest
    nearest-neighbour distance wins.

    Args:
        series: Sliceable sequence supporting ``len()``.
        win_size: Length of every compared subsequence.
        global_registry: Object exposing ``clone()``. The clone must offer
            ``get_next_unvisited()`` (NaN once nothing is left) and
            ``mark_visited(index)``.
        z_threshold: Forwarded unchanged to ``znorm``.

    Returns:
        The start index of the best discord, or ``-1`` when no candidate
        had a comparable neighbour.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    # Candidates are marked on a clone, not on ``global_registry`` itself.
    outer_registry = global_registry.clone()

    # Valid window starts are 0 .. len(series) - win_size inclusive, hence
    # the "+ 1"; without it the last window is never used as a neighbour.
    inner_size = len(series) - win_size + 1

    outer_idx = outer_registry.get_next_unvisited()

    # ``not`` is used rather than ``~``: on a plain Python bool ``~`` returns
    # a non-zero int, which is always truthy.
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        candidate_seq = znorm(series[outer_idx:outer_idx + win_size],
                              z_threshold)

        nn_distance = np.inf
        inner_registry = VisitRegistry(inner_size)

        inner_idx = inner_registry.get_next_unvisited()
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Skip windows that overlap or directly touch the candidate.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:inner_idx + win_size],
                                 z_threshold)
                # The current best is passed as the cut-off; a NaN result is
                # ignored (presumably it marks an abandoned comparison --
                # confirm against early_abandoned_dist).
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)

                if not np.isnan(dist) and dist < nn_distance:
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # A candidate whose distance is still infinite had no comparable
        # neighbour and must not be reported as a discord.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return best_so_far_index
예제 #2
0
파일: discord.py 프로젝트: kedark/saxpy
def find_best_discord_brute_force(series, win_size, global_registry, znorms):
    """Find the best discord by brute force with early-abandoned distances.

    Each start index that is still unvisited in a clone of
    ``global_registry`` is taken as a candidate. Its entry in ``znorms`` is
    compared against the entries of all start indices at least ``win_size``
    positions away, and the smallest such distance is the candidate's
    nearest-neighbour distance. The candidate with the largest
    nearest-neighbour distance wins.

    Args:
        series: Sequence supporting ``len()``; only its length is used here.
        win_size: Subsequence length, used to size the inner registry and as
            the minimum gap between compared start indices.
        global_registry: Object exposing ``clone()``. The clone must offer
            ``get_next_unvisited()`` (NaN once nothing is left) and
            ``mark_visited(index)``.
        znorms: Container indexed by window start index (presumably the
            precomputed z-normalised subsequences -- confirm with caller).

    Returns:
        Tuple ``(index, distance)`` of the best discord, or ``(-1, -1.0)``
        when no candidate had a comparable neighbour.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    # Candidates are marked on a clone, not on ``global_registry`` itself.
    outer_registry = global_registry.clone()

    # One slot per window start: 0 .. len(series) - win_size inclusive.
    window_count = len(series) - win_size + 1

    outer_idx = outer_registry.get_next_unvisited()

    # ``not`` is used rather than ``~``: on a plain Python bool ``~`` returns
    # a non-zero int, which is always truthy.
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        candidate_seq = znorms[outer_idx]

        nn_distance = np.inf
        inner_registry = VisitRegistry(window_count)

        inner_idx = inner_registry.get_next_unvisited()
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Only compare windows that do not overlap the candidate.
            if abs(inner_idx - outer_idx) >= win_size:
                # The current best is passed as the cut-off; a NaN result is
                # ignored (presumably it marks an abandoned comparison --
                # confirm against early_abandoned_euclidean).
                dist = early_abandoned_euclidean(candidate_seq,
                                                 znorms[inner_idx],
                                                 nn_distance)

                if not np.isnan(dist) and dist < nn_distance:
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # A candidate whose distance is still infinite had no comparable
        # neighbour and must not be reported as a discord.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return best_so_far_index, best_so_far_distance
예제 #3
0
def test_sizing():
    """Check that VisitRegistry keeps its unvisited count up to date."""
    capacity = 77
    registry = VisitRegistry(capacity)
    assert registry.get_unvisited_count() == capacity

    # Marking a single index removes exactly one entry.
    registry.mark_visited(0)
    assert registry.get_unvisited_count() == 76

    # Marking the range (70, 77) removes seven more entries.
    registry.mark_visited_range(70, 77)
    assert registry.get_unvisited_count() == 69

    # Marking an already visited index must not change the count.
    registry.mark_visited(0)
    assert registry.get_unvisited_count() == 69
    registry.mark_visited(1)
    assert registry.get_unvisited_count() == 68

    # Whatever index the registry hands out next is still unvisited.
    next_idx = registry.get_next_unvisited()
    registry.mark_visited(next_idx)
    assert registry.get_unvisited_count() == 67

    # Once everything is visited, NaN is returned instead of an index.
    registry.mark_visited_range(0, capacity)
    assert np.isnan(registry.get_next_unvisited())
예제 #4
0
def find_best_discord_brute_force(series,
                                  win_size,
                                  global_registry,
                                  z_threshold=0.01):
    """Find the best discord by brute force with early-abandoned distances.

    Each start index that is still unvisited in a clone of
    ``global_registry`` is taken as a candidate. The candidate window is
    compared against every other window of the series whose start lies more
    than ``win_size`` positions away, and the smallest such distance is the
    candidate's nearest-neighbour distance. The candidate with the largest
    nearest-neighbour distance wins.

    Args:
        series: Sliceable sequence supporting ``len()``.
        win_size: Length of every compared subsequence.
        global_registry: Object exposing ``clone()``. The clone must offer
            ``get_next_unvisited()`` (NaN once nothing is left) and
            ``mark_visited(index)``.
        z_threshold: Forwarded unchanged to ``znorm``.

    Returns:
        Tuple ``(index, distance)`` of the best discord, or ``(-1, -1.0)``
        when no candidate had a comparable neighbour.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    # Candidates are marked on a clone, not on ``global_registry`` itself.
    outer_registry = global_registry.clone()

    # Valid window starts are 0 .. len(series) - win_size inclusive, hence
    # the "+ 1"; without it the last window is never used as a neighbour.
    inner_size = len(series) - win_size + 1

    # Fetch an index that has not been visited yet (the original author's
    # note says the registry picks it at random -- confirm in VisitRegistry).
    outer_idx = outer_registry.get_next_unvisited()

    # Keep looping while outer_idx is not NaN. ``not`` is used rather than
    # ``~``: on a plain Python bool ``~`` returns a non-zero int, which is
    # always truthy.
    while not np.isnan(outer_idx):
        # Record outer_idx as visited.
        outer_registry.mark_visited(outer_idx)

        # Normalise the candidate subsequence, which covers indices
        # outer_idx .. outer_idx + win_size - 1.
        candidate_seq = znorm(series[outer_idx:outer_idx + win_size],
                              z_threshold)

        # Smallest distance from candidate_seq to any subsequence that
        # starts far enough away (i.e. its most similar counterpart).
        nn_distance = np.inf

        inner_registry = VisitRegistry(inner_size)
        inner_idx = inner_registry.get_next_unvisited()

        # Walk over all start indices and, among those far enough from the
        # candidate, find the nearest distance nn_distance.
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # The starts must be more than win_size apart, so overlapping
            # and directly adjacent windows are skipped.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:inner_idx + win_size],
                                 z_threshold)

                # Distance between the two normalised subsequences; the
                # current best is passed as the cut-off. A NaN result is
                # ignored (presumably it marks an abandoned comparison --
                # confirm against early_abandoned_dist).
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)

                # Shrink nn_distance towards the true minimum.
                if not np.isnan(dist) and dist < nn_distance:
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # Update best_so_far_distance and best_so_far_index.
        #
        # best_so_far_distance is max(min(distance)): the distance between
        # the least similar subsequence and its own nearest subsequence.
        #
        # best_so_far_index is the start index of the anomalous subsequence,
        # the one least similar to the other subsequences of this series.
        #
        # A candidate whose distance is still infinite had no comparable
        # neighbour and must not be reported as a discord.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return (best_so_far_index, best_so_far_distance)