def mp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Find the top K number of discords (anomalies) given a matrix profile,
    exclusion zone and the desired number of discords. The exclusion zone
    nullifies entries on the left and right side of the first and subsequent
    discords to remove trivial matches. More specifically, a discord found
    at location X will more than likely have additional discords to the left or
    right of it.

    Parameters
    ----------
    profile : dict
        The MatrixProfile data structure.
    exclusion_zone : int, Default mp algorithm ez
        Desired number of values to exclude on both sides of the anomaly.
        When None, the value stored under the profile's "ez" key is used.
        A value of 0 (or less) disables the exclusion zone.
    k : int
        Desired number of discords to find. Values of 0 or less yield no
        discords.

    Returns
    -------
    dict : profile
        The original input profile with an additional "discords" key containing
        a np.ndarray of discord indices, ordered from the largest matrix
        profile distance to the smallest.

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
        If the profile does not contain the "mp" key.
        If no exclusion zone is given and the profile has no "ez" key.

    """
    if not core.is_mp_obj(profile):
        raise ValueError('Expecting MP data structure!')

    mp = profile.get('mp', None)
    if mp is None:
        raise ValueError('Expecting MP data structure to contain "mp"!')

    # work on a float copy so that masking with inf never modifies the
    # caller's matrix profile
    tmp = np.copy(mp).astype('d')
    n = len(tmp)

    # TODO: this is based on STOMP standards when this motif finding algorithm
    # originally came out. Should we default this to 4.0 instead? That seems
    # to be the common value now per new research.
    if exclusion_zone is None:
        exclusion_zone = profile.get('ez', None)

    if exclusion_zone is None:
        raise ValueError(
            'An exclusion zone must be provided or stored in the profile!')

    found = []

    # argsort is ascending, so the flipped view visits the largest distances
    # first (NaN values sort last and therefore come first in this view)
    for idx in np.argsort(tmp)[::-1]:
        # checked before appending so that k <= 0 yields no discords
        if len(found) >= k:
            break

        # skip NaN, inf and anything already masked by an exclusion zone
        if not np.isfinite(tmp[idx]):
            continue

        found.append(idx)

        # apply exclusion zone: mask exclusion_zone values on each side of
        # the discord (the slice end is exclusive, hence the + 1)
        if exclusion_zone > 0:
            exclusion_zone_start = max(0, idx - exclusion_zone)
            exclusion_zone_end = min(n, idx + exclusion_zone + 1)
            tmp[exclusion_zone_start:exclusion_zone_end] = np.inf

    profile['discords'] = np.array(found, dtype='int')

    return profile
Esempio n. 2
0
def test_is_mp_obj():
    """Check core.is_mp_obj against one accepted and two rejected inputs."""
    cases = (
        ({'class': 'MatrixProfile'}, True),
        ('s', False),
        ({}, False),
    )

    for candidate, expected in cases:
        assert (expected == core.is_mp_obj(candidate))
Esempio n. 3
0
def _fill_proto_matrix(parent, field, values, optional=False):
    """
    Copy the rows, cols and data reported by get_matrix_attributes for
    ``values`` into the matrix message stored at ``parent.<field>``.

    Parameters
    ----------
    parent : object
        The message object that owns the matrix attribute.
    field : str
        Name of the matrix attribute on ``parent``.
    values : array_like or None
        The values handed to get_matrix_attributes.
    optional : bool, Default = False
        When True, nothing is written (and ``parent.<field>`` is not even
        accessed) unless rows and cols are truthy and data is array-like.
    """
    rows, cols, data = get_matrix_attributes(values)

    if optional and not (rows and cols and core.is_array_like(data)):
        return

    matrix = getattr(parent, field)
    matrix.rows = rows
    matrix.cols = cols
    matrix.data.extend(data)


def profile_to_proto(profile):
    """
    Utility function that takes a MatrixProfile or PMP profile data structure
    and converts it to the MPFOutput protobuf message object.

    Parameters
    ----------
    profile : dict
        The profile to convert.

    Returns
    -------
    MPFOutput :
        The MPFOutput protobuf message object.

    Raises
    ------
    ValueError
        If profile is neither a MatrixProfile nor a Pan-MatrixProfile.
    """
    output = MPFOutput()

    # add higher level attributes that work for PMP and MP
    output.klass = profile.get('class')
    output.algorithm = profile.get('algorithm')
    output.metric = profile.get('metric')
    output.sample_pct = profile.get('sample_pct')

    # add time series data; the query is only written when it is usable
    data = profile.get('data')
    _fill_proto_matrix(output, 'ts', data.get('ts'))
    _fill_proto_matrix(output, 'query', data.get('query'), optional=True)

    # add window(s)
    output.windows.extend(get_windows(profile))

    # add motifs
    motifs = profile.get('motifs')
    if motifs is not None:
        for motif in motifs:
            output.motifs.append(get_proto_motif(motif))

    # add discords
    discords = profile.get('discords')
    if discords is not None:
        for discord in discords:
            output.discords.append(get_proto_discord(discord))

    # add cmp
    cmp = profile.get('cmp')
    if cmp is not None:
        _fill_proto_matrix(output, 'cmp', cmp)

    # add av
    av = profile.get('av')
    if av is not None:
        _fill_proto_matrix(output, 'av', av)

    # add av_type
    av_type = profile.get('av_type')
    if av_type is not None and len(av_type) > 0:
        output.av_type = av_type

    # add the matrix profile specific attributes
    if core.is_mp_obj(profile):
        output.mp.ez = profile.get('ez')
        output.mp.join = profile.get('join')

        # mp and pi are always written
        _fill_proto_matrix(output.mp, 'mp', profile.get('mp'))
        _fill_proto_matrix(output.mp, 'pi', profile.get('pi'))

        # left/right profiles and indices are written only when usable
        for key in ('lmp', 'lpi', 'rmp', 'rpi'):
            _fill_proto_matrix(output.mp, key, profile.get(key),
                               optional=True)

    # add the pan matrix profile specific attributes
    elif core.is_pmp_obj(profile):
        _fill_proto_matrix(output.pmp, 'pmp', profile.get('pmp'))
        _fill_proto_matrix(output.pmp, 'pmpi', profile.get('pmpi'))

    else:
        raise ValueError('Expecting Pan-MatrixProfile or MatrixProfile!')

    return output
Esempio n. 4
0
def mp_top_k_motifs(profile,
                    exclusion_zone=None,
                    k=3,
                    max_neighbors=10,
                    radius=3):
    """
    Find the top K number of motifs (patterns) given a matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the matrix profile algorithms.
    exclusion_zone : int, Default to algorithm ez
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When None, the value stored under the
        profile's "ez" key is used. Setting the exclusion zone to 0 makes it
        not apply.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding neighbor
    indices.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
        If the profile does not carry the time series under data -> ts.
        If no exclusion zone is given and the profile has no "ez" key.
    """
    if not core.is_mp_obj(profile):
        raise ValueError('Expecting MP data structure!')

    window_size = profile['w']

    # the time series is required to compute the distance profiles below
    data = profile.get('data', None)
    ts = data.get('ts', None) if data else None
    if ts is None:
        raise ValueError(
            'Expecting MP data structure to contain the time series!')

    data_len = len(ts)
    motifs = []
    mpi = profile['pi']

    # work on a float copy; NaN entries are turned into inf because argmin
    # would otherwise select a NaN first and end the search immediately
    mp = np.copy(profile['mp']).astype('d')
    mp[np.isnan(mp)] = np.inf

    # TODO: this is based on STOMP standards when this motif finding algorithm
    # originally came out. Should we default this to 4.0 instead? That seems
    # to be the common value now per new research.
    if exclusion_zone is None:
        exclusion_zone = profile.get('ez', None)

    if exclusion_zone is None:
        raise ValueError(
            'An exclusion zone must be provided or stored in the profile!')

    for _ in range(k):
        min_idx = np.argmin(mp)
        min_dist = mp[min_idx]

        # we no longer have any motifs to find as all values are nan/inf
        if core.is_nan_inf(min_dist):
            break

        # create a motif pair corresponding to the first appearance and
        # second appearance
        first_idx = np.min([min_idx, mpi[min_idx]])
        second_idx = np.max([min_idx, mpi[min_idx]])

        # compute distance profile using mass2 for first appearance
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # NaN distances can never be neighbors; as inf they no longer win
        # the argmin below and cut the neighbor search short
        distance_profile[np.isnan(distance_profile)] = np.inf

        # exclude already picked motifs and neighbors
        mask = core.nan_inf_indices(mp)
        distance_profile[mask] = np.inf

        # apply exclusion zone for motif pair
        for pair_idx in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, pair_idx,
                distance_profile)
            mp = core.apply_exclusion_zone(exclusion_zone, False, window_size,
                                           data_len, pair_idx, mp)

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = distance_profile[neighbor_idx]
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append(neighbor_idx)
            distance_profile = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, neighbor_idx,
                distance_profile)
            mp = core.apply_exclusion_zone(exclusion_zone, False, window_size,
                                           data_len, neighbor_idx, mp)

        # add motifs and neighbors to results
        motifs.append({
            'motifs': [first_idx, second_idx],
            'neighbors': neighbors
        })

    profile['motifs'] = motifs

    return profile
Esempio n. 5
0
def apply_av(profile, av="default", custom_av=None):
    """
    Utility function that returns a MatrixProfile data structure
    with a calculated annotation vector that has been applied
    to correct the matrix profile.

    Parameters
    ----------
    profile : dict
        A MatrixProfile structure.
    av : str, Default = "default"
        The type of annotation vector to apply. One of "default",
        "complexity", "meanstd", "clipping" or "custom".
    custom_av : array_like, Default = None
        Custom annotation vector (will only be applied if av is "custom").

    Returns
    -------
    dict : profile
        A MatrixProfile data structure with a calculated annotation vector
        and a corrected matrix profile. The input profile is updated in
        place with the "cmp", "av" and "av_type" keys.

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
        If custom_av parameter is not array-like when using a custom av.
        If av parameter is invalid.
        If lengths of annotation vector and matrix profile are different.
        If values in annotation vector are outside [0.0, 1.0].

    """
    if not core.is_mp_obj(profile):
        raise ValueError('apply_av expects profile as an MP data structure')

    # annotation vector builders keyed by the av type they implement
    av_builders = {
        "default": make_default_av,
        "complexity": make_complexity_av,
        "meanstd": make_meanstd_av,
        "clipping": make_clipping_av,
    }

    if av == "custom":
        try:
            temp_av = core.to_np_array(custom_av)
        except ValueError:
            raise ValueError('apply_av expects custom_av to be array-like')
    else:
        try:
            build_av = av_builders[av]
        except (KeyError, TypeError):
            # unknown name, or a value that cannot be used as a key at all
            raise ValueError("av parameter is invalid")

        temp_av = build_av(profile['data']['ts'], profile['w'])

    av_type = av

    mp = np.asarray(profile['mp'])

    if len(temp_av) != len(mp):
        raise ValueError("Lengths of annotation vector and mp are different")

    if (temp_av < 0.0).any() or (temp_av > 1.0).any():
        raise ValueError("Annotation vector values must be between 0 and 1")

    # scale the correction by the largest finite distance: an inf/NaN maximum
    # would turn every corrected value into inf/NaN (0 * inf is NaN)
    finite_mp = mp[np.isfinite(mp)]
    if finite_mp.size > 0:
        max_val = np.max(finite_mp)
    else:
        max_val = np.max(mp)

    temp_cmp = mp + (np.ones(len(temp_av)) - temp_av) * max_val

    profile['cmp'] = temp_cmp
    profile['av'] = temp_av
    profile['av_type'] = av_type

    return profile