Example #1
0
def pick_mp(profile, window):
    """
    Utility function that extracts a MatrixProfile from a Pan-MatrixProfile
    placing it into the MatrixProfile data structure.

    Parameters
    ----------
    profile : dict
        A Pan-MatrixProfile data structure.
    window : int
        The specific window size used to compute the desired MatrixProfile.
        NumPy integer scalars (for example a value taken straight out of
        ``profile['windows']``) are accepted as well as Python ints.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure.

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.
        If window is not an integer.
    RuntimeError
        If desired MatrixProfile is not found based on window.

    """

    if not core.is_pmp_obj(profile):
        raise ValueError('pluck_mp expects profile as a PMP data structure!')

    if not isinstance(window, (int, np.integer)):
        raise ValueError('pluck_mp expects window to be an int!')

    # coerce to an ndarray so the element-wise comparison below also works
    # when the windows were stored as a plain list
    windows = np.asarray(profile.get('windows'))
    matches = np.argwhere(windows == window)

    if len(matches) < 1:
        raise RuntimeError(
            'Unable to find window {} in the provided PMP!'.format(window))

    # when the window appears more than once the first occurrence wins
    window_index = matches.flatten()[0]
    window = int(windows[window_index])

    # each PMP row is as long as the widest profile; keep only the leading
    # n - window + 1 entries for this window size
    mp = profile['pmp'][window_index]
    stop = len(mp) - window + 1

    mp_profile = empty_mp()
    mp_profile['mp'] = mp[0:stop]
    mp_profile['pi'] = profile['pmpi'][window_index][0:stop]
    mp_profile['metric'] = profile['metric']
    mp_profile['data']['ts'] = profile['data']['ts']
    mp_profile['join'] = False
    mp_profile['w'] = window
    mp_profile['ez'] = int(np.floor(window / 4))
    # the extracted profile is always labelled with the 'mpx' algorithm
    mp_profile['algorithm'] = 'mpx'

    return mp_profile
Example #2
0
def visualize(profile):
    """
    Build every plot that applies to the supplied data structure and return
    the resulting figures. A MatrixProfile that carries motifs and discords,
    for instance, yields the matrix profile plot plus the motif and discord
    plots.

    Parameters
    ----------
    profile : dict_like
        A MatrixProfile, Pan-MatrixProfile or Statistics data structure.

    Returns
    -------
    list : figures
        A list of matplotlib figures.

    Raises
    ------
    ValueError
        If profile is not one of the supported data structures.

    """
    if not is_visualizable(profile):
        raise ValueError(
            'MatrixProfile, Pan-MatrixProfile or Statistics data structure expected!'
        )

    def populated(key):
        # optional sections are only plotted when present and non-empty
        return key in profile and len(profile[key]) > 0

    figures = []

    # MatrixProfile: the profile itself plus any optional extras
    if core.is_mp_obj(profile):
        figures = __combine(figures, plot_mp(profile))

        if populated('cmp'):
            figures = __combine(figures, plot_cmp_mp(profile))

        if populated('av'):
            figures = __combine(figures, plot_av_mp(profile))

        if populated('motifs'):
            figures = __combine(figures, plot_motifs_mp(profile))

        if populated('discords'):
            figures = __combine(figures, plot_discords_mp(profile))

    # Pan-MatrixProfile: the PMP plus motifs / discords when available
    if core.is_pmp_obj(profile):
        figures = __combine(figures, plot_pmp(profile))

        if populated('motifs'):
            figures = __combine(figures, plot_motifs_pmp(profile))

        if populated('discords'):
            figures = __combine(figures, plot_discords_pmp(profile))

    # Statistics
    if core.is_stats_obj(profile):
        figures = __combine(figures, plot_stats(profile))

    return figures
Example #3
0
def from_json(profile):
    """
    Converts a JSON formatted string into a profile data structure.

    Parameters
    ----------
    profile : str or file-like
        The profile as a JSON formatted string. An object exposing a
        ``read()`` method (e.g. an open file) is accepted as well and is
        decoded from its contents.

    Returns
    -------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.

    Raises
    ------
    ValueError
        If the decoded document is neither a MatrixProfile nor a
        Pan-MatrixProfile.
    """
    # json.load only works on readers; a plain string must go through
    # json.loads
    if hasattr(profile, 'read'):
        dct = json.load(profile)
    else:
        dct = json.loads(profile)

    # handle pmp and convert to appropriate types
    if core.is_pmp_obj(dct):
        dct['pmp'] = np.array(dct['pmp'], dtype='float64')
        dct['pmpi'] = np.array(dct['pmpi'], dtype=int)
        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')
        dct['windows'] = np.array(dct['windows'], dtype=int)

    # handle mp
    elif core.is_mp_obj(dct):
        dct['mp'] = np.array(dct['mp'], dtype='float64')
        dct['pi'] = np.array(dct['pi'], dtype=int)

        # the left / right profiles are optional; each pair is converted
        # only when both members were serialized as lists
        for profile_key, index_key in (('lmp', 'lpi'), ('rmp', 'rpi')):
            has_pair = isinstance(dct.get(profile_key), list)
            has_pair = has_pair and isinstance(dct.get(index_key), list)

            if has_pair:
                dct[profile_key] = np.array(dct[profile_key],
                                            dtype='float64')
                dct[index_key] = np.array(dct[index_key], dtype=int)

        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')

        # the query is optional as well
        query = dct['data'].get('query')
        if isinstance(query, list):
            dct['data']['query'] = np.array(query, dtype='float64')
    else:
        raise ValueError('File is not of type profile!')

    return dct
Example #4
0
def is_visualizable(obj):
    """
    Tell whether the given object is one of the data structures that can be
    plotted: a MatrixProfile, a Pan-MatrixProfile or a Statistics object.

    Parameters
    ----------
    obj : Object
        The object to test.

    Returns
    -------
    The outcome of the first check that is truthy (MatrixProfile, then
    Pan-MatrixProfile), otherwise the outcome of the Statistics check.
    """
    verdict = core.is_mp_obj(obj)
    if verdict:
        return verdict

    verdict = core.is_pmp_obj(obj)
    if verdict:
        return verdict

    return core.is_stats_obj(obj)
Example #5
0
def get_windows(profile):
    """
    Return the window size(s) of a profile as a sequence, regardless of
    whether it is a MatrixProfile or a Pan-MatrixProfile.

    Parameters
    ----------
    profile : dict
        The MatrixProfile or PMP profile.

    Returns
    -------
    list :
        For a MatrixProfile, a one element list holding its 'w' value. For a
        PMP, whatever is stored under 'windows' (returned as is). An empty
        list for anything else.
    """
    if core.is_mp_obj(profile):
        return [profile.get('w')]

    if core.is_pmp_obj(profile):
        return profile.get('windows')

    return []
def pmp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Computes the top K discords for the given Pan-MatrixProfile. The return
    values is a list of row by col indices.

    Notes
    -----
    This algorithm is written to work with Euclidean distance. If you submit
    a PMP of Pearson metrics, then it is first converted to Euclidean.

    Parameters
    ----------
    profile : dict
        Data structure from a PMP algorithm.
    exclusion_zone : int, Default window / 2
        The zone to exclude around the found discords to reduce trivial
        findings. By default we use the row-wise window / 2, i.e. the
        default is derived from the window of the row in which each
        individual discord was found.
    k : int
        Maximum number of discords to find.

    Returns
    -------
    dict : profile
        A 2D array of indices. The first column corresponds to the row index
        and the second column corresponds to the column index of the
        submitted PMP. It is placed back on the original object passed in as
        'discords' key.

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.

    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    # this function requires euclidean distance
    # convert if the metric is pearson
    metric = profile.get('metric', None)
    pmp = profile.get('pmp', None)
    windows = profile.get('windows', None)

    if metric == 'pearson':
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with -infinity
    # for whatever reason numpy argmax finds infinity as max so
    # this is a way to get around it by converting to -infinity
    tmp[core.nan_inf_indices(tmp)] = -np.inf

    # iterate finding the max value k times or until negative
    # infinity is obtained
    found = []

    for _ in range(k):
        max_idx = np.unravel_index(np.argmax(tmp), tmp.shape)

        # nothing but excluded / invalid cells are left
        if tmp[max_idx] == -np.inf:
            break

        found.append(max_idx)
        row, col = max_idx

        # apply exclusion zone
        # unless the caller fixed it, the exclusion zone is 1/2 of the
        # window size used to compute this specific row; it is kept in a
        # local so the default is re-derived for every discord instead of
        # sticking to the window of the first one
        if exclusion_zone is None:
            ez = int(np.floor(windows[row] / 2))
        else:
            ez = exclusion_zone

        n = tmp[row].shape[0]
        ez_start = max(0, col - ez)
        ez_stop = min(n, col + ez)
        tmp[row][ez_start:ez_stop] = -np.inf

        # always retire the discord itself; with a zero-width exclusion zone
        # the slice above is empty and the same cell would be found again
        tmp[max_idx] = -np.inf

    profile['discords'] = np.array(found)

    return profile
Example #7
0
def test_is_pmp_obj():
    """Only a dict tagged with class 'PMP' is recognised as a PMP object."""
    cases = (
        (True, {'class': 'PMP'}),
        (False, 's'),
        (False, {}),
    )

    for expected, candidate in cases:
        assert (expected == core.is_pmp_obj(candidate))
Example #8
0
def pmp_top_k_motifs(profile,
                     exclusion_zone=None,
                     k=3,
                     max_neighbors=10,
                     radius=3):
    """
    Find the top K number of motifs (patterns) given a pan matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the pan matrix profile algorithms.
    exclusion_zone : int, Default window / 2
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When it is not provided it defaults to half
        of the window size of the row in which each motif is found.
        NOTE(review): a value of 0 is treated the same as "not provided"
        and falls back to the default - confirm whether 0 was meant to
        disable the exclusion zone instead.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding neighbor
    indices. Note that each index is a (row, col) index corresponding to the
    pan matrix profile.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.
    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    data = profile.get('data', None)
    ts = data.get('ts', None)
    data_len = len(ts)

    pmp = profile.get('pmp', None)
    profile_len = pmp.shape[1]
    pmpi = profile.get('pmpi', None)
    windows = profile.get('windows', None)

    # make sure we are working with Euclidean distances
    if core.is_pearson_array(pmp):
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with infinity
    tmp[core.nan_inf_indices(tmp)] = np.inf

    motifs = []
    for _ in range(k):
        min_idx = np.unravel_index(np.argmin(tmp), tmp.shape)
        min_dist = tmp[min_idx]

        # nothing else to find...
        if core.is_nan_inf(min_dist):
            break

        # create the motif pair
        min_row_idx, min_col_idx = min_idx

        # motif pairs are respective to the column of the matching row
        match_idx = pmpi[min_row_idx][min_col_idx]
        first_idx = np.min([min_col_idx, match_idx])
        second_idx = np.max([min_col_idx, match_idx])

        # compute distance profile for first appearance
        window_size = windows[min_row_idx]
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # extend the distance profile to be as long as the original
        infs = np.full(profile_len - len(distance_profile), np.inf)
        distance_profile = np.append(distance_profile, infs)

        # exclude already picked motifs and neighbors
        # the mask comes from the working copy: that is where the exclusion
        # zones of earlier picks are recorded, the input pmp is never
        # modified
        mask = core.nan_inf_indices(tmp[min_row_idx])
        distance_profile[mask] = np.inf

        # determine the exclusion zone if not set
        # kept in a local so the default is re-derived from the window of
        # every motif's own row instead of sticking to the first one
        if exclusion_zone:
            ez = exclusion_zone
        else:
            ez = int(np.floor(window_size / 2))

        # apply exclusion zone for motif pair
        for j in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                ez, False, window_size, data_len, j, distance_profile)
            tmp[min_row_idx] = core.apply_exclusion_zone(
                ez, False, window_size, data_len, j, tmp[min_row_idx])

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = np.real(distance_profile[neighbor_idx])
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append((min_row_idx, neighbor_idx))
            distance_profile = core.apply_exclusion_zone(
                ez, False, window_size, data_len, neighbor_idx,
                distance_profile)
            tmp[min_row_idx] = core.apply_exclusion_zone(
                ez, False, window_size, data_len, neighbor_idx,
                tmp[min_row_idx])

        # add the motifs and neighbors
        # note that they are (row, col) indices
        motifs.append({
            'motifs': [(min_row_idx, first_idx), (min_row_idx, second_idx)],
            'neighbors': neighbors
        })

    profile['motifs'] = motifs

    return profile
Example #9
0
def profile_to_proto(profile):
    """
    Convert a MatrixProfile or PMP profile data structure into an MPFOutput
    protobuf message object.

    Parameters
    ----------
    profile : dict
        The profile to convert.

    Returns
    -------
    MPFOutput :
        The MPFOutput protobuf message object.

    Raises
    ------
    ValueError
        If profile is neither a MatrixProfile nor a Pan-MatrixProfile. The
        check happens after the shared attributes have been copied.
    """
    output = MPFOutput()

    def store_matrix(parent, field, values, optional=False):
        # copy rows / cols / data of `values` into the matrix message
        # `parent.<field>`; optional matrices are skipped when they turn out
        # to be empty or not array like
        rows, cols, data = get_matrix_attributes(values)

        if optional and not (rows and cols and core.is_array_like(data)):
            return

        matrix = getattr(parent, field)
        matrix.rows = rows
        matrix.cols = cols
        matrix.data.extend(data)

    # higher level attributes shared by PMP and MP
    shared = (
        ('klass', 'class'),
        ('algorithm', 'algorithm'),
        ('metric', 'metric'),
        ('sample_pct', 'sample_pct'),
    )
    for attribute, key in shared:
        setattr(output, attribute, profile.get(key))

    # time series is always written, the query only when it holds data
    series = profile.get('data')
    store_matrix(output, 'ts', series.get('ts'))
    store_matrix(output, 'query', series.get('query'), optional=True)

    # window(s)
    output.windows.extend(get_windows(profile))

    # motifs
    motifs = profile.get('motifs')
    if motifs is not None:
        for motif in motifs:
            output.motifs.append(get_proto_motif(motif))

    # discords
    discords = profile.get('discords')
    if discords is not None:
        for discord in discords:
            output.discords.append(get_proto_discord(discord))

    # cmp and av are written whenever the key holds something
    for key in ('cmp', 'av'):
        values = profile.get(key)
        if values is not None:
            store_matrix(output, key, values)

    # av_type
    av_type = profile.get('av_type')
    if av_type is not None and len(av_type) > 0:
        output.av_type = av_type

    # matrix profile specific attributes
    if core.is_mp_obj(profile):
        mp_message = output.mp
        mp_message.ez = profile.get('ez')
        mp_message.join = profile.get('join')

        # mp and pi are mandatory
        store_matrix(mp_message, 'mp', profile.get('mp'))
        store_matrix(mp_message, 'pi', profile.get('pi'))

        # left / right profiles and indices are optional
        for key in ('lmp', 'lpi', 'rmp', 'rpi'):
            store_matrix(mp_message, key, profile.get(key), optional=True)

    # pan matrix profile specific attributes
    elif core.is_pmp_obj(profile):
        pmp_message = output.pmp
        store_matrix(pmp_message, 'pmp', profile.get('pmp'))
        store_matrix(pmp_message, 'pmpi', profile.get('pmpi'))

    else:
        raise ValueError('Expecting Pan-MatrixProfile or MatrixProfile!')

    return output