Beispiel #1
0
def _batch_compute(args):
    """
    Internal function to compute a batch of the time series in parallel.

    Parameters
    ----------
    args : tuple
        Various attributes used for computing the batch.
        (
            batch_start : int
                The starting index for this batch.
            batch_end : int
                The ending index for this batch.
            ts : array_like
                The time series to compute the matrix profile for.
            query : array_like
                The query.
            window_size : int
                The size of the window to compute the profile over.
            data_length : int
                The number of elements in the time series.
            profile_length : int
                The number of elements that will be in the final matrix
                profile.
            exclusion_zone : int
                Used to exclude trivial matches.
            is_join : bool
                Flag to indicate if an AB join or self join is occuring.
            data_mu : array_like
                The moving average over the time series for the given window
                size.
            data_sig : array_like
                The moving standard deviation over the time series for the
                given window size.
            first_product : array_like
                The first sliding dot product for the time series over index
                0 to window_size.
            skip_locs : array_like
                Indices that should be skipped for distance profile calculation
                due to a nan or inf.
        )

    Returns
    -------
    dict : profile
        The matrix profile, left and right matrix profiles and their respective
        profile indices.

        >>> {
        >>>     'mp': The matrix profile,
        >>>     'pi': The matrix profile 1NN indices,
        >>>     'rmp': The right matrix profile,
        >>>     'rpi': The right matrix profile 1NN indices,
        >>>     'lmp': The left matrix profile,
        >>>     'lpi': The left matrix profile 1NN indices,
        >>> }

    """
    batch_start, batch_end, ts, query, window_size, data_length, \
    profile_length, exclusion_zone, is_join, data_mu, data_sig, \
    first_product, skip_locs = args

    # initialize matrices
    matrix_profile = np.full(profile_length, np.inf)
    profile_index = np.full(profile_length, 0)

    left_matrix_profile = None
    right_matrix_profile = None
    left_profile_index = None
    right_profile_index = None

    if not is_join:
        left_matrix_profile = np.copy(matrix_profile)
        right_matrix_profile = np.copy(matrix_profile)
        left_profile_index = np.copy(profile_index)
        right_profile_index = np.copy(profile_index)

    # with batch 0 we do not need to recompute the dot product
    # however with other batch windows, we need the previous iterations sliding
    # dot product
    last_product = None
    if batch_start is 0:
        first_window = query[batch_start:batch_start + window_size]
        last_product = np.copy(first_product)
    else:
        first_window = query[batch_start - 1:batch_start + window_size - 1]
        last_product = core.fft_convolve(ts, first_window)

    query_sum = np.sum(first_window)
    query_2sum = np.sum(first_window**2)
    query_mu, query_sig = core.moving_avg_std(first_window, window_size)

    drop_value = first_window[0]

    # only compute the distance profile for index 0 and update
    if batch_start is 0:
        distance_profile = core.distance_profile(last_product, window_size,
                                                 data_mu, data_sig, query_mu,
                                                 query_sig)

        # apply exclusion zone
        distance_profile = core.apply_exclusion_zone(exclusion_zone, is_join,
                                                     window_size, data_length,
                                                     0, distance_profile)

        # update the matrix profile
        indices = (distance_profile < matrix_profile)
        matrix_profile[indices] = distance_profile[indices]
        profile_index[indices] = 0

        batch_start += 1

    # make sure to compute inclusively from batch start to batch end
    # otherwise there are gaps in the profile
    if batch_end < profile_length:
        batch_end += 1

    # iteratively compute distance profile and update with element-wise mins
    for i in range(batch_start, batch_end):

        # check for nan or inf and skip
        if skip_locs[i]:
            continue

        query_window = query[i:i + window_size]
        query_sum = query_sum - drop_value + query_window[-1]
        query_2sum = query_2sum - drop_value**2 + query_window[-1]**2
        query_mu = query_sum / window_size
        query_sig2 = query_2sum / window_size - query_mu**2
        query_sig = np.sqrt(query_sig2)
        last_product[1:] = last_product[0:data_length - window_size] \
            - ts[0:data_length - window_size] * drop_value \
            + ts[window_size:] * query_window[-1]
        last_product[0] = first_product[i]
        drop_value = query_window[0]

        distance_profile = core.distance_profile(last_product, window_size,
                                                 data_mu, data_sig, query_mu,
                                                 query_sig)

        # apply the exclusion zone
        distance_profile = core.apply_exclusion_zone(exclusion_zone, is_join,
                                                     window_size, data_length,
                                                     i, distance_profile)

        # update the matrix profile
        indices = (distance_profile < matrix_profile)
        matrix_profile[indices] = distance_profile[indices]
        profile_index[indices] = i

        # update the left and right matrix profiles
        if not is_join:
            # find differences, shift left and update
            indices = distance_profile[i:] < left_matrix_profile[i:]
            falses = np.zeros(i).astype('bool')
            indices = np.append(falses, indices)
            left_matrix_profile[indices] = distance_profile[indices]
            left_profile_index[np.argwhere(indices)] = i

            # find differences, shift right and update
            indices = distance_profile[0:i] < right_matrix_profile[0:i]
            falses = np.zeros(profile_length - i).astype('bool')
            indices = np.append(indices, falses)
            right_matrix_profile[indices] = distance_profile[indices]
            right_profile_index[np.argwhere(indices)] = i

    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
    }
Beispiel #2
0
def _batch_compute(args):
    """
    Internal function to compute a batch of the time series in parallel.

    Parameters
    ----------
    args : tuple
        Various attributes used for computing the batch.
        (
            batch_start : int
                The starting index for this batch.
            batch_end : int
                The ending index for this batch.
            ts : array_like
                The time series to compute the matrix profile for.
            query : array_like
                The query.
            window_size : int
                The size of the window to compute the profile over.
            data_length : int
                The number of elements in the time series.
            profile_length : int
                The number of elements that will be in the final matrix
                profile.
            exclusion_zone : int
                Used to exclude trivial matches.
            data_mu : array_like
                The moving average over the time series for the given window
                size.
            data_sig : array_like
                The moving standard deviation over the time series for the
                given window size.
            first_product : array_like
                The first sliding dot product for the time series over index
                0 to window_size.
            skip_locs : array_like
                Indices that should be skipped for distance profile calculation
                due to a nan or inf.
        )

    Returns
    -------
    dict : profile
        The matrix profile, left and right matrix profiles and their respective
        profile indices.

        >>> {
        >>>     'mp': The matrix profile,
        >>>     'pi': The matrix profile 1NN indices,
        >>>     'rmp': The right matrix profile,
        >>>     'rpi': The right matrix profile 1NN indices,
        >>>     'lmp': The left matrix profile,
        >>>     'lpi': The left matrix profile 1NN indices,
        >>> }

    """
    num_dim, batch_start, batch_end, ts, query, window_size, data_length, \
    profile_length, exclusion_zone, data_mu, data_sig, \
    first_product, skip_locs, profile_dimension, return_dimension = args

    # initialize matrices
    matrix_profile = np.full((num_dim, profile_length), np.inf)
    profile_index = np.full((num_dim, profile_length), 0)

    left_matrix_profile = None
    right_matrix_profile = None
    left_profile_index = None
    right_profile_index = None

    left_matrix_profile = np.copy(matrix_profile)
    right_matrix_profile = np.copy(matrix_profile)
    left_profile_index = np.copy(profile_index)
    right_profile_index = np.copy(profile_index)

    # with batch 0 we do not need to recompute the dot product
    # however with other batch windows, we need the previous iterations sliding
    # dot product
    last_product = np.copy(first_product)
    if batch_start is 0:
        first_window = query[:, batch_start:batch_start + window_size]
    else:
        first_window = query[:, batch_start - 1:batch_start + window_size - 1]
        for i in range(num_dim):
            last_product[i, :] = core.fft_convolve(ts[i, :], first_window[i, :])

    query_sum = np.sum(first_window, axis=1)
    query_2sum = np.sum(first_window**2, axis=1)
    query_mu, query_sig = np.empty(num_dim), np.empty(num_dim)
    for i in range(num_dim):
        query_mu[i], query_sig[i] = core.moving_avg_std(first_window[i, :], window_size)

    drop_value = np.empty(num_dim)
    for i in range(num_dim):
        drop_value[i] = first_window[i, 0]
    distance_profile = np.empty((num_dim, profile_length))

    # make sure to compute inclusively from batch start to batch end
    # otherwise there are gaps in the profile
    if batch_end < profile_length:
        batch_end += 1

    # iteratively compute distance profile and update with element-wise mins
    for i in range(batch_start, batch_end):
        # check for nan or inf and skip
        if skip_locs[i]:
            continue
        for j in range(num_dim):
            if i == 0:
                query_window = query[j, i:i + window_size]
                distance_profile[j, :] = core.distance_profile(last_product[j, :], window_size, data_mu[j, :],
                                                               data_sig[j, :], query_mu[j], query_sig[j])

                # apply exclusion zone
                distance_profile[j, :] = core.apply_exclusion_zone(exclusion_zone, 0, window_size, data_length, 0,
                                                                   distance_profile[j, :])
            else:
                query_window = query[j, i:i + window_size]
                query_sum[j] = query_sum[j] - drop_value[j] + query_window[-1]
                query_2sum[j] = query_2sum[j] - drop_value[j]**2 + query_window[-1]**2
                query_mu[j] = query_sum[j] / window_size
                query_sig2 = query_2sum[j] / window_size - query_mu[j]**2
                if query_sig2 < _EPS:
                    query_sig2 = _EPS
                query_sig[j] = np.sqrt(query_sig2)
                last_product[j, 1:] = last_product[j, 0:data_length - window_size] \
                - ts[j, 0:data_length - window_size] * drop_value[j] \
                + ts[j, window_size:] * query_window[-1]
                last_product[j, 0] = first_product[j, i]

                distance_profile[j, :] = core.distance_profile(last_product[j, :], window_size, data_mu[j, :],
                                                               data_sig[j, :], query_mu[j], query_sig[j])

                # apply the exclusion zone
                distance_profile[j, :] = core.apply_exclusion_zone(exclusion_zone, 0, window_size, data_length, i,
                                                                   distance_profile[j, :])
            distance_profile[j, distance_profile[j, :] < _EPS] = 0
            drop_value[j] = query_window[0]
        if np.any(query_sig < _EPS):
            continue
        distance_profile[:, skip_locs] = np.inf
        distance_profile[data_sig < np.sqrt(_EPS)] = np.inf

        distance_profile_dim = np.argsort(distance_profile, axis=0)
        distance_profile_sort = np.sort(distance_profile, axis=0)
        distance_profile_cumsum = np.zeros(profile_length)
        for j in range(num_dim):
            distance_profile_cumsum += distance_profile_sort[j, :]
            distance_profile_mean = distance_profile_cumsum / (j + 1)

            # update the matrix profile
            indices = (distance_profile_mean < matrix_profile[j, :])
            matrix_profile[j, indices] = distance_profile_mean[indices]
            profile_index[j, indices] = i
            if return_dimension:
                profile_dimension[j][:, indices] = distance_profile_dim[:j + 1, indices]

            # update the left and right matrix profiles
            # find differences, shift left and update
            indices = distance_profile_mean[i:] < left_matrix_profile[j, i:]
            falses = np.zeros(i).astype('bool')
            indices = np.append(falses, indices)
            left_matrix_profile[j, indices] = distance_profile_mean[indices]
            left_profile_index[j, np.argwhere(indices)] = i

            # find differences, shift right and update
            indices = distance_profile_mean[0:i] < right_matrix_profile[j, 0:i]
            falses = np.zeros(profile_length - i).astype('bool')
            indices = np.append(indices, falses)
            right_matrix_profile[j, indices] = distance_profile_mean[indices]
            right_profile_index[j, np.argwhere(indices)] = i
    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'pd': profile_dimension,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
    }