def _batch_compute(args):
    """
    Internal function to compute a batch of the time series in parallel.

    Parameters
    ----------
    args : tuple
        Various attributes used for computing the batch, unpacked in this
        order:

        batch_start : int
            The starting index for this batch.
        batch_end : int
            The ending index for this batch.
        ts : array_like
            The time series to compute the matrix profile for.
        query : array_like
            The query.
        window_size : int
            The size of the window to compute the profile over.
        data_length : int
            The number of elements in the time series.
        profile_length : int
            The number of elements that will be in the final matrix profile.
        exclusion_zone : int
            Used to exclude trivial matches.
        is_join : bool
            Flag to indicate if an AB join or self join is occurring.
        data_mu : array_like
            The moving average over the time series for the given window
            size.
        data_sig : array_like
            The moving standard deviation over the time series for the given
            window size.
        first_product : array_like
            The first sliding dot product for the time series over index 0
            to window_size.
        skip_locs : array_like
            Indices that should be skipped for distance profile calculation
            due to a nan or inf.

    Returns
    -------
    dict : profile
        The matrix profile, left and right matrix profiles and their
        respective profile indices. The left and right entries are ``None``
        when ``is_join`` is true. ::

            {
                'mp': The matrix profile,
                'pi': The matrix profile 1NN indices,
                'rmp': The right matrix profile,
                'rpi': The right matrix profile 1NN indices,
                'lmp': The left matrix profile,
                'lpi': The left matrix profile 1NN indices,
            }
    """
    (batch_start, batch_end, ts, query, window_size, data_length,
     profile_length, exclusion_zone, is_join, data_mu, data_sig,
     first_product, skip_locs) = args

    # initialize matrices
    matrix_profile = np.full(profile_length, np.inf)
    profile_index = np.full(profile_length, 0)

    left_matrix_profile = None
    right_matrix_profile = None
    left_profile_index = None
    right_profile_index = None

    # left/right profiles are only tracked for self joins
    if not is_join:
        left_matrix_profile = np.copy(matrix_profile)
        right_matrix_profile = np.copy(matrix_profile)
        left_profile_index = np.copy(profile_index)
        right_profile_index = np.copy(profile_index)

    # with batch 0 we do not need to recompute the dot product
    # however with other batch windows, we need the previous iterations
    # sliding dot product
    #
    # NOTE: compare by value. ``batch_start is 0`` only worked through
    # CPython's small-int caching and is False for e.g. ``np.int64(0)``.
    starts_at_zero = batch_start == 0
    if starts_at_zero:
        first_window = query[batch_start:batch_start + window_size]
        last_product = np.copy(first_product)
    else:
        first_window = query[batch_start - 1:batch_start + window_size - 1]
        last_product = core.fft_convolve(ts, first_window)

    # running sums let the window mean/std be rolled forward in O(1)
    query_sum = np.sum(first_window)
    query_2sum = np.sum(first_window**2)
    query_mu, query_sig = core.moving_avg_std(first_window, window_size)
    drop_value = first_window[0]

    # only compute the distance profile for index 0 and update
    if starts_at_zero:
        distance_profile = core.distance_profile(
            last_product, window_size, data_mu, data_sig, query_mu, query_sig)

        # apply exclusion zone
        distance_profile = core.apply_exclusion_zone(
            exclusion_zone, is_join, window_size, data_length, 0,
            distance_profile)

        # update the matrix profile
        # NOTE(review): index 0 never reaches the left/right profiles on
        # this path, unlike every later index - confirm this is intended.
        improved = distance_profile < matrix_profile
        matrix_profile[improved] = distance_profile[improved]
        profile_index[improved] = 0

        batch_start += 1

    # make sure to compute inclusively from batch start to batch end
    # otherwise there are gaps in the profile
    if batch_end < profile_length:
        batch_end += 1

    # iteratively compute distance profile and update with element-wise mins
    for i in range(batch_start, batch_end):
        # check for nan or inf and skip
        if skip_locs[i]:
            continue

        # roll the window statistics forward: drop the value that left the
        # window and add the one that entered it
        query_window = query[i:i + window_size]
        query_sum = query_sum - drop_value + query_window[-1]
        query_2sum = query_2sum - drop_value**2 + query_window[-1]**2
        query_mu = query_sum / window_size
        query_sig2 = query_2sum / window_size - query_mu**2
        # NOTE(review): floating-point cancellation can leave query_sig2
        # slightly negative for near-constant windows, which yields NaN
        # here - confirm how core.distance_profile treats that.
        query_sig = np.sqrt(query_sig2)

        # derive this window's sliding dot product from the previous one;
        # the right-hand side is fully evaluated before the assignment
        last_product[1:] = last_product[0:data_length - window_size] \
            - ts[0:data_length - window_size] * drop_value \
            + ts[window_size:] * query_window[-1]
        last_product[0] = first_product[i]
        drop_value = query_window[0]

        distance_profile = core.distance_profile(
            last_product, window_size, data_mu, data_sig, query_mu, query_sig)

        # apply the exclusion zone
        distance_profile = core.apply_exclusion_zone(
            exclusion_zone, is_join, window_size, data_length, i,
            distance_profile)

        # update the matrix profile
        improved = distance_profile < matrix_profile
        matrix_profile[improved] = distance_profile[improved]
        profile_index[improved] = i

        # update the left and right matrix profiles
        if not is_join:
            # positions at or after i take i as a candidate neighbour for
            # the left profile; basic slices are views, so masked writes
            # through them land in the full arrays
            closer = distance_profile[i:] < left_matrix_profile[i:]
            left_matrix_profile[i:][closer] = distance_profile[i:][closer]
            left_profile_index[i:][closer] = i

            # positions before i take i as a candidate neighbour for the
            # right profile
            closer = distance_profile[:i] < right_matrix_profile[:i]
            right_matrix_profile[:i][closer] = distance_profile[:i][closer]
            right_profile_index[:i][closer] = i

    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
    }
def _batch_compute(args):
    """
    Internal function to compute a batch of the multi-dimensional time
    series in parallel.

    Parameters
    ----------
    args : tuple
        Various attributes used for computing the batch, unpacked in this
        order:

        num_dim : int
            The number of dimensions; ``ts``, ``query``, ``data_mu``,
            ``data_sig`` and ``first_product`` are indexed by dimension
            along their first axis.
        batch_start : int
            The starting index for this batch.
        batch_end : int
            The ending index for this batch.
        ts : array_like
            The time series to compute the matrix profile for.
        query : array_like
            The query.
        window_size : int
            The size of the window to compute the profile over.
        data_length : int
            The number of elements in the time series.
        profile_length : int
            The number of elements that will be in the final matrix profile.
        exclusion_zone : int
            Used to exclude trivial matches.
        data_mu : array_like
            The moving average over the time series for the given window
            size.
        data_sig : array_like
            The moving standard deviation over the time series for the given
            window size.
        first_product : array_like
            The first sliding dot product for the time series over index 0
            to window_size.
        skip_locs : array_like
            Indices that should be skipped for distance profile calculation
            due to a nan or inf.
        profile_dimension : sequence of array_like
            Per-row storage for the dimensions that produced each match. It
            is written in place, and only when ``return_dimension`` is
            true. Entry ``j`` presumably has shape
            ``(j + 1, profile_length)`` - TODO confirm against the caller.
        return_dimension : bool
            Flag to indicate whether ``profile_dimension`` is updated.

    Returns
    -------
    dict : profile
        The matrix profile, left and right matrix profiles and their
        respective profile indices. Row ``j`` of each profile is built from
        the mean of the ``j + 1`` smallest per-dimension distances. ::

            {
                'mp': The matrix profile,
                'pi': The matrix profile 1NN indices,
                'pd': The profile_dimension object that was passed in,
                'rmp': The right matrix profile,
                'rpi': The right matrix profile 1NN indices,
                'lmp': The left matrix profile,
                'lpi': The left matrix profile 1NN indices,
            }
    """
    (num_dim, batch_start, batch_end, ts, query, window_size, data_length,
     profile_length, exclusion_zone, data_mu, data_sig,
     first_product, skip_locs, profile_dimension, return_dimension) = args

    # initialize matrices
    matrix_profile = np.full((num_dim, profile_length), np.inf)
    profile_index = np.full((num_dim, profile_length), 0)

    left_matrix_profile = np.copy(matrix_profile)
    right_matrix_profile = np.copy(matrix_profile)
    left_profile_index = np.copy(profile_index)
    right_profile_index = np.copy(profile_index)

    # with batch 0 we do not need to recompute the dot product
    # however with other batch windows, we need the previous iterations
    # sliding dot product
    #
    # NOTE: compare by value. ``batch_start is 0`` only worked through
    # CPython's small-int caching and is False for e.g. ``np.int64(0)``.
    last_product = np.copy(first_product)
    if batch_start == 0:
        first_window = query[:, batch_start:batch_start + window_size]
    else:
        first_window = query[:, batch_start - 1:batch_start + window_size - 1]
        for dim in range(num_dim):
            last_product[dim, :] = core.fft_convolve(
                ts[dim, :], first_window[dim, :])

    # running sums let each dimension's window mean/std be rolled forward
    query_sum = np.sum(first_window, axis=1)
    query_2sum = np.sum(first_window**2, axis=1)
    query_mu, query_sig = np.empty(num_dim), np.empty(num_dim)
    for dim in range(num_dim):
        query_mu[dim], query_sig[dim] = core.moving_avg_std(
            first_window[dim, :], window_size)

    # value that leaves each dimension's window on the next step
    drop_value = np.empty(num_dim)
    drop_value[:] = first_window[:, 0]

    distance_profile = np.empty((num_dim, profile_length))

    # data_sig is never modified here, so the flat-subsequence mask is the
    # same on every iteration
    flat_data = data_sig < np.sqrt(_EPS)

    # make sure to compute inclusively from batch start to batch end
    # otherwise there are gaps in the profile
    if batch_end < profile_length:
        batch_end += 1

    # iteratively compute distance profile and update with element-wise mins
    for i in range(batch_start, batch_end):
        # check for nan or inf and skip
        if skip_locs[i]:
            continue

        for j in range(num_dim):
            query_window = query[j, i:i + window_size]

            if i != 0:
                # roll the window statistics forward: drop the value that
                # left the window and add the one that entered it
                query_sum[j] = query_sum[j] - drop_value[j] \
                    + query_window[-1]
                query_2sum[j] = query_2sum[j] - drop_value[j]**2 \
                    + query_window[-1]**2
                query_mu[j] = query_sum[j] / window_size
                query_sig2 = query_2sum[j] / window_size - query_mu[j]**2
                # clamp so the square root never sees a value below _EPS
                if query_sig2 < _EPS:
                    query_sig2 = _EPS
                query_sig[j] = np.sqrt(query_sig2)

                # derive this window's sliding dot product from the previous
                # one; the right-hand side is evaluated before assignment
                last_product[j, 1:] = \
                    last_product[j, 0:data_length - window_size] \
                    - ts[j, 0:data_length - window_size] * drop_value[j] \
                    + ts[j, window_size:] * query_window[-1]
                last_product[j, 0] = first_product[j, i]

            # for i == 0 the initial product and statistics are used as-is
            distance_profile[j, :] = core.distance_profile(
                last_product[j, :], window_size, data_mu[j, :],
                data_sig[j, :], query_mu[j], query_sig[j])

            # apply the exclusion zone
            distance_profile[j, :] = core.apply_exclusion_zone(
                exclusion_zone, 0, window_size, data_length, i,
                distance_profile[j, :])

            # snap distances below _EPS to exactly zero
            distance_profile[j, distance_profile[j, :] < _EPS] = 0
            drop_value[j] = query_window[0]

        # skip this index when any dimension's query window has a standard
        # deviation below _EPS
        if np.any(query_sig < _EPS):
            continue

        distance_profile[:, skip_locs] = np.inf
        distance_profile[flat_data] = np.inf

        # sort each column across dimensions so that row j of the profile
        # can use the j + 1 smallest distances
        distance_profile_dim = np.argsort(distance_profile, axis=0)
        distance_profile_sort = np.sort(distance_profile, axis=0)
        distance_profile_cumsum = np.zeros(profile_length)
        for j in range(num_dim):
            distance_profile_cumsum += distance_profile_sort[j, :]
            distance_profile_mean = distance_profile_cumsum / (j + 1)

            # update the matrix profile
            improved = distance_profile_mean < matrix_profile[j, :]
            matrix_profile[j, improved] = distance_profile_mean[improved]
            profile_index[j, improved] = i
            if return_dimension:
                profile_dimension[j][:, improved] = \
                    distance_profile_dim[:j + 1, improved]

            # update the left and right matrix profiles
            # positions at or after i take i as a candidate neighbour for
            # the left profile; ``a[j, i:]`` is a view, so masked writes
            # through it land in the full array
            closer = distance_profile_mean[i:] < left_matrix_profile[j, i:]
            left_matrix_profile[j, i:][closer] = \
                distance_profile_mean[i:][closer]
            left_profile_index[j, i:][closer] = i

            # positions before i take i as a candidate neighbour for the
            # right profile
            closer = distance_profile_mean[:i] < right_matrix_profile[j, :i]
            right_matrix_profile[j, :i][closer] = \
                distance_profile_mean[:i][closer]
            right_profile_index[j, :i][closer] = i

    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'pd': profile_dimension,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
    }