def search(self, content, query, window_frac=None):
    # type: (Iterable[float], Sequence[float], Union[float, None]) -> Tuple[int, float, dict]
    """Search `query` in `content` and locate its nearest match under DTW.

    Streams `content` in overlapping buffers of `self.reset_period` points
    and, for every candidate window, applies the UCR-suite pruning cascade
    (LB_Kim -> LB_Keogh_EQ -> LB_Keogh_EC) before falling back to full
    early-abandoning DTW.

    :param content: a sequence of floats, which can be used to compute
        distance by `self.dist_cb()`
    :param query: a (typically) shorter sequence than `content`, which needs
        to be searched for
    :param window_frac: overrides the object-level warping-window fraction
        if given; see `__init__()` for detail
    :return: tuple of: location, DTW_distance, running_details_as_dict
    """
    Q = len(query)
    self.best_so_far = INF  # reset best cost for a new search
    # NOTE(review): `self.loc` is NOT reset here; correctness relies on the
    # first un-pruned DTW always beating INF — confirm for repeated searches.
    q_norm = StandardScaler().fit_transform(
        query[:, None]).flatten()  # z-norm the q

    # create envelopes for the normalized query (viz. LB_Keogh_EQ)
    window_size = int(Q * (window_frac or self.window_frac))
    q_norm_L, q_norm_U = self._lower_upper_lemire(q_norm, r=window_size)

    # visit query points in decreasing |value| order: the largest deviations
    # contribute most to the lower bounds, so they trigger pruning earliest
    q_argidx = np.abs(q_norm).argsort()[::-1]
    q_norm_L_dec, q_norm_U_dec = q_norm_L[q_argidx], q_norm_U[q_argidx]

    idx_buf = 0
    done = False
    prune_cnt = Counter(kim=0, eg=0, ec=0)  # pruning counters for each phase
    while not done:
        # keep the last `Q - 1` points if available, so candidate windows
        # straddling a buffer boundary are not lost
        self.buffer = [] if idx_buf == 0 else self.buffer[-(Q - 1):]
        self.buffer += seq(content).take(
            self.reset_period - len(self.buffer)).to_list()
        # CAUTION: `self.buffer` is huge, DO NOT PUT IT IN THE INNER LOOP
        buf_L, buf_U = self._lower_upper_lemire(
            self.buffer, r=window_size)  # for calc LB_Keogh_EC
        if len(self.buffer) <= Q - 1:
            break  # not enough points left for a single candidate window

        C_stat = MovingStatistics()  # online z-norm stats for buffer points
        # a circular array keeping the current content region; doubled in
        # size so a window can be sliced without the "%" operator
        C = np.zeros(Q * 2)  # candidate C sequence
        for idx_p, p in enumerate(self.buffer):
            C_stat.feed(p)
            C[(idx_p % Q) + Q] = C[idx_p % Q] = p
            if idx_p < Q - 1:
                continue  # window not filled yet
            C_stat.snapshot()
            i = (idx_p + 1) % Q  # index of the window start inside C

            # ----- LB_KimFL
            lb_kim = self._lb_kim_hierarchy(C, i, C_stat, q_norm)
            if lb_kim >= self.best_so_far:
                prune_cnt['kim'] += 1
                # remove the obsolete point from sum and sum of squares;
                # recall `i = (idx_p + 1) % Q` circularly maps to the
                # left neighbor
                C_stat.drop(C[i])
                continue  # CAUTION: DO NOT FORGET TO `drop` BEFORE `continue`

            # ----- LB_Keogh_EQ (query envelope against candidate)
            lb_keogh_eg, cb_eg = self._lb_keogh_online(
                C_stat, q_argidx, q_norm_L_dec, q_norm_U_dec, C=C[i:])
            if lb_keogh_eg >= self.best_so_far:
                prune_cnt['eg'] += 1
                C_stat.drop(C[i])
                continue

            # ----- LB_Keogh_EC (candidate envelope against query)
            idx_in_query = idx_p - (
                Q - 1)  # start location of the data in `query`
            lb_keogh_ec, cb_ec = self._lb_keogh_online(
                C_stat,
                q_argidx,  # CAUTION: keep ordered beforehand
                buf_L[idx_in_query:][q_argidx],
                buf_U[idx_in_query:][q_argidx],
                q_norm=q_norm)
            if lb_keogh_ec >= self.best_so_far:
                prune_cnt['ec'] += 1
                C_stat.drop(C[i])
                continue

            # ----- DTW
            # backward cumsum of the tighter bound's per-point contributions;
            # used for early abandoning inside the DTW recursion
            cb_backcum = np.cumsum(
                (cb_ec if lb_keogh_ec > lb_keogh_eg else cb_eg)[::-1])[::-1]
            c = self.dtw_distance(C_stat.znorm(C[i:i + Q]),
                                  q_norm,
                                  max_stray=window_size,
                                  cb_backcum=cb_backcum)
            if c < self.best_so_far:
                self.best_so_far = c
                self.loc = (idx_buf * (self.reset_period - Q + 1)
                            + idx_p - Q + 1)
            C_stat.drop(C[i])
            logger.debug((idx_buf, idx_p, c, self.best_so_far))

        if len(self.buffer) < self.reset_period:
            done = True  # `content` exhausted: the last refill came up short
        else:
            idx_buf += 1
            logger.info("#################### %d %d ####################",
                        idx_buf, len(self.buffer))

    n_scanned = idx_buf * (self.reset_period - Q + 1) + len(self.buffer)
    result_json = {
        "location": self.loc,
        "dtw_distance": np.sqrt(self.best_so_far),
        # key fixed: was the typo "n_scanned: " (colon-space baked into key)
        "n_scanned": n_scanned,
        "n_prunes": prune_cnt,
        "n_calc_dtw": (n_scanned - sum(prune_cnt.values()))
    }
    return self.loc, np.sqrt(self.best_so_far), result_json