Example #1
    def search(self, content, query, window_frac=None):
        # type: (Iterable[float], Sequence[float], Union[float, None]) -> Tuple[int, float, dict]
        """
        Search `query` in `content` and find its nearest matching subsequence;
        return that subsequence's starting index, its DTW distance, and the running details

        :param content: a sequence of floats on which distances are computed via `self.dist_cb()`
        :param query: a (typically shorter) sequence to be searched for in `content`
        :param window_frac: overrides the object attribute when given; see `__init__()` for details
        :return: tuple of: location, DTW_distance, running_details_as_dict
        """
        Q = len(query)
        self.best_so_far = INF  # reset best cost for a new search
        q_norm = StandardScaler().fit_transform(
            query[:, None]).flatten()  # z-norm the q

        # create envelopes for the normalized query (viz. LB_Keogh_EQ)
        window_size = int(Q * (window_frac or self.window_frac))
        q_norm_L, q_norm_U = self._lower_upper_lemire(q_norm, r=window_size)
        q_argidx = np.abs(q_norm).argsort()[::-1]  # indices by decreasing |z-value|
        q_norm_dec = q_norm[q_argidx]
        q_norm_L_dec, q_norm_U_dec = q_norm_L[q_argidx], q_norm_U[q_argidx]
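        # sorting by decreasing |z-value| lets the lower-bound sums accumulate their
        # largest terms first, so a candidate can cross `best_so_far` (and be pruned) sooner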

        idx_buf = 0
        done = False
        prune_cnt = Counter(kim=0, eg=0, ec=0)  # pruning counters for each phase
        while not done:
            # keep the last `Q - 1` points of the previous chunk (if any) so that no
            # candidate spanning a chunk boundary is missed
            self.buffer = [] if idx_buf == 0 else self.buffer[-(Q - 1):]
            self.buffer += seq(content).take(self.reset_period -
                                             len(self.buffer)).to_list()
            # CAUTION: `self.buffer` is huge; DO NOT move this work into the inner loop
            buf_L, buf_U = self._lower_upper_lemire(
                self.buffer, r=window_size)  # for calc LB_Keogh_EC

            if len(self.buffer) <= Q - 1:
                break

            # online z-normalization statistics for points in the buffer
            C_stat = MovingStatistics()
            # circular array holding the current candidate region; doubled in size so a
            # contiguous slice C[i:i + Q] can be taken without the "%" operator
            C = np.zeros(Q * 2)  # candidate sequence C

            for idx_p, p in enumerate(self.buffer):
                C_stat.feed(p)
                C[(idx_p % Q) + Q] = C[idx_p % Q] = p
                if idx_p < Q - 1:
                    continue

                C_stat.snapshot()
                i = (idx_p + 1) % Q  # index in C

                # ----- LB_KimFL
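                # cheap hierarchical bound using only the first/last few points of the
                # (online z-normalized) candidate against the query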
                lb_kim = self._lb_kim_hierarchy(C, i, C_stat, q_norm)
                if lb_kim >= self.best_so_far:
                    prune_cnt['kim'] += 1
                    # remove the obsolete point from the running sum and sum of squares
                    C_stat.drop(C[i])  # recall: `i = (idx_p + 1) % Q`, the oldest point of the window
                    continue  # CAUTION: DO NOT FORGET TO `drop` BEFORE `continue`

                # ----- LB_Keogh_EQ
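                # compare the z-normalized candidate points against the query's envelope;
                # cb_eg keeps the per-point contributions for reuse in the DTW step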
                lb_keogh_eg, cb_eg = self._lb_keogh_online(C_stat,
                                                           q_argidx,
                                                           q_norm_L_dec,
                                                           q_norm_U_dec,
                                                           C=C[i:])
                if lb_keogh_eg >= self.best_so_far:
                    prune_cnt['eg'] += 1
                    C_stat.drop(C[i])
                    continue

                # ----- LB_Keogh_EC
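                # roles reversed: compare the query's points against the candidate's own
                # envelope (taken from the raw buffer); cb_ec again records per-point terms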
                idx_in_buf = idx_p - (Q - 1)  # start of the candidate within `self.buffer`
                lb_keogh_ec, cb_ec = self._lb_keogh_online(
                    C_stat,
                    q_argidx,  # CAUTION: must be sorted beforehand (decreasing |z|)
                    buf_L[idx_in_buf:][q_argidx],
                    buf_U[idx_in_buf:][q_argidx],
                    q_norm=q_norm)
                if lb_keogh_ec >= self.best_so_far:
                    prune_cnt['ec'] += 1
                    C_stat.drop(C[i])
                    continue

                # ----- DTW
                # reverse cumulative sum of the tighter of cb_ec / cb_eg; these partial
                # sums let the DTW computation abandon early against `best_so_far`
                cb_backcum = np.cumsum(
                    (cb_ec if lb_keogh_ec > lb_keogh_eg else cb_eg)[::-1])[::-1]
                c = self.dtw_distance(C_stat.znorm(C[i:i + Q]),
                                      q_norm,
                                      max_stray=window_size,
                                      cb_backcum=cb_backcum)
                if c < self.best_so_far:
                    self.best_so_far = c
                    self.loc = idx_buf * (self.reset_period - Q + 1) + idx_p - Q + 1
                C_stat.drop(C[i])
                logger.debug((idx_buf, idx_p, c, self.best_so_far))

            # if idx_buf >= 2: # for debug
            #     done = True
            if len(self.buffer) < self.reset_period:
                done = True
            else:
                idx_buf += 1

            logger.info("#################### %d %d ####################",
                        idx_buf, len(self.buffer))

        n_scanned = idx_buf * (self.reset_period - Q + 1) + len(self.buffer)
        result_json = {
            "location": self.loc,
            "dtw_distance": np.sqrt(self.best_so_far),
            "n_scanned": n_scanned,
            "n_prunes": prune_cnt,
            "n_calc_dtw": n_scanned - sum(prune_cnt.values())
        }
        return self.loc, np.sqrt(self.best_so_far), result_json
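
For intuition about the pruning cascade above, here is a minimal, self-contained sketch (NumPy only) of the LB_Keogh idea that `_lb_keogh_online` builds on. The helper names `envelope` and `lb_keogh`, the brute-force envelope (a stand-in for `_lower_upper_lemire`), and the toy data are illustrative assumptions, not part of the original class.

import numpy as np


def envelope(x, r):
    # brute-force stand-in for `_lower_upper_lemire`: running min/max over a +/- r window
    n = len(x)
    lo = np.array([x[max(0, i - r):i + r + 1].min() for i in range(n)])
    up = np.array([x[max(0, i - r):i + r + 1].max() for i in range(n)])
    return lo, up


def lb_keogh(candidate, q_lo, q_up):
    # sum of squared deviations of candidate points that fall outside the query envelope;
    # with squared point costs and the same warping window, this never exceeds the DTW cost
    above = np.clip(candidate - q_up, 0.0, None)
    below = np.clip(q_lo - candidate, 0.0, None)
    return float(np.sum(above ** 2 + below ** 2))


# toy data (purely illustrative, not taken from the original code)
query = np.sin(np.linspace(0, 2 * np.pi, 32))
candidate = np.sin(np.linspace(0, 2 * np.pi, 32) + 0.3)
q_lo, q_up = envelope(query, r=4)
print(lb_keogh(candidate, q_lo, q_up))  # cheap lower bound; if >= best_so_far, skip DTW

In the method above the same bound is evaluated incrementally, with reordered indices and online z-normalization, which is what keeps it cheap enough to run on every candidate window.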