Example #1
0
 def __init__(self, t_dataset_path: str, validation=False):
     """
     Open the dataset and allocate the square bound/decision matrices.

     :param t_dataset_path: original dataset path
     :param validation: when true, a PearsonCorrelation over the same path
         is created and stored as ``self.c``
     """
     self.norm_ds_path = t_dataset_path
     self.norm_ds = DatasetH5(t_dataset_path)

     # One row/column per entry of the dataset.
     size = len(self.norm_ds)
     square = (size, size)

     # UB starts at a very large value, LB at zero, CB at False.
     self.UB = np.full(square, sys.maxsize, dtype="float32", order="C")
     self.LB = np.zeros(square, dtype="float32", order="C")
     self.CB = np.zeros(square, dtype="b1", order="C")

     # Slot i holds the values of entry i once get_ts(i) has loaded them.
     self.cache = [None] * size
     self.logger = logging.getLogger("Correlation2")

     if validation:
         self.c = PearsonCorrelation(self.norm_ds_path)
     self.validation = validation
 def __init__(self, t_dataset_path: str, validation=False):
     """
     Open the dataset and allocate the square bound/decision matrices.

     :param t_dataset_path: original dataset path
     :param validation: when true, a PearsonCorrelation over the same path
         is created and stored as ``self.c``
     """
     self.norm_ds_path = t_dataset_path
     self.norm_ds = DatasetH5(t_dataset_path)

     # One row/column per entry of the dataset.
     size = len(self.norm_ds)
     square = (size, size)

     # UB starts at a very large value, LB at zero, CB at False.
     self.UB = np.full(square, sys.maxsize, dtype="float32", order="C")
     self.LB = np.zeros(square, dtype="float32", order="C")
     self.CB = np.zeros(square, dtype="b1", order="C")

     # Slot i holds the values of entry i once get_ts(i) has loaded them.
     self.cache = [None] * size
     self.logger = logging.getLogger("Correlation2")

     if validation:
         self.c = PearsonCorrelation(self.norm_ds_path)
     self.validation = validation
def corr(args):
    """
    Run the algorithm selected by ``args.alg`` and optionally pickle its result.

    * ``0`` -- ``PearsonCorrelation(args.h5database).find_correlations()``
    * ``1`` -- ``FourierApproximation(args.h5database).find_correlations(k, T, B, e)``
    * ``2`` -- ``BooleanCorrelation(args.h5database, args.validate).boolean_approximation(T)``

    Any other value of ``args.alg`` does nothing.

    :param args: object exposing ``alg``, ``h5database``, ``out`` and the
        per-algorithm fields listed above
    """
    selected = args.alg
    if selected == 0:
        result = PearsonCorrelation(args.h5database).find_correlations()
    elif selected == 1:
        approximation = FourierApproximation(args.h5database)
        result = approximation.find_correlations(args.k, args.T, args.B, args.e)
    elif selected == 2:
        approximation = BooleanCorrelation(args.h5database, args.validate)
        result = approximation.boolean_approximation(args.T)
    else:
        return

    # The result is only persisted when an output path was supplied.
    if args.out is None:
        return
    with open(args.out, 'wb') as sink:
        pickle.dump(result, sink)
def corr(args):
    """
    Run the algorithm selected by ``args.alg`` and optionally pickle its result.

    * ``0`` -- ``PearsonCorrelation(args.h5database).find_correlations()``
    * ``1`` -- ``FourierApproximation(args.h5database).find_correlations(k, T, B, e)``
    * ``2`` -- ``BooleanCorrelation(args.h5database, args.validate).boolean_approximation(T)``

    Any other value of ``args.alg`` does nothing.

    :param args: object exposing ``alg``, ``h5database``, ``out`` and the
        per-algorithm fields listed above
    """
    selected = args.alg
    if selected == 0:
        result = PearsonCorrelation(args.h5database).find_correlations()
    elif selected == 1:
        approximation = FourierApproximation(args.h5database)
        result = approximation.find_correlations(args.k, args.T, args.B, args.e)
    elif selected == 2:
        approximation = BooleanCorrelation(args.h5database, args.validate)
        result = approximation.boolean_approximation(args.T)
    else:
        return

    # The result is only persisted when an output path was supplied.
    if args.out is None:
        return
    with open(args.out, 'wb') as sink:
        pickle.dump(result, sink)
Example #5
0
class BooleanCorrelation:
    """
    Boolean "distance within threshold" matrix over all pairs of a dataset.

    For every pair ``i < j`` the class decides whether the Euclidean distance
    between entries ``i`` and ``j`` is at most ``theta = sqrt(2 * m * (1 - T))``.
    Adjacent pairs are measured exactly; for the remaining pairs, upper and
    lower bounds are derived from already known entries and the exact distance
    is computed only when the bounds do not settle the comparison.

    NOTE(review): relating ``theta`` to a correlation threshold ``T`` presumably
    relies on the stored series being normalised -- confirm against DatasetH5.
    """

    # Running mean (in seconds) of the time spent inside np.linalg.norm in d()
    # and the number of d() calls. Both live on the class, so they are shared
    # by every instance.
    avg = 0
    n = 0

    def __init__(self, t_dataset_path: str, validation=False):
        """
        :param t_dataset_path: original dataset path
        :param validation: when true, a PearsonCorrelation over the same path
            is created (``self.c``) and boolean_approximation prints every
            decision that disagrees with it
        """
        self.norm_ds_path = t_dataset_path
        self.norm_ds = DatasetH5(t_dataset_path)
        size = len(self.norm_ds)
        # Upper bounds, lower bounds and boolean decisions; only entries with
        # i < j are written by boolean_approximation.
        self.UB = np.full(shape=(size, size), fill_value=sys.maxsize, dtype="float32", order="C")
        self.LB = np.zeros(shape=(size, size), dtype="float32", order="C")
        self.CB = np.zeros(shape=(size, size), dtype="b1", order="C")
        self.cache = [None] * size
        self.logger = logging.getLogger("Correlation2")
        if validation:
            self.c = PearsonCorrelation(self.norm_ds_path)
        self.validation = validation

    def get_ts(self, i):
        """Return the values of entry ``i``, reading them from the dataset only once."""
        if self.cache[i] is None:
            # NOTE(review): if norm_ds[i] is an h5py Dataset, `.value` no longer
            # exists in h5py >= 3.0 (use `[()]`) -- confirm what DatasetH5 returns.
            self.cache[i] = self.norm_ds[i].value

        return self.cache[i]

    def boolean_approximation(self, T: float):
        """
        Fill and return the boolean matrix for threshold ``T``.

        :param T: threshold used to derive ``theta = sqrt(2 * m * (1 - T))``,
            where ``m`` is the length of the first entry of the dataset
        :return: ``self.CB``; ``CB[i, j]`` (``i < j``) is true when the pair was
            decided to be within ``theta``. Every ``i < j`` entry is rewritten
            on each call, so the method can be called again with another ``T``.
        """
        n = len(self.norm_ds)
        CB = self.CB
        if n == 0:
            # Nothing to compare; avoids indexing norm_ds[0] on an empty dataset.
            return CB

        m = len(self.norm_ds[0])
        theta = np.sqrt(2 * m * (1 - T))

        log = self.logger
        log.debug("m: %d  n: %d  theta:%f", m, n, theta)

        UB = self.UB
        LB = self.LB
        d = self.d

        log.debug("Processing diagonal... (n: %d)", n)

        # First super-diagonal: always measured exactly.
        for i in range(n - 1):
            j = i + 1
            ed = d(i, j)
            UB[i, j] = LB[i, j] = ed
            within = bool(ed <= theta)
            CB[i, j] = within
            self._validate(i, j, T, within, "ed", ed, theta)
        log.debug("Initial Processing of diagonal finished")

        s = 0  # pairs that needed an exact distance
        total = 0  # pairs visited beyond the first diagonal
        for k in range(2, n):
            log.debug("Processing diagonal %d/%d...", k, n - 1)
            for i in range(n - k):
                total += 1
                j = i + k
                # Bounds for (i, u) and (u, j) with i < u < j; both lie on
                # diagonals that were completed earlier.
                row_ub = UB[i, i + 1:j]
                col_ub = UB[i + 1:j, j]
                UB[i, j] = (row_ub + col_ub).min()
                LB[i, j] = np.maximum(LB[i, i + 1:j] - col_ub, LB[i + 1:j, j] - row_ub).max()
                if UB[i, j] <= theta:
                    CB[i, j] = True
                    self._validate(i, j, T, True, "UB", UB[i, j], theta)
                elif LB[i, j] > theta:
                    CB[i, j] = False
                    self._validate(i, j, T, False, "LB", LB[i, j], theta)
                else:
                    # Bounds are inconclusive: measure, and tighten both bounds.
                    s += 1
                    ed = d(i, j)
                    UB[i, j] = LB[i, j] = ed
                    within = bool(ed <= theta)
                    CB[i, j] = within
                    self._validate(i, j, T, within, "ed", ed, theta)
        log.debug("Exact distance computations: %d/%d", s, total)
        log.debug("Avg Euclidean distance computation time: %.3f ms", BooleanCorrelation.avg * 1000)
        return CB

    def _validate(self, i, j, T, within, label, bound, theta):
        """Print a diagnostic when validation is on and ``self.c.corr(i, j)`` contradicts ``within``."""
        if not self.validation:
            return
        exact = self.c.corr(i, j)
        disagrees = exact < T if within else exact >= T
        if disagrees:
            relation = "<=" if within else ">"
            print("[%d,%d]:%f bool:%d  (%s)%f %s %f(theta)" %
                  (i, j, exact, self.CB[i, j], label, bound, relation, theta))

    def d(self, t1: int, t2: int):
        """
        Return the Euclidean distance between entries ``t1`` and ``t2``.

        Also updates the class-level call counter ``n`` and the running mean
        ``avg`` of the time taken by the norm computation.
        """
        BooleanCorrelation.n += 1
        ts1 = self.get_ts(t1)
        ts2 = self.get_ts(t2)
        # perf_counter is the clock intended for measuring short durations.
        begin = time.perf_counter()
        euclidean_distance = np.linalg.norm(ts1 - ts2)
        dur = time.perf_counter() - begin
        # Incremental mean: same value as ((n - 1) * avg + dur) / n.
        BooleanCorrelation.avg += (dur - BooleanCorrelation.avg) / BooleanCorrelation.n

        return euclidean_distance
class BooleanCorrelation:
    """
    Boolean "distance within threshold" matrix over all pairs of a dataset.

    For every pair ``i < j`` the class decides whether the Euclidean distance
    between entries ``i`` and ``j`` is at most ``theta = sqrt(2 * m * (1 - T))``.
    Adjacent pairs are measured exactly; for the remaining pairs, upper and
    lower bounds are derived from already known entries and the exact distance
    is computed only when the bounds do not settle the comparison.

    NOTE(review): relating ``theta`` to a correlation threshold ``T`` presumably
    relies on the stored series being normalised -- confirm against DatasetH5.
    """

    # Running mean (in seconds) of the time spent inside np.linalg.norm in d()
    # and the number of d() calls. Both live on the class, so they are shared
    # by every instance.
    avg = 0
    n = 0

    def __init__(self, t_dataset_path: str, validation=False):
        """
        :param t_dataset_path: original dataset path
        :param validation: when true, a PearsonCorrelation over the same path
            is created (``self.c``) and boolean_approximation prints every
            decision that disagrees with it
        """
        self.norm_ds_path = t_dataset_path
        self.norm_ds = DatasetH5(t_dataset_path)
        size = len(self.norm_ds)
        # Upper bounds, lower bounds and boolean decisions; only entries with
        # i < j are written by boolean_approximation.
        self.UB = np.full(shape=(size, size), fill_value=sys.maxsize, dtype="float32", order="C")
        self.LB = np.zeros(shape=(size, size), dtype="float32", order="C")
        self.CB = np.zeros(shape=(size, size), dtype="b1", order="C")
        self.cache = [None] * size
        self.logger = logging.getLogger("Correlation2")
        if validation:
            self.c = PearsonCorrelation(self.norm_ds_path)
        self.validation = validation

    def get_ts(self, i):
        """Return the values of entry ``i``, reading them from the dataset only once."""
        if self.cache[i] is None:
            # NOTE(review): if norm_ds[i] is an h5py Dataset, `.value` no longer
            # exists in h5py >= 3.0 (use `[()]`) -- confirm what DatasetH5 returns.
            self.cache[i] = self.norm_ds[i].value

        return self.cache[i]

    def boolean_approximation(self, T: float):
        """
        Fill and return the boolean matrix for threshold ``T``.

        :param T: threshold used to derive ``theta = sqrt(2 * m * (1 - T))``,
            where ``m`` is the length of the first entry of the dataset
        :return: ``self.CB``; ``CB[i, j]`` (``i < j``) is true when the pair was
            decided to be within ``theta``. Every ``i < j`` entry is rewritten
            on each call, so the method can be called again with another ``T``.
        """
        n = len(self.norm_ds)
        CB = self.CB
        if n == 0:
            # Nothing to compare; avoids indexing norm_ds[0] on an empty dataset.
            return CB

        m = len(self.norm_ds[0])
        theta = np.sqrt(2 * m * (1 - T))

        log = self.logger
        log.debug("m: %d  n: %d  theta:%f", m, n, theta)

        UB = self.UB
        LB = self.LB
        d = self.d

        log.debug("Processing diagonal... (n: %d)", n)

        # First super-diagonal: always measured exactly.
        for i in range(n - 1):
            j = i + 1
            ed = d(i, j)
            UB[i, j] = LB[i, j] = ed
            within = bool(ed <= theta)
            CB[i, j] = within
            self._validate(i, j, T, within, "ed", ed, theta)
        log.debug("Initial Processing of diagonal finished")

        s = 0  # pairs that needed an exact distance
        total = 0  # pairs visited beyond the first diagonal
        for k in range(2, n):
            log.debug("Processing diagonal %d/%d...", k, n - 1)
            for i in range(n - k):
                total += 1
                j = i + k
                # Bounds for (i, u) and (u, j) with i < u < j; both lie on
                # diagonals that were completed earlier.
                row_ub = UB[i, i + 1:j]
                col_ub = UB[i + 1:j, j]
                UB[i, j] = (row_ub + col_ub).min()
                LB[i, j] = np.maximum(LB[i, i + 1:j] - col_ub, LB[i + 1:j, j] - row_ub).max()
                if UB[i, j] <= theta:
                    CB[i, j] = True
                    self._validate(i, j, T, True, "UB", UB[i, j], theta)
                elif LB[i, j] > theta:
                    CB[i, j] = False
                    self._validate(i, j, T, False, "LB", LB[i, j], theta)
                else:
                    # Bounds are inconclusive: measure, and tighten both bounds.
                    s += 1
                    ed = d(i, j)
                    UB[i, j] = LB[i, j] = ed
                    within = bool(ed <= theta)
                    CB[i, j] = within
                    self._validate(i, j, T, within, "ed", ed, theta)
        log.debug("Exact distance computations: %d/%d", s, total)
        log.debug("Avg Euclidean distance computation time: %.3f ms", BooleanCorrelation.avg * 1000)
        return CB

    def _validate(self, i, j, T, within, label, bound, theta):
        """Print a diagnostic when validation is on and ``self.c.corr(i, j)`` contradicts ``within``."""
        if not self.validation:
            return
        exact = self.c.corr(i, j)
        disagrees = exact < T if within else exact >= T
        if disagrees:
            relation = "<=" if within else ">"
            print("[%d,%d]:%f bool:%d  (%s)%f %s %f(theta)" %
                  (i, j, exact, self.CB[i, j], label, bound, relation, theta))

    def d(self, t1: int, t2: int):
        """
        Return the Euclidean distance between entries ``t1`` and ``t2``.

        Also updates the class-level call counter ``n`` and the running mean
        ``avg`` of the time taken by the norm computation.
        """
        BooleanCorrelation.n += 1
        ts1 = self.get_ts(t1)
        ts2 = self.get_ts(t2)
        # perf_counter is the clock intended for measuring short durations.
        begin = time.perf_counter()
        euclidean_distance = np.linalg.norm(ts1 - ts2)
        dur = time.perf_counter() - begin
        # Incremental mean: same value as ((n - 1) * avg + dur) / n.
        BooleanCorrelation.avg += (dur - BooleanCorrelation.avg) / BooleanCorrelation.n

        return euclidean_distance