Example #1
def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.99, t2: int = 3) -> np.ndarray:
    """
    Wrapper that converts a single knee point detector into a multi knee point detector.

    After a knee is detected the curve is split at that knee and the left and
    right parts are searched again. The search is driven by an explicit stack
    instead of recursive calls.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
            (an index relative to the points it receives, or None)
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the coefficient of determination used as a threshold (default 0.99);
            a segment is only searched when its linear fit scores below t1
        t2 (int): the minimum number of points used as a threshold (default 3);
            a segment is only searched when it holds more than t2 points

    Returns:
        np.ndarray: indexes (into points) of the detected knees, in ascending order
    """
    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]
        if len(pt) > t2:
            coef = linear_fit_points(pt)
            if linear_r2_points(pt, coef) < t1:
                rv = get_knee(pt)
                if rv is not None:
                    # rv is relative to the slice; translate it to an index into points
                    idx = rv + left
                    knees.append(idx)
                    # a knee on the last point would re-queue the very same
                    # range forever, so only queue a strictly smaller left part
                    if idx + 1 < right:
                        stack.append((left, idx + 1))
                    stack.append((idx + 1, right))

    # the stack visits the segments out of order
    knees.sort()
    return np.array(knees)
Example #2
def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.01, t2: int = 3, cost: lf.Linear_Metrics = lf.Linear_Metrics.rmspe) -> np.ndarray:
    """
    Wrapper that converts a single knee point detector into a multi knee point detector.

    After a knee is detected the curve is split at that knee and the left and
    right parts are searched again. The search is driven by an explicit stack
    instead of recursive calls.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
            (an index relative to the points it receives, or None)
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the threshold applied to the cost of the linear fit (default 0.01);
            with rmspe a segment is searched when cost >= t1, with any other
            metric the coefficient of determination is used and the segment
            is searched when it is < t1
        t2 (int): the minimum number of points used as a threshold (default 3);
            a segment is only searched when it holds more than t2 points
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rmspe)

    Returns:
        np.ndarray: indexes (into points) of the detected knees, in ascending order
    """
    use_rmspe = cost is lf.Linear_Metrics.rmspe
    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]
        if len(pt) > t2:
            if len(pt) <= 2:
                # only reachable when t2 < 2: skip the fit and use the
                # perfect-fit score of the selected metric
                r = 0.0 if use_rmspe else 1.0
            else:
                coef = lf.linear_fit_points(pt)
                if use_rmspe:
                    r = lf.rmspe_points(pt, coef)
                else:
                    r = lf.linear_r2_points(pt, coef)

            # rmspe is an error (higher is worse), r2 is a score (lower is worse)
            curved = r >= t1 if use_rmspe else r < t1

            if curved:
                rv = get_knee(pt)
                if rv is not None:
                    # rv is relative to the slice; translate it to an index into points
                    idx = rv + left
                    knees.append(idx)
                    # a knee on the last point would re-queue the very same
                    # range forever, so only queue a strictly smaller left part
                    if idx + 1 < right:
                        stack.append((left, idx + 1))
                    stack.append((idx + 1, right))

    # the stack visits the segments out of order
    knees.sort()
    return np.array(knees)
Example #3
def compute_cost_sequence(points: np.ndarray,
                          cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
                          distance: RDP_Distance = RDP_Distance.shortest):
    """Walk over segments of `points` and fit a line to each (incomplete as shown).

    The visible body picks a distance function from `distance`, then iterates
    over `reduced`, slicing `points[left:right]` and fitting a line to each slice.

    NOTE(review): this definition looks truncated. `reduced` is not a
    parameter and is not defined in the visible body, `left` is never
    advanced, `cost`, `distance_points` and `coef` are never used, and there
    is no return statement. Confirm against the full source before relying
    on it.

    Args:
        points (np.ndarray): array that is sliced as points[left:right]
        cost (lf.Linear_Metrics): not used by the visible code (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): selects the distance function (default: RDP_Distance.shortest)
    """
    # sort indexes
    # NOTE(review): no sorting statement follows the comment above — the line
    # it described appears to be missing.

    # select the distance metric to be used
    distance_points = None
    if distance is RDP_Distance.shortest:
        distance_points = lf.shortest_distance_points
    elif distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
        # NOTE(review): the next line overwrites the perpendicular choice made
        # just above; presumably it belonged to a missing `else:` — confirm.
        distance_points = lf.shortest_distance_points

    left = 0
    # NOTE(review): `reduced` is undefined in the visible code.
    for right in reduced:
        pt = points[left:right]
        coef = lf.linear_fit_points(pt)
Example #4
def auto_knees(points: np.ndarray,
               t: float = 1.0,
               sensitivity: float = 1.0,
               p: PeakDetection = PeakDetection.Kneedle) -> np.ndarray:
    """Returns the indexes of the knee points based on the Kneedle method.

    This implementation uses an heuristic to automatically define
    the direction and rotation of the concavity: the direction is taken
    from the sign of the slope of a linear fit, and both rotations are
    searched and merged.

    Furthermore, it supports three different methods to select the
    relevant knees:
    1. Kneedle    : classical algorithm
    2. Significant: significant knee peak detection
    3. ZScore     : significant knee peak detection based on zscore

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve
        sensitivity (float): controls the sensitivity of the peak detection
        p (PeakDetection): selects the peak detection method

    Returns:
        np.ndarray: the indexes of the knee points (unique, ascending)
    """
    _, m = lf.linear_fit_points(points)

    # the sign of the fitted slope decides the direction of the curve
    if m > 0.0:
        cd = Direction.Increasing
    else:
        cd = Direction.Decreasing

    knees_1 = knees(points, sensitivity, t, cd, Concavity.Counterclockwise, p)
    knees_2 = knees(points, sensitivity, t, cd, Concavity.Clockwise, p)

    knees_idx = np.concatenate((knees_1, knees_2))
    # np.concatenate generates float array when one is empty (see https://github.com/numpy/numpy/issues/8878)
    knees_idx = knees_idx.astype(int)
    # np.unique also returns the indexes sorted
    knees_idx = np.unique(knees_idx)

    return knees_idx
Example #5
def rdp(points: np.ndarray, r: float = 0.9) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    Is an algorithm that decimates a curve composed of line segments
    to a similar curve with fewer points. This version uses the
    coefficient of determination to decide whether to keep or remove
    a line segment.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        r (float): the coefficient of determination threshold (default 0.9)

    Returns:
        tuple: the reduced space, the points that were removed. Each row of
        the second array is (x of the first point of a kept segment,
        number of interior points dropped from that segment).
    """
    if len(points) <= 2:
        # one or two points need no fit: treat them as a perfect line
        determination = 1.0
    else:
        coef = lf.linear_fit_points(points)
        determination = lf.linear_r2_points(points, coef)

    if determination < r:
        # not linear enough: split at the point farthest from the chord
        d = perpendicular_distance_points(points, points[0], points[-1])
        index = np.argmax(d)

        left, left_points = rdp(points[0:index + 1], r)
        right, right_points = rdp(points[index:len(points)], r)
        points_removed = np.concatenate((left_points, right_points), axis=0)
        # the split point ends `left` and starts `right`; keep it only once
        return np.concatenate((left[0:len(left) - 1], right)), points_removed

    # linear enough: keep only the two end points of the segment
    rv = np.empty([2, 2])
    rv[0] = points[0]
    rv[1] = points[-1]
    points_removed = np.array([[points[0][0], len(points) - 2.0]])
    return rv, points_removed
Example #6
def auto_knee(points: np.ndarray, t: float = 1.0) -> int:
    """Returns the index of the knee point based on the Kneedle method.

    This implementation uses an heuristic to automatically define
    the direction and rotation of the concavity: the direction comes from
    the sign of the slope of a linear fit, the rotation from the sign of
    the summed residuals (y minus the fitted y).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve

    Returns:
        int: the index of the knee point
    """
    b, m = lf.linear_fit_points(points)

    # the sign of the fitted slope decides the direction of the curve
    if m > 0.0:
        cd = Direction.Increasing
    else:
        cd = Direction.Decreasing

    # residuals of the curve against the fitted line
    y = points[:, 1]
    yhat = points[:, 0] * m + b
    vote = np.sum(y - yhat)

    if cd is Direction.Increasing and vote > 0:
        cc = Concavity.Clockwise
    elif cd is Direction.Increasing and vote <= 0:
        cc = Concavity.Counterclockwise
    elif cd is Direction.Decreasing and vote > 0:
        cc = Concavity.Clockwise
    else:
        cc = Concavity.Counterclockwise

    return single_knee(points, t, cd, cc)
Example #7
 def test_r2_two(self):
     """The line fitted to exactly two points must score an r2 of 1.0."""
     two_points = np.array([[0.0, 1.0], [1.0, 5.0]])
     fitted = lf.linear_fit_points(two_points)
     self.assertEqual(lf.linear_r2_points(two_points, fitted), 1.0)
Example #8
def filter_clusters(
        points: np.ndarray,
        knees: np.ndarray,
        clustering: typing.Callable[[np.ndarray, float], np.ndarray],
        t: float = 0.01,
        method: kr.ClusterRanking = kr.ClusterRanking.linear) -> np.ndarray:
    # NOTE(review): this block looks like it lost lines somewhere upstream: the
    # prose below has no docstring quotes, several branches have no `else:`,
    # two calls are cut off in the middle of their argument list and the
    # trailing string literal is never closed. It does not parse as shown.
    # The comments added here describe only what the visible lines do and
    # flag the gaps; nothing is reconstructed.
    Filter the knee points based on clustering.

    For each cluster a single point is selected based on the ranking.
    The ranking is computed based on the slope and the improvement (on the y axis).

        points (np.ndarray): numpy array with the points (x, y)
        knees (np.ndarray): knees indexes
        clustering (typing.Callable[[np.ndarray, float]): the clustering function
        t (float): the threshold for merging (in percentage, default 0.01)
        method (ranking.ClusterRanking): represents the direction of the ranking within a cluster (default ranking.ClusterRanking.linear)

        np.ndarray: the filtered knees
    # the hull is only computed (and its size logged) for the hull ranking
    if method is kr.ClusterRanking.hull:
        hull = ch.graham_scan_lower(points)
        logger.info(f'hull {len(hull)}')

    x = points[:, 0]
    y = points[:, 1]

    # with zero or one knee the input is returned unchanged
    if len(knees) <= 1:
        return knees
        # NOTE(review): everything from here on is indented under the `return`
        # above; presumably an `else:` line is missing — confirm.
        knee_points = points[knees]
        clusters = clustering(knee_points, t)

        # cluster labels are visited from 0 up to the largest label returned
        max_cluster = clusters.max()
        filtered_knees = []
        for i in range(0, max_cluster + 1):
            current_cluster = knees[clusters == i]
            #logger.info(f'Cluster {i} with {len(current_cluster)} elements')

            if len(current_cluster) > 1:
                if method is kr.ClusterRanking.hull:
                    # select the hull points that exist within the cluster
                    # a and b are the first and last knee index of the cluster
                    a, b = current_cluster[[0, -1]]
                    #logger.info(f'Bounds [{a}, {b}]')
                    idx = (hull >= a) * (hull <= b)
                    hull_within_cluster = hull[idx]
                    #logger.info(f'Hull (W\\C) {hull_within_cluster} ({len(hull_within_cluster)})')
                    # only consider clusters with at least a single hull point
                    rankings = np.zeros(len(current_cluster))

                    if len(hull_within_cluster) > 1:
                        # NOTE(review): a - 1 and b + 1 reach one sample outside
                        # the cluster; a == 0 would wrap to the last sample and
                        # b + 1 can run past the end of x — TODO confirm the
                        # callers rule this out.
                        length = x[b + 1] - x[a - 1]
                        for cluster_idx in range(len(current_cluster)):
                            j = current_cluster[cluster_idx]
                            if j in hull_within_cluster:
                                # share of the x span on each side of candidate j
                                length_l = (x[j] - x[a - 1]) / length
                                length_r = (x[b + 1] - x[j]) / length
                                left = points[a - 1:j + 1]
                                right = points[j:b + 2]
                                coef_l = lf.linear_fit_points(left)
                                coef_r = lf.linear_fit_points(right)
                                #r_l = lf.linear_residuals(x[a-1:j+1], y[a-1:j+1], coef_l)
                                #r_r = lf.linear_residuals(x[j:b+2], y[j:b+2], coef_r)
                                #r_l = lf.rmse_points(left, coef_l)
                                #r_r = lf.rmse_points(right, coef_r)

                                # NOTE(review): both np.sum(...) calls below are cut
                                # off — the inner call that takes (segment, first
                                # point, last point) is missing, so the parentheses
                                # do not balance. Which function was called cannot
                                # be told from here.
                                r_l = np.sum(
                                        left, left[0], left[-1]))
                                r_r = np.sum(
                                        right, right[0], right[-1]))

                                # error of both sides weighted by their share of the span
                                current_error = r_l * length_l + r_r * length_r
                                rankings[cluster_idx] = current_error
                                # NOTE(review): the next line overwrites the error
                                # just stored; presumably it belonged to a missing
                                # `else:` (candidate not on the hull) — confirm.
                                rankings[cluster_idx] = -1.0
                        # replace all -1 with maximum distance
                        #logger.info(f'CHR {rankings}')
                        rankings[rankings < 0] = np.amax(rankings)
                        rankings = kr.distance_to_similarity(rankings)
                        #logger.info(f'CHRF {rankings}')
                    elif len(hull_within_cluster) == 1:
                        # the candidate that is the single hull point gets 1.0,
                        # every other candidate keeps its initial 0.0
                        for cluster_idx in range(len(current_cluster)):
                            j = current_cluster[cluster_idx]
                            if j in hull_within_cluster:
                                rankings[cluster_idx] = 1.0
                        # NOTE(review): as indented, the next line discards the
                        # rankings just filled in; presumably a missing `else:`
                        # (no hull point in the cluster) — confirm.
                        rankings = None
                    # NOTE(review): the call below is cut off after its second
                    # argument, and as indented it replaces the hull rankings;
                    # presumably it was the non-hull branch — confirm.
                    rankings = kr.smooth_ranking(points, current_cluster,

                # Compute relative ranking
                # NOTE(review): as shown, kr.rank() would receive None; an
                # `else:` appears to be missing after `best_knee = None`.
                if rankings is None:
                    best_knee = None
                    rankings = kr.rank(rankings)
                    #logger.info(f'Rankings {rankings}')
                    # Min Max normalization
                    #rankings = (rankings - np.min(rankings))/np.ptp(rankings)
                    idx = np.argmax(rankings)
                    best_knee = knees[clusters == i][idx]
                # NOTE(review): the assignments below overwrite one another
                # (knee -> None -> first knee of the cluster); presumably these
                # lines handled single-element clusters behind missing `else:`
                # lines — confirm.
                if method is kr.ClusterRanking.hull:
                    knee = knees[clusters == i][0]
                    if knee in hull:
                        best_knee = knee
                        best_knee = None
                    best_knee = knees[clusters == i][0]

            # NOTE(review): the triple-quoted string opened below (disabled
            # plotting code) is never closed in the visible lines, so it
            # swallows the final `return`; no visible line appends to
            # `filtered_knees` either.
            if best_knee is not None:
                """# plot clusters within the points
                plt.plot(x, y)
                plt.plot(x[current_cluster], y[current_cluster], 'ro')
                if method is kr.ClusterRanking.hull:
                    plt.plot(x[hull], y[hull], 'g+')
                plt.plot(x[best_knee], y[best_knee], 'yx')

        return np.array(filtered_knees)
Example #9
def rdp(points: np.ndarray,
        t: float = 0.01,
        cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
        distance: RDP_Distance = RDP_Distance.shortest) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    Is an algorithm that decimates a curve composed of line segments to a similar curve with fewer points.
    This version uses different cost functions to decide whether to keep or remove a line segment.
    The segments are processed with an explicit stack, left part first.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): the cost threshold (default 0.01); a segment is split when
            r2 < t, or when any other cost is >= t
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): the distance metric used to decide the split point (default: RDP_Distance.shortest)

    Returns:
        tuple: the index of the reduced space, the points that were removed.
        Each row of the second array is (start index of a kept segment,
        number of interior points dropped from that segment).
    """
    stack = [(0, len(points))]

    reduced = []
    removed = []

    # select the distance metric to be used (anything unknown falls back to shortest)
    if distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            # one or two points need no fit: use the perfect-fit value of the metric
            if cost is lf.Linear_Metrics.r2:
                r = 1.0
            else:
                r = 0.0
        else:
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
            else:
                r = lf.rpd_points(pt, coef)

        # r2 is a score (lower is worse), the other metrics are errors (higher is worse)
        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

        if curved:
            # split at the point farthest from the chord; the right part is
            # pushed first so the left part is popped (and recorded) first
            d = distance_points(pt, pt[0], pt[-1])
            index = np.argmax(d)
            stack.append((left + index, left + len(pt)))
            stack.append((left, left + index + 1))
        else:
            # the segment is kept as a single line: record where it starts and
            # how many interior points it drops
            reduced.append(left)
            removed.append([left, len(pt) - 2.0])

    # every kept segment contributed its start; close with the last point
    reduced.append(len(points) - 1)
    return np.array(reduced), np.array(removed)
Example #10
def grdp(points: np.ndarray,
         t: float = 0.01,
         cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
         distance: RDP_Distance = RDP_Distance.shortest) -> tuple:
    """RDP-style reduction that always splits the costliest segment next (incomplete as shown).

    Each pass of the first loop pops a segment, adds its farthest point to
    `reduced`, scores the two halves by their maximum distance, pushes both
    halves and sorts the stack by that score so the highest-scoring segment
    is popped next.

    NOTE(review): this definition appears to be missing lines.
    `distance_points`, `length` and `r` are used but never defined in the
    visible body, the stack is seeded with a 2-tuple but unpacked as a
    3-tuple, and everything after the first `return` is unreachable.
    Confirm against the full source before relying on it.

    Args:
        points (np.ndarray): array that is sliced as points[left:right]
        t (float): threshold compared against `r` (default 0.01)
        cost (lf.Linear_Metrics): selects how `r` is compared with `t` (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): not used by the visible code (default: RDP_Distance.shortest)

    Returns:
        The first reachable return yields np.array(reduced) — a single array,
        although the annotation says tuple.
    """
    # NOTE(review): 2-tuple here, but the loop below unpacks (cost, left, right).
    stack = [(0, len(points))]

    reduced = []
    removed = []

    curved = True

    while curved:
        _, left, right = stack.pop()
        pt = points[left:right]

        # NOTE(review): `distance_points` is not defined in the visible body.
        d = distance_points(pt, pt[0], pt[-1])
        index = np.argmax(d)
        # add the relevant point to the reduced set
        reduced.append(left + index)
        # compute the cost of the left and right parts
        left_cost = np.max(distance_points(pt[0:index + 1], pt[0], pt[index]))
        # NOTE(review): the right part is measured against the chord
        # pt[0]..pt[-1], not pt[index]..pt[-1] as the left part would suggest
        # — confirm this is intended.
        right_cost = np.max(distance_points(pt[index:len(pt)], pt[0], pt[-1]))
        # Add the points to the stack
        stack.append((right_cost, left + index, left + len(pt)))
        stack.append((left_cost, left, left + index + 1))
        # Sort the stack based on the cost
        # ascending order, so pop() takes the highest-cost segment next
        stack.sort(key=lambda t: t[0])
        # NOTE(review): `length` is not defined in the visible body.
        length -= 1

        # compute the cost of the current solution
        # NOTE(review): no statement computes `r` before it is read below.

        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

    # add first and last points
    reduced.append(len(points) - 1)

    # sort indexes
    # NOTE(review): no sorting statement follows the comment above.

    return np.array(reduced)

    # NOTE(review): unreachable — the function has already returned. The
    # statements below also overwrite `r` right after assigning it, which
    # suggests missing `else:` lines.
    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            if cost is lf.Linear_Metrics.r2:
                r = 1.0
                r = 0.0
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
                r = lf.rpd_points(pt, coef)

        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

        if curved:
            d = distance_points(pt, pt[0], pt[-1])
            index = np.argmax(d)
            stack.append((left + index, left + len(pt)))
            stack.append((left, left + index + 1))
            removed.append([left, len(pt) - 2.0])

    reduced.append(len(points) - 1)
    return np.array(reduced), np.array(removed)