Ejemplo n.º 1
0
def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.99, t2: int = 3) -> np.ndarray:
    """
    Wrapper that converts a single knee point detector into a multi knee point detector.

    After a knee is found, the parts of the curve to its left and to its
    right are searched again. An explicit stack is used instead of recursion.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
            (an index relative to the points it receives) or None
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the coefficient of determination used as a threshold (default 0.99);
            a segment whose linear fit reaches this value is not searched
        t2 (int): the mininum number of points used as a threshold (default 3);
            only segments with more than t2 points are searched

    Returns:
        np.ndarray: sorted integer indexes (relative to points) of the knees
    """

    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        # too few points to search for a knee
        if len(pt) <= t2:
            continue

        # a segment that is already well described by a line has no knee
        coef = linear_fit_points(pt)
        if not (linear_r2_points(pt, coef) < t1):
            continue

        rv = get_knee(pt)
        if rv is None:
            continue

        idx = rv + left
        knees.append(idx)

        # The left part includes the knee itself. When the knee is the last
        # point of the segment the left part is the segment just processed;
        # pushing it again would repeat the same work forever.
        if idx + 1 < right:
            stack.append((left, idx + 1))
        stack.append((idx + 1, right))

    knees.sort()
    # dtype=int keeps an empty result usable as an index array
    return np.array(knees, dtype=int)
Ejemplo n.º 2
0
def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.01, t2: int = 3, cost: lf.Linear_Metrics = lf.Linear_Metrics.rmspe) -> np.ndarray:
    """
    Wrapper that converts a single knee point detector into a multi knee point detector.

    After a knee is found, the parts of the curve to its left and to its
    right are searched again. An explicit stack is used instead of recursion.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
            (an index relative to the points it receives) or None
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the threshold applied to the cost of the linear fit (default 0.01);
            with rmspe a segment is searched when cost >= t1, with any other
            metric the coefficient of determination is used and a segment is
            searched when r2 < t1
        t2 (int): the mininum number of points used as a threshold (default 3);
            only segments with more than t2 points are searched
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rmspe)

    Returns:
        np.ndarray: sorted integer indexes (relative to points) of the knees
    """

    # every metric other than rmspe is evaluated with the r2 functions
    use_rmspe = cost is lf.Linear_Metrics.rmspe

    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        # too few points to search for a knee
        if len(pt) <= t2:
            continue

        if len(pt) <= 2:
            # only reachable when t2 < 2: two points are a perfect line
            r = 0.0 if use_rmspe else 1.0
        else:
            coef = lf.linear_fit_points(pt)
            if use_rmspe:
                r = lf.rmspe_points(pt, coef)
            else:
                r = lf.linear_r2_points(pt, coef)

        # rmspe is an error (higher is worse), r2 is a score (lower is worse)
        curved = r >= t1 if use_rmspe else r < t1
        if not curved:
            continue

        rv = get_knee(pt)
        if rv is None:
            continue

        idx = rv + left
        knees.append(idx)

        # The left part includes the knee itself. When the knee is the last
        # point of the segment the left part is the segment just processed;
        # pushing it again would repeat the same work forever.
        if idx + 1 < right:
            stack.append((left, idx + 1))
        stack.append((idx + 1, right))

    knees.sort()
    # dtype=int keeps an empty result usable as an index array
    return np.array(knees, dtype=int)
Ejemplo n.º 3
0
def compute_cost_sequence(points: np.ndarray,
                          reduced,
                          cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
                          distance: RDP_Distance = RDP_Distance.shortest):
    """
    Walk the segments delimited by the reduced indexes and fit a line to each.

    NOTE(review): the visible body appears to be truncated. It never advances
    `left`, never uses `cost`, `coef` or `distance_points`, and returns
    nothing. Confirm against the complete source before relying on it.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        reduced: indexes of the reduced space; sorted IN PLACE by this function
        cost (lf.Linear_Metrics): the cost method (default: lf.Linear_Metrics.rpd);
            not used in the visible part of the body
        distance (RDP_Distance): the distance metric (default: RDP_Distance.shortest)
    """
    # sort indexes (mutates the caller's sequence)
    reduced.sort()

    # select the distance metric to be used
    # (any value other than `perpendicular` ends up with the shortest distance)
    distance_points = None
    if distance is RDP_Distance.shortest:
        distance_points = lf.shortest_distance_points
    elif distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    left = 0
    for right in reduced:
        # segment from `left` up to (not including) the current reduced index
        pt = points[left:right]
        coef = lf.linear_fit_points(pt)
Ejemplo n.º 4
0
def auto_knees(points: np.ndarray,
               t: float = 1.0,
               sensitivity: float = 1.0,
               p: PeakDetection = PeakDetection.Kneedle) -> np.ndarray:
    """Returns the indexes of the knee points based on the Kneedle method.

    This implementation uses an heuristic to automatically define
    the direction of the curve: the sign of the slope of a linear fit.
    Both concavity rotations are evaluated and their knees are merged.

    Furthermore, it supports three different methods to select the
    relevant knees:
    1. Kneedle    : classical algorithm
    2. Significant: significant knee peak detection
    3. ZScore     : significant knee peak detection based on zscore

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve
        sensitivity (float): controls the sensitivity of the peak detection
        p (PeakDetection): selects the peak detection method

    Returns:
        np.ndarray: the sorted, unique indexes of the knee points
    """
    _, m = lf.linear_fit_points(points)
    cd = Direction.Increasing if m > 0.0 else Direction.Decreasing

    knees_per_concavity = [
        knees(points, sensitivity, t, cd, cc, p)
        for cc in (Concavity.Counterclockwise, Concavity.Clockwise)
    ]

    # np.concatenate generates float array when one is empty
    # (see https://github.com/numpy/numpy/issues/8878), hence the cast
    knees_idx = np.concatenate(knees_per_concavity).astype(int)

    # np.unique already returns its result sorted, no extra sort is needed
    return np.unique(knees_idx)
Ejemplo n.º 5
0
def rdp(points: np.ndarray, r: float = 0.9) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    Decimates a curve composed of line segments into a similar curve
    with fewer points. This version uses the coefficient of
    determination of a linear fit to decide whether a segment is kept
    as a single line or split further.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        r(float): the coefficient of determination threshold (default 0.9)

    Returns:
        tuple: the reduced space, the points that were removed
    """
    n = len(points)

    # two points (or fewer) are treated as a perfect line
    if n > 2:
        fit = lf.linear_fit_points(points)
        determination = lf.linear_r2_points(points, fit)
    else:
        determination = 1.0

    # good enough fit: keep only the two end points of the segment
    if not (determination < r):
        endpoints = np.empty((2, 2))
        endpoints[0], endpoints[1] = points[0], points[-1]
        # (x of the first point, number of interior points dropped)
        dropped = np.array([[points[0][0], n - 2.0]])
        return endpoints, dropped

    # split at the point farthest from the line through the end points
    distances = perpendicular_distance_points(points, points[0], points[-1])
    split = np.argmax(distances)

    head, head_removed = rdp(points[:split + 1], r)
    tail, tail_removed = rdp(points[split:], r)

    all_removed = np.concatenate((head_removed, tail_removed), axis=0)
    # the split point ends `head` and starts `tail`; keep it only once
    return np.concatenate((head[:-1], tail)), all_removed
Ejemplo n.º 6
0
def auto_knee(points: np.ndarray, t: float = 1.0) -> int:
    """Returns the index of the knee point based on the Kneedle method.

    This implementation uses an heuristic to automatically define
    the direction and rotation of the concavity:

    - direction: the sign of the slope of a linear fit of the points;
    - concavity: the sign of the sum of the residuals (y - fitted y).

    NOTE(review): if lf.linear_fit_points is an ordinary least-squares fit
    with an intercept, the residuals sum to (almost) zero and the vote is
    dominated by rounding noise. Confirm the intended heuristic.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve

    Returns:
        int: the index of the knee point
    """
    b, m = lf.linear_fit_points(points)

    cd = Direction.Increasing if m > 0.0 else Direction.Decreasing

    # residuals of the linear fit, computed for all the points at once
    yhat = points[:, 0] * m + b
    vote = np.sum(points[:, 1] - yhat)

    # the rotation depends only on the sign of the vote, for both directions
    cc = Concavity.Clockwise if vote > 0 else Concavity.Counterclockwise

    return single_knee(points, t, cd, cc)
Ejemplo n.º 7
0
 def test_r2_two(self):
     # a line fitted through exactly two points is expected to score R2 == 1.0
     two_points = np.array([[0.0, 1.0], [1.0, 5.0]])
     fitted = lf.linear_fit_points(two_points)
     self.assertEqual(lf.linear_r2_points(two_points, fitted), 1.0)
Ejemplo n.º 8
0
def _hull_cluster_rankings(points: np.ndarray, current_cluster: np.ndarray, hull: np.ndarray):
    """Rank the knees of one cluster using the hull points inside the cluster; None when there are none."""
    x = points[:, 0]

    # select the hull points that exist within the cluster
    a, b = current_cluster[[0, -1]]
    hull_within_cluster = hull[(hull >= a) & (hull <= b)]

    # only consider clusters with at least a single hull point
    if len(hull_within_cluster) == 0:
        return None

    rankings = np.zeros(len(current_cluster))

    if len(hull_within_cluster) == 1:
        # a single hull point: it wins if it is one of the knees
        rankings[np.isin(current_cluster, hull_within_cluster)] = 1.0
        return rankings

    # Neighbours just outside the cluster, clamped to the curve: a - 1 would
    # wrap around to the last point when the cluster starts at index 0 and
    # b + 1 would be out of bounds when it ends at the last index.
    lo = max(a - 1, 0)
    hi = min(b + 1, len(points) - 1)
    length = x[hi] - x[lo]

    for cluster_idx, j in enumerate(current_cluster):
        if j in hull_within_cluster:
            length_l = (x[j] - x[lo]) / length
            length_r = (x[hi] - x[j]) / length
            left = points[lo:j + 1]
            right = points[j:hi + 1]

            # error of replacing each side with the line through its end points
            r_l = np.sum(lf.shortest_distance_points(left, left[0], left[-1]))
            r_r = np.sum(lf.shortest_distance_points(right, right[0], right[-1]))

            rankings[cluster_idx] = r_l * length_l + r_r * length_r
        else:
            rankings[cluster_idx] = -1.0

    # replace all -1 (knees that are not on the hull) with maximum distance
    rankings[rankings < 0] = np.amax(rankings)
    return kr.distance_to_similarity(rankings)


def filter_clusters(
        points: np.ndarray,
        knees: np.ndarray,
        clustering: typing.Callable[[np.ndarray, float], np.ndarray],
        t: float = 0.01,
        method: kr.ClusterRanking = kr.ClusterRanking.linear) -> np.ndarray:
    """
    Filter the knee points based on clustering.

    For each cluster a single point is selected based on the ranking.
    With the hull method the ranking is computed from the lower hull of the
    points (knees outside the hull are discarded); otherwise it is
    delegated to kr.smooth_ranking.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        knees (np.ndarray): knees indexes
        clustering (typing.Callable[[np.ndarray, float]): the clustering function;
            receives the knee points and t and returns one integer label per knee
        t (float): the threshold for merging (in percentage, default 0.01)
        method (ranking.ClusterRanking): represents the direction of the ranking within a cluster (default ranking.ClusterRanking.linear)

    Returns:
        np.ndarray: the filtered knees (knees itself when it has at most one element)
    """
    use_hull = method is kr.ClusterRanking.hull

    hull = None
    if use_hull:
        hull = ch.graham_scan_lower(points)
        logger.info(f'hull {len(hull)}')

    # nothing to merge
    if len(knees) <= 1:
        return knees

    clusters = clustering(points[knees], t)

    filtered_knees = []
    for i in range(0, clusters.max() + 1):
        current_cluster = knees[clusters == i]

        # labels are not guaranteed to be contiguous: skip unused ones
        if len(current_cluster) == 0:
            continue

        if len(current_cluster) > 1:
            if use_hull:
                rankings = _hull_cluster_rankings(points, current_cluster, hull)
            else:
                rankings = kr.smooth_ranking(points, current_cluster, method)

            # Compute relative ranking and keep the best ranked knee
            if rankings is None:
                best_knee = None
            else:
                best_knee = current_cluster[np.argmax(kr.rank(rankings))]
        elif use_hull:
            # a lone knee survives only if it lies on the hull
            knee = current_cluster[0]
            best_knee = knee if knee in hull else None
        else:
            best_knee = current_cluster[0]

        if best_knee is not None:
            filtered_knees.append(best_knee)

    return np.array(filtered_knees)
Ejemplo n.º 9
0
def rdp(points: np.ndarray,
        t: float = 0.01,
        cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
        distance: RDP_Distance = RDP_Distance.shortest) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    Is an algorithm that decimates a curve composed of line segments to a similar curve with fewer points.
    This version uses different cost functions to decide whether to keep or split a line segment.
    An explicit stack is used instead of recursion.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): the threshold applied to the cost of a segment (default 0.01);
            with r2 a segment is split when cost < t, with every other
            metric it is split when cost >= t
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): the distance metric used to decide the split point (default: RDP_Distance.shortest)

    Returns:
        tuple: the index of the reduced space, the points that were removed
        (one [start index, number of interior points] row per kept segment)
    """
    # select the distance metric to be used
    # (anything other than perpendicular falls back to the shortest distance)
    if distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    # r2 is a score (lower is worse); the other metrics are errors
    use_r2 = cost is lf.Linear_Metrics.r2

    stack = [(0, len(points))]
    reduced = []
    removed = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            # two points (or fewer) are treated as a perfect line
            r = 1.0 if use_r2 else 0.0
        else:
            coef = lf.linear_fit_points(pt)
            if use_r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
            else:
                r = lf.rpd_points(pt, coef)

        curved = r < t if use_r2 else r >= t

        # A segment can only be split at an interior point. Splitting at an
        # end point (or a segment with no interior points) would push the
        # very same segment back on the stack and never terminate.
        split = None
        if curved and len(pt) > 2:
            index = np.argmax(distance_points(pt, pt[0], pt[-1]))
            if 0 < index < len(pt) - 1:
                split = index

        if split is None:
            reduced.append(left)
            removed.append([left, len(pt) - 2.0])
        else:
            # the left part is pushed last so it is processed first, which
            # keeps `reduced` in increasing order
            stack.append((left + split, left + len(pt)))
            stack.append((left, left + split + 1))

    reduced.append(len(points) - 1)
    return np.array(reduced), np.array(removed)
Ejemplo n.º 10
0
def grdp(points: np.ndarray,
         t: float = 0.01,
         cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
         distance: RDP_Distance = RDP_Distance.shortest) -> tuple:
    """
    Variant of RDP that always splits the pending segment with the highest cost.

    NOTE(review): this function looks unfinished and cannot run as written:
      * the stack is seeded with a 2-tuple but entries are unpacked as
        (cost, left, right) 3-tuples, so the first pop raises ValueError;
      * `distance_points`, `length` and `r` are used but never defined in
        this function (the `distance` argument is never read);
      * the cost of the current solution is never computed, so the loop
        condition has nothing to evaluate;
      * everything after the first `return` is unreachable and duplicates
        the body of the iterative rdp;
      * the annotation says `tuple` but the reachable `return` yields a
        single array.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): the threshold applied to the cost (default 0.01)
        cost (lf.Linear_Metrics): the cost method (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): the distance metric (default: RDP_Distance.shortest)

    Returns:
        the sorted indexes of the reduced space (reachable return)
    """
    # NOTE(review): should presumably be a (cost, left, right) triple
    stack = [(0, len(points))]

    reduced = []
    removed = []

    curved = True

    while curved:
        # the stack is kept sorted by cost, so pop() yields the costliest segment
        _, left, right = stack.pop()
        pt = points[left:right]

        # NOTE(review): distance_points is not defined in this function
        d = distance_points(pt, pt[0], pt[-1])
        index = np.argmax(d)
        # add the relevant point to the reduced set
        reduced.append(left + index)
        # compute the cost of the left and right parts
        left_cost = np.max(distance_points(pt[0:index + 1], pt[0], pt[index]))
        # NOTE(review): the right part is measured against pt[0]..pt[-1] rather
        # than pt[index]..pt[-1]; confirm whether this asymmetry is intended
        right_cost = np.max(distance_points(pt[index:len(pt)], pt[0], pt[-1]))
        # Add the points to the stack
        stack.append((right_cost, left + index, left + len(pt)))
        stack.append((left_cost, left, left + index + 1))
        # Sort the stack based on the cost
        # (ascending; the lambda parameter shadows the threshold `t`)
        stack.sort(key=lambda t: t[0])
        # NOTE(review): `length` is never initialised
        length -= 1

        # compute the cost of the current solution
        # NOTE(review): the computation is missing, `r` is undefined here

        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

    # add first and last points
    reduced.append(0)
    reduced.append(len(points) - 1)

    # sort indexes
    reduced.sort()

    return np.array(reduced)

    # NOTE(review): unreachable from here on (copy of the iterative rdp loop)
    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            if cost is lf.Linear_Metrics.r2:
                r = 1.0
            else:
                r = 0.0
        else:
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
            else:
                r = lf.rpd_points(pt, coef)

        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

        if curved:
            d = distance_points(pt, pt[0], pt[-1])
            index = np.argmax(d)
            stack.append((left + index, left + len(pt)))
            stack.append((left, left + index + 1))
        else:
            reduced.append(left)
            removed.append([left, len(pt) - 2.0])

    reduced.append(len(points) - 1)
    return np.array(reduced), np.array(removed)