Example #1
def test_unique_label_indices():

    # Random labels in [1, 1 << 10), long enough to guarantee duplicates.
    a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)

    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1]

    tm.assert_numpy_array_equal(left, right, check_dtype=False)

    # -1 marks missing labels; unique_label_indices skips them, so the first
    # entry of np.unique's result (the index of the smallest value, -1) is dropped.
    a[np.random.choice(len(a), 10)] = -1
    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1][1:]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)
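For reference, the behaviour exercised above can be sketched in plain numpy (illustrative only: unique_label_indices_ref is a hypothetical name, and the real pandas helper uses a hash table instead of sorting, which is why it is faster on large label arrays):

import numpy as np

def unique_label_indices_ref(labels):
    # Index of the first occurrence of each unique label, with -1 (missing)
    # labels dropped, matching what the test above asserts against.
    uniques, first_idx = np.unique(labels, return_index=True)
    return first_idx[uniques >= 0]

print(unique_label_indices_ref(np.array([1, 2, 2, 2, 1, 3])))  # [0 1 5]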
Example #2
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool):
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8")
        shape = np.asarray(shape, dtype="i8") + lift

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() else [
            x - y for x, y in zip(out, lift)
        ]

    # TODO: unique_label_indices only used here, should take ndarray[np.intp]
    i = unique_label_indices(ensure_int64(comp_ids))
    i8copy = lambda a: a.astype("i8", subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
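The fast path exists because, as long as the product of the level sizes fits in int64, the observed group ids are a mixed-radix encoding of the per-level labels, so they can be decoded with repeated divmod. A rough numpy sketch of that round trip (illustrative only; encode and decode are hypothetical helpers, not the pandas decons_group_index):

import numpy as np

def encode(labels, shape):
    # group_id = l0 * (s1 * s2 * ...) + l1 * (s2 * ...) + ... + lk
    ids = np.zeros_like(labels[0], dtype=np.int64)
    for lab, size in zip(labels, shape):
        ids = ids * size + lab
    return ids

def decode(ids, shape):
    # Invert the encoding level by level, starting from the fastest-varying one.
    out = []
    for size in reversed(shape):
        ids, lab = np.divmod(ids, size)
        out.append(lab)
    return out[::-1]

labels = [np.array([0, 1, 1, 2]), np.array([3, 0, 2, 1])]
shape = (3, 4)
ids = encode(labels, shape)  # array([3, 4, 6, 9])
assert all((a == b).all() for a, b in zip(decode(ids, shape), labels))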
Example #3
def decons_obs_group_ids(
    comp_ids: npt.NDArray[np.intp],
    obs_ids: npt.NDArray[np.intp],
    shape: Shape,
    labels: Sequence[npt.NDArray[np.signedinteger]],
    xnull: bool,
) -> list[npt.NDArray[np.intp]]:
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    obs_ids: np.ndarray[np.intp]
    shape : tuple[int]
    labels : Sequence[np.ndarray[np.signedinteger]]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp)
        arr_shape = np.asarray(shape, dtype=np.intp) + lift
        shape = tuple(arr_shape)

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = _decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)]

    indexer = unique_label_indices(comp_ids)
    return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]
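The lift bookkeeping mirrors the encoding side: when xnull is False, any level containing -1 had its labels shifted up by one (so the null becomes a valid code 0) and its size grown by one before the group ids were built, which is why the decoded labels are shifted back down above. A tiny illustration of that shift on a single level (made-up values, not the pandas code):

import numpy as np

lab = np.array([0, -1, 2, 1])                 # -1 marks a missing value
size = 3

lifted_lab, lifted_size = lab + 1, size + 1   # encoding side: -1 becomes 0, size 3 becomes 4
recovered = lifted_lab - 1                    # decoding side: the "x - y" subtraction above
assert (recovered == lab).all()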
Example #4
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
    """
    reconstruct labels from observed group ids

    Parameters
    ----------
    xnull: boolean,
        if nulls are excluded; i.e. -1 labels are passed through
    """

    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
        shape = np.asarray(shape, dtype='i8') + lift

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() \
            else [x - y for x, y in zip(out, lift)]

    i = unique_label_indices(comp_ids)
    i8copy = lambda a: a.astype('i8', subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
Example #5
def test_unique_label_indices_intp(writable):
    keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)
    keys.flags.writeable = writable
    result = ht.unique_label_indices(keys)
    expected = np.array([0, 1, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(result, expected)
Example #6
def get_map_for_class(zipped_data_arr,
                      min_ious=np.linspace(.50, 0.95, 10, endpoint=True),
                      avg_recalls=np.linspace(0.00, 1.00, 101, endpoint=True),
                      nms_iou=.7):
    # Used linspace over arange for min_ious/avg_recalls due to issues with endpoints
    all_confs = []
    all_correct_preds = []
    num_total_detections = 0
    num_total_gtruths = 0
    for ground_arr, detector_arr in zipped_data_arr:
        num_gtruths = len(ground_arr)
        if not detector_arr:
            num_total_gtruths += num_gtruths
            continue
        detector_arr = np.asarray(detector_arr, dtype=np.float64)
        # Sort by descending confidence, use mergesort to match COCO evaluation
        detector_arr = detector_arr[detector_arr[:, -1].argsort(
            kind='mergesort')[::-1]]
        det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose()
        if nms_iou is not None:
            # Code for NMS
            all_indices_to_keep = []
            cur_indices_to_keep = np.arange(len(detector_arr))
            # Repeat until no detections left below overlap threshold
            while cur_indices_to_keep.size > 1:
                # Add the most confident element
                all_indices_to_keep.append(cur_indices_to_keep[0])
                cur_x_min = det_x_min[cur_indices_to_keep]
                cur_x_max = det_x_max[cur_indices_to_keep]
                cur_y_min = det_y_min[cur_indices_to_keep]
                cur_y_max = det_y_max[cur_indices_to_keep]
                intersect_widths = (
                    np.minimum(cur_x_max[0], cur_x_max[1:]) -
                    np.maximum(cur_x_min[0], cur_x_min[1:])).clip(min=0)
                intersect_heights = (
                    np.minimum(cur_y_max[0], cur_y_max[1:]) -
                    np.maximum(cur_y_min[0], cur_y_min[1:])).clip(min=0)
                intersect_areas = intersect_widths * intersect_heights
                # Inclusion exclusion principle!
                union_areas = (
                    (cur_x_max[0] - cur_x_min[0]) *
                    (cur_y_max[0] - cur_y_min[0]) +
                    (cur_x_max[1:] - cur_x_min[1:]) *
                    (cur_y_max[1:] - cur_y_min[1:])) - intersect_areas
                # Just in case a ground truth has zero area
                cur_ious = np.divide(intersect_areas,
                                     union_areas,
                                     out=union_areas,
                                     where=union_areas != 0)
                # Drop the current box, then keep only the remaining boxes
                # whose IoU with it is below the NMS threshold.
                cur_indices_to_keep = cur_indices_to_keep[1:]
                cur_indices_to_keep = np.intersect1d(
                    cur_indices_to_keep,
                    cur_indices_to_keep[np.nonzero(cur_ious < nms_iou)[0]],
                    assume_unique=True)
            if cur_indices_to_keep.size == 1:
                all_indices_to_keep.append(cur_indices_to_keep[0])
            detector_arr = detector_arr[np.asarray(all_indices_to_keep)]
            det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose()
        num_detections = len(detector_arr)
        if not ground_arr:
            num_total_detections += num_detections
            all_confs.append(confs)
            continue
        ground_arr = np.asarray(ground_arr, dtype=np.float64)
        ground_x_min, ground_x_max, ground_y_min, ground_y_max = ground_arr.transpose()
        # Clip negative since negative implies no overlap
        intersect_widths = (
            np.minimum(det_x_max[:, np.newaxis], ground_x_max) -
            np.maximum(det_x_min[:, np.newaxis], ground_x_min)).clip(min=0)
        intersect_heights = (
            np.minimum(det_y_max[:, np.newaxis], ground_y_max) -
            np.maximum(det_y_min[:, np.newaxis], ground_y_min)).clip(min=0)
        intersect_areas = intersect_widths * intersect_heights
        # Inclusion exclusion principle!
        union_areas = ((det_x_max - det_x_min) *
                       (det_y_max - det_y_min))[:, np.newaxis] + (
                           (ground_x_max - ground_x_min) *
                           (ground_y_max - ground_y_min)) - intersect_areas
        # Just in case a ground truth has zero area
        iou = np.divide(intersect_areas,
                        union_areas,
                        out=union_areas,
                        where=union_areas != 0)
        # Defined best ground truth as one with highest IOU. This is an array of size num_detections, where
        # best_gtruths[i] is the index of the ground truth to which prediction i is most similar (highest IOU)
        best_gtruths = np.argmax(iou, axis=1)
        # valid_preds is a lazy map of np.nonzero results: one entry per min_iou in the
        # min_ious array, each a 1-tuple holding the indices of the predictions whose IOU
        # with their best ground truth is greater than that min_iou.
        valid_preds = map(
            np.nonzero, iou[np.arange(num_detections), best_gtruths] >
            min_ious[:, np.newaxis])
        #
        ## Useful for standard precision/recall metrics
        # num_true_positives = np.count_nonzero(np.bincount(best_gtruths[valid_preds]))
        # num_false_positives = num_detections - detected_gtruths
        # num_false_negatives = num_gtruths - detected_gtruths
        #
        # best_gtruths[valid_pred[0]] uses the previously calculated valid_preds entries to return
        # the ground truth index assigned to each prediction whose ground truth-prediction
        # IOU was greater than min_iou. Then unique_label_indices is used to find the leftmost
        # occurrence of each distinct ground truth index, which corresponds to finding the true positives
        # (since we only consider the highest confidence prediction for each ground truth to be a true
        # positive, rest are false positives)
        # Note that pandas unique_label_indices is equivalent to np.unique(labels, return_index=True)[1] but
        # is considerably faster due to using a hashtable instead of sorting
        # Once the indices of the true positive predictions are found in the smaller array containing only
        # predictions with IOU > min_iou, they are converted back into indices for the original array
        # using valid_pred.
        correct_preds = [
            valid_pred[0][unique_label_indices(best_gtruths[valid_pred[0]])] +
            num_total_detections for valid_pred in valid_preds
        ]
        all_correct_preds.append(correct_preds)
        all_confs.append(confs)
        num_total_detections += num_detections
        num_total_gtruths += num_gtruths
    # Edge case of no predictions for a class
    if not all_confs:
        return 0
    # Concatenates all predictions and confidences together to find class MAP
    all_confs = np.concatenate(all_confs)
    all_correct_preds = [
        np.concatenate(cur_pred) for cur_pred in zip(*all_correct_preds)
    ]
    # Sets only correct prediction indices to true, rest to false.
    true_positives = np.zeros((len(min_ious), num_total_detections),
                              dtype=bool)
    for iou_index, positive_locs in enumerate(all_correct_preds):
        true_positives[iou_index][positive_locs] = True
    # Mergesort is chosen to be consistent with coco/matlab results
    sort_order = all_confs.argsort(kind='mergesort')[::-1]
    true_positives = true_positives[:, sort_order]
    # Keeps track of number of true positives until each given point
    all_true_positives = np.cumsum(true_positives, axis=1)
    # PASCAL VOC 2012
    if avg_recalls is None:
        # Zero pad both sides to calculate area under curve
        precision = np.zeros((len(min_ious), num_total_detections + 2),
                             dtype=np.float64)
        # Pad one side with zeros and the other with ones for area under curve
        recall = np.zeros((len(min_ious), num_total_detections + 2),
                          dtype=np.float64)
        recall[:, -1] = np.ones(len(min_ious), dtype=np.float64)
        # In python >=3 this is equivalent to np.true_divide
        precision[:, 1:-1] = all_true_positives / np.arange(
            1, num_total_detections + 1)
        # Makes each element in precision list max of all elements to right (ignores endpoints)
        precision[:, 1:-1] = np.maximum.accumulate(precision[:, -2:0:-1],
                                                   axis=1)[:, ::-1]
        recall[:, 1:-1] = all_true_positives / num_total_gtruths
        # Calculate area under P-R curve for each IOU
        # Should only be one IOU at .5 for PASCAL
        all_areas = []
        for cur_recall, cur_precision in zip(recall, precision):
            # Find indices where value of recall changes
            change_points = np.nonzero(cur_recall[1:] != cur_recall[:-1])[0]
            # Calculate sum of dw * dh as area and append to all areas
            all_areas.append(
                np.sum(
                    (cur_recall[change_points + 1] - cur_recall[change_points])
                    * cur_precision[change_points + 1]))
        return np.mean(all_areas)
    # PASCAL VOC 2007
    else:
        # The extra zero handles recall values larger than the model actually achieves
        precision = np.zeros((len(min_ious), num_total_detections + 1),
                             dtype=np.float64)
        # In python >=3 this is equivalent to np.true_divide
        precision[:, :-1] = all_true_positives / np.arange(
            1, num_total_detections + 1)
        # Makes each element in precision list max of all elements to right (extra zero at right doesn't matter)
        precision = np.maximum.accumulate(precision[:, ::-1], axis=1)[:, ::-1]
        recall = all_true_positives / num_total_gtruths
        # For each target recall, find the leftmost index whose recall reaches it (and hence the largest interpolated precision)
        indices_to_average = np.apply_along_axis(np.searchsorted, 1, recall,
                                                 avg_recalls)
        # Look up that interpolated precision for each recall point and each IOU threshold
        precs_to_average = precision[np.arange(len(precision))[:, np.newaxis],
                                     indices_to_average]
        # Returns average precision over each recall and over each IOU. Can specify an axis
        # if separate average precision is wanted for each IOU (e.g. to do more precise statistics)
        return np.mean(precs_to_average)
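A minimal, made-up invocation sketch (hypothetical boxes; it assumes the row layout implied by the unpacking in the function, detections as [x_min, x_max, y_min, y_max, confidence] and ground truths as [x_min, x_max, y_min, y_max], one (ground_arr, detector_arr) pair per image, and that numpy and pandas' unique_label_indices are already imported):

# One image: two ground-truth boxes and three detections for this class.
ground_arr = [
    [0.0, 10.0, 0.0, 10.0],            # x_min, x_max, y_min, y_max
    [20.0, 30.0, 20.0, 30.0],
]
detector_arr = [
    [0.0, 10.0, 0.0, 10.0, 0.9],       # exact match, high confidence
    [21.0, 31.0, 20.0, 30.0, 0.8],     # slightly shifted match
    [50.0, 60.0, 50.0, 60.0, 0.3],     # false positive
]

ap = get_map_for_class([(ground_arr, detector_arr)])
print(ap)  # mean average precision for this class over the default IoU thresholds and recall points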