Example 1
    def _forward(self, X, y=None, compute_loss_grad=True, state='training'):
        """

        Make a forward pass.
        Note that if y is None, then loss score and loss grads will
            be None in output tuple.

        :param X: numpy.array, input data
        :param y: numpy.array, input labels. Default None. If None loss and loss gradient won`t be computed.
        :param compute_loss_grad: bool, indicates whether to compute loss derivatives
        :return: tuple, (loss_score, predictions, loss_grads).
            If y is None only predictions are not None.
        """
        for layer in self.layers:
            X = layer.forward(layer_input=X, state=state, seed=self.seed)

        if y is None:
            if self.loss and self.loss.name == "CrossEntropy":
                # Keep the documented tuple shape in inference mode too.
                return None, softmax(X), None
            return None, X, None

        loss, preds, dX = self.loss.build(X,
                                          y,
                                          compute_derivative=compute_loss_grad)

        return loss, preds, dX
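
A note on the softmax helper these examples rely on: it is defined elsewhere in each codebase. A minimal, numerically stable reconstruction covering the common softmax(x) / softmax(x, axis=...) call sites might look like the sketch below (an assumption, not any of the original helpers; Examples 3 and 12 instead pass a second positional argument that acts as a sharpening parameter, and Example 7 uses a return_separately variant sketched later):

import numpy as np

def softmax(x, axis=-1):
    # Subtract the max along the axis for numerical stability.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)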
Example 2
def head_pose_postprocess(preds_hp, theta):
    """
    Postprocesses the raw head pose predictions (scores for yaw, pitch, roll)
    and returns the head poses (roll, yaw, pitch) in radians.

    Parameters
    ----------
    preds_hp: NumPy array
        Raw head pose predictions.
    theta: NumPy array
        rotation angle(s) in radians of the cropping bounding boxes.

    Returns
    -------
    head_pose: NumPy array
        Roll (left+), yaw (right+), pitch (down+) in radians in the input
        image coordinates (of the head pose network).
    head_pose_orig: NumPy array
        The same angles with the crop rotation theta added back to the
        roll, i.e. in the original (uncropped) image coordinates.
    """
    head_pose = np.empty((len(preds_hp[0]), 3), dtype=np.float32)
    # Reorder from (yaw, pitch, roll) predictions to (roll, yaw, pitch).
    for i_new, i in enumerate([2, 0, 1]):
        score = preds_hp[i]
        pred = softmax(score)
        # Expectation over 66 bins, mapped to degrees via 3 * index - 99.
        tmp = (pred * np.arange(66)[np.newaxis]).sum(axis=1)
        head_pose[:, i_new] = tmp * 3 - 99
    # At this point, we have roll left+, yaw right+, pitch up+ in degrees
    head_pose *= np.pi / 180
    head_pose[:, 2] *= -1 # pitch down+

    head_pose_orig = head_pose.copy()
    head_pose_orig[:, 0] += theta
    return head_pose, head_pose_orig
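
As a quick sanity check on the bin decoding above (a standalone toy example, not part of the original code): each of the 66 bins spans 3 degrees, so a distribution concentrated on bin i decodes to 3 * i - 99 degrees, and the center bin 33 maps to 0 degrees:

import numpy as np

pred = np.zeros((1, 66))
pred[0, 33] = 1.0  # one-hot on the center bin
deg = (pred * np.arange(66)[np.newaxis]).sum(axis=1) * 3 - 99
print(deg)  # [0.]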
Example 3
def get_policy(action_values, strategy, k):
    if strategy == 'pow':
        x = softmax_pow(action_values, k)
    elif strategy == 'exp':
        x = softmax(action_values, k)
    else:
        raise ValueError('unknown strategy: {}'.format(strategy))
    # Snap near-certain probabilities to exact 0 and 1.
    x[x < 1e-6] = 0
    x[x > 1 - 1e-6] = 1
    return x
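
softmax_pow is not defined in any of these snippets. Given its role as the 'pow' alternative to the exponential softmax, a plausible sketch (an assumption, not the original function) normalizes the k-th power of the non-negative values:

import numpy as np

def softmax_pow(x, k):
    # Hypothetical reconstruction: normalize x**k instead of exp(x).
    # Larger k sharpens the distribution toward the maximum entry.
    p = np.power(x, k)
    return p / np.sum(p)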
Example 4
def predict(net, img):
    img = preprocess(img)

    # feedforward
    output = net.predict([img])
    logits = output[0]

    pred = softmax(logits, axis=1)

    return pred
Example 5
def predict(net, img):
    img = preprocess(img)

    # feedforward
    output = net.predict([img])
    output = output[0]

    prob = softmax(output)

    return prob[0]
Example 6
def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame
Example 7
    def build(self, layer_input, true_labels, compute_derivative=True):
        batch_size = layer_input.shape[0]
        probs_unnorm, Z, shifted_input = softmax(
            layer_input, return_separately=True)
        log_probs = shifted_input - np.log(Z)
        loss = -np.sum(log_probs[range(batch_size),
                                 true_labels.flatten()]) / float(batch_size)
        loss_derivative = None
        predicted_labels = np.argmax(log_probs, axis=1).reshape(-1, 1)
        if compute_derivative:
            loss_derivative = probs_unnorm / Z
            loss_derivative[range(batch_size), true_labels.flatten()] -= 1.
            loss_derivative /= float(batch_size)
        return loss, predicted_labels, loss_derivative
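
This build method expects a softmax variant that, when called with return_separately=True, hands back the unnormalized exponentials, the normalizer Z, and the max-shifted input, so the log-probabilities can be formed without re-exponentiating. A minimal sketch of that contract, inferred from how the three values are used above (an assumption, not the original helper):

import numpy as np

def softmax(x, return_separately=False):
    # Shift each row by its max for numerical stability.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    Z = np.sum(exps, axis=1, keepdims=True)
    if return_separately:
        return exps, Z, shifted
    return exps / Z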
Example 8
    def get_gradients(self, inputs, expected_outputs):
        bias_gradients = [np.zeros(bias.shape) for bias in self.biases]
        weight_gradients = [np.zeros(weight.shape) for weight in self.weights]

        # forward pass
        activations = [inputs]
        activation = activations[0]
        sigmoid_z_caches = []

        for i in range(len(self.biases)):
            bias = self.biases[i]
            weight = self.weights[i]
            z = np.dot(weight, activation) + bias

            if i == len(self.biases) - 1:
                # output layer
                activation = math_utils.softmax(z)
            else:
                # hidden layers
                activation = math_utils.sigmoid(z)

            activations.append(activation)
            sigmoid_z_caches.append(z)

        # backward propagation

        # Error (delta) for the output layer. With a softmax output and a
        # cross-entropy loss, the delta simplifies to (activation - expected),
        # which is presumably what math_utils.cross_entropy returns here.
        err = math_utils.cross_entropy(activations[-1], expected_outputs)

        bias_gradients[-1] = err
        weight_gradients[-1] = np.dot(err, activations[-2].T)

        # err for the hidden layers
        for i in range(len(self.layer_sizes) - 1, 1, -1):
            err = np.dot(self.weights[i-1].T, err) * \
                  math_utils.sigmoid_derivative(sigmoid_z_caches[i-2])
            bias_gradients[i - 2] = err
            weight_gradients[i - 2] = np.dot(err, activations[i - 2].T)

        return {
            'bias_gradients': bias_gradients,
            'weight_gradients': weight_gradients
        }
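
The output-layer shortcut used above can be checked numerically (a standalone sanity check, not part of the original class): the gradient of the cross-entropy loss with respect to the pre-softmax logits is softmax(z) - y.

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

z = np.array([0.5, -1.2, 2.0])
y = np.array([0.0, 1.0, 0.0])  # one-hot target

analytic = softmax(z) - y  # claimed gradient

# Central finite differences on the cross-entropy loss.
eps = 1e-6
numeric = np.zeros_like(z)
for j in range(len(z)):
    zp, zm = z.copy(), z.copy()
    zp[j] += eps
    zm[j] -= eps
    loss_p = -np.sum(y * np.log(softmax(zp)))
    loss_m = -np.sum(y * np.log(softmax(zm)))
    numeric[j] = (loss_p - loss_m) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))  # True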
Example 9
def predict(net, img, text_feature):
    img = preprocess(img)

    # feedforward
    if not args.onnx:
        output = net.predict([img])
    else:
        output = net.run(None, {'image': img})

    image_feature = output[0]

    image_feature = image_feature / np.linalg.norm(
        image_feature, ord=2, axis=-1, keepdims=True)

    logit_scale = 100  # CLIP-style fixed logit scale (inverse temperature)
    logits_per_image = (image_feature * logit_scale).dot(text_feature.T)

    pred = softmax(logits_per_image, axis=1)

    return pred[0]
Example 10
def decode_response(cls_logits,
                    center_logits,
                    reg_logits,
                    locations,
                    boxes,
                    use_centerness=True,
                    sigma=0.4):
    cls_logits = softmax(cls_logits, axis=1)
    # Keep only the foreground-class probability channel.
    cls_logits = cls_logits[:, 1:2, :, :]
    if use_centerness:
        centerness = sigmoid(center_logits)
        obj_confidence = cls_logits * centerness
    else:
        obj_confidence = cls_logits

    num_track_objects = obj_confidence.shape[0]
    obj_confidence = obj_confidence.reshape((num_track_objects, -1))
    tlbr = reg_logits.reshape((num_track_objects, 4, -1))

    scale_penalty = _get_scale_penalty(tlbr, boxes)
    cos_window = _get_cosine_window_penalty(tlbr)
    p_obj_confidence = (obj_confidence *
                        scale_penalty) * (1 - sigma) + sigma * cos_window

    idxs = np.argmax(p_obj_confidence, axis=1)

    target_ids = np.arange(num_track_objects)
    bb_c = locations[target_ids, idxs, :]
    shift_tlbr = tlbr[target_ids, :, idxs]

    bb_tl_x = bb_c[:, 0:1] - shift_tlbr[:, 0:1]
    bb_tl_y = bb_c[:, 1:2] - shift_tlbr[:, 1:2]
    bb_br_x = bb_c[:, 0:1] + shift_tlbr[:, 2:3]
    bb_br_y = bb_c[:, 1:2] + shift_tlbr[:, 3:4]
    bb = np.concatenate((bb_tl_x, bb_tl_y, bb_br_x, bb_br_y), axis=1)

    cls_logits = cls_logits.reshape((num_track_objects, -1))
    bb_conf = cls_logits[target_ids, idxs]

    return bb, bb_conf
Example 11
def post_processing(
        class_logits, box_regression, bbox,
        ids=None, labels=None):
    prob = softmax(class_logits, -1)

    proposals = box_decode(
        box_regression, bbox,
        weights=(10.0, 10.0, 5.0, 5.0)
    )

    num_classes = prob.shape[1]

    # default id is -1
    ids = ids if ids is not None else np.zeros(len(bbox), dtype=int) - 1

    # this only happens for tracks
    if labels is not None and 0 < len(labels):
        # tracks
        track_inds = np.nonzero(ids >= 0)[0]

        # keep track boxes from being suppressed during NMS
        if 0 < len(track_inds):
            prob_cp = np.array(prob)
            prob[track_inds, :] = 0.
            prob[track_inds, labels] = prob_cp[track_inds, labels] + 1.

    boxes = BBox(
        bbox=proposals.reshape(-1, 4),
        scores=prob.reshape(-1),
        ids=ids
    )
    boxes.bbox[:, 0] = boxes.bbox[:, 0].clip(0, max=IMAGE_WIDTH - 1)
    boxes.bbox[:, 1] = boxes.bbox[:, 1].clip(0, max=IMAGE_HEIGHT - 1)
    boxes.bbox[:, 2] = boxes.bbox[:, 2].clip(0, max=IMAGE_WIDTH - 1)
    boxes.bbox[:, 3] = boxes.bbox[:, 3].clip(0, max=IMAGE_HEIGHT - 1)

    boxes = filter_results(boxes, num_classes)

    return boxes
Example 12
    def get_policy(self, observations):
        policies = [[] for _ in range(self.city.N)]
        for road in self.city.roads:
            policy = np.zeros(len(road.reachable_roads))
            for i, road_index in enumerate(road.reachable_roads):
                v = observations[road_index][1]
                if not self.order_proportional:
                    v = max(v-observations[road_index][0], 0)
                policy[i] = v
            if policy.sum() == 0:
                policy.fill(1)
            if self.strategy == 0:
                # Greedy: uniform over the argmax entries.
                policy = np.where(policy == np.amax(policy), 1.0, 0.0)
                policy /= policy.sum()
            elif self.strategy == 1:
                # Proportional, optionally sharpened by a power.
                policy /= policy.sum()
                if self.policy_pow != 1:
                    policy = softmax_pow(policy, self.policy_pow)
            else:
                # Softmax, with self.policy_pow as the sharpening argument
                # (see the softmax_pow note after Example 3).
                policy = softmax(policy, self.policy_pow)

            policies[road.uuid] = policy
        return policies
Example 13
    def forward(self, layer_input, *args, **kwargs):
        self.current_layer_input = layer_input
        self.current_layer_output = softmax(layer_input)
        return self.current_layer_output
Example 14
    def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
        # generate centers
        decode_boxes = []
        select_scores = []
        for stride, box_distribute, score in zip(self.strides, raw_boxes, scores):
            # centers
            fm_h = self.input_shape[0] / stride
            fm_w = self.input_shape[1] / stride
            h_range = np.arange(fm_h)
            w_range = np.arange(fm_w)
            ww, hh = np.meshgrid(w_range, h_range)
            ct_row = (hh.flatten() + 0.5) * stride
            ct_col = (ww.flatten() + 0.5) * stride
            center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)

            # Box distribution to distance: expected value over the
            # reg_max + 1 discrete bins (GFL-style integral decoding).
            reg_range = np.arange(self.reg_max + 1)
            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
            box_distance = softmax(box_distance, axis=1)
            box_distance = box_distance * np.expand_dims(reg_range, axis=0)
            box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
            box_distance = box_distance * stride

            # top K candidate
            topk_idx = np.argsort(score.max(axis=1))[::-1]
            topk_idx = topk_idx[:self.num_candidate]
            center = center[topk_idx]
            score = score[topk_idx]
            box_distance = box_distance[topk_idx]

            # decode box
            decode_box = center + [-1, -1, 1, 1] * box_distance

            select_scores.append(score)
            decode_boxes.append(decode_box)

        # nms
        bboxes = np.concatenate(decode_boxes, axis=0)
        confidences = np.concatenate(select_scores, axis=0)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(0, confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > self.prob_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            subset_boxes = bboxes[mask, :]
            box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
            box_probs = hard_nms(
                box_probs,
                iou_threshold=self.iou_threshold,
                top_k=self.top_k,
            )
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.shape[0])
        if not picked_box_probs:
            return np.array([]), np.array([]), np.array([])
        picked_box_probs = np.concatenate(picked_box_probs)

        # resize output boxes
        picked_box_probs[:, :4] = warp_boxes(
            picked_box_probs[:, :4], np.linalg.inv(ResizeM), raw_shape[1], raw_shape[0])
        return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]
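
The box decoding above uses the same expectation trick as the head-pose example: each of the four box sides is predicted as a distribution over reg_max + 1 bins, and the decoded distance is the softmax-weighted bin index scaled by the stride. A toy illustration (reg_max = 7 and stride = 8 are assumed values, not tied to the original model):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

reg_max, stride = 7, 8
logits = np.zeros((4, reg_max + 1))  # 4 sides x (reg_max + 1) bins
logits[:, 3] = 10.0  # sharply peaked at bin 3

dist = softmax(logits, axis=1)
dist = (dist * np.arange(reg_max + 1)).sum(axis=1) * stride
print(dist)  # approximately [24. 24. 24. 24.], i.e. 3 bins * 8 px per side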
Example 15
def postprocess(output, arch):
    if arch == "erfnet":
        output = softmax(output, axis=1)
    elif arch == "scnn":
        output = output.transpose((0, 3, 1, 2))
    return output
Example 16
def recognize_from_video(enc, dec):
    video_file = args.video if args.video else args.input[0]
    capture = webcamera_utils.get_capture(video_file)
    assert capture.isOpened(), 'Cannot capture source'

    # create video writer if savepath is specified as video format
    if args.savepath is not None:
        logger.warning(
            'currently, video results cannot be output correctly...'
        )
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    sequence_size = 16

    embeddings = []
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = preprocess(frame)

        # feedforward
        output = enc.predict([img])
        embedding = output[0]
        embedding = embedding.reshape((1, -1))

        embeddings.append(embedding)
        embeddings = embeddings[-sequence_size:]
        if len(embeddings) == sequence_size:
            decoder_input = np.concatenate(embeddings, axis=0)
            decoder_input = np.expand_dims(decoder_input, axis=0)

            output = dec.predict([decoder_input])
            logits = output[0]

            probs = softmax(logits)
            probs = probs[0]

            i = np.argmax(probs)
            display_text = '{} - {:.2f}%'.format(
                LABELS[i],
                probs[i] * 100
            )
        else:
            display_text = 'Preparing...'

        frame = render_frame(frame, display_text)

        cv2.imshow('frame', frame)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()

    logger.info('Script finished successfully.')